notebook
This commit is contained in:
@@ -21,3 +21,9 @@
|
|||||||
2026-05-20 18:14:22,340 INFO nové: 2087 aktualizované: 4 chyby: 0
|
2026-05-20 18:14:22,340 INFO nové: 2087 aktualizované: 4 chyby: 0
|
||||||
2026-05-20 18:14:22,340 INFO ============================================================
|
2026-05-20 18:14:22,340 INFO ============================================================
|
||||||
2026-05-20 18:14:22,340 INFO Celkem — nové: 2091 aktualizované: 4 chyby: 0
|
2026-05-20 18:14:22,340 INFO Celkem — nové: 2091 aktualizované: 4 chyby: 0
|
||||||
|
2026-05-20 21:56:49,619 INFO Importuji: downloads/2026-05-20_15-21_EDC_MDD3003_QueryDetails.csv → edc.queries + queries_snapshots [2026-05-20]
|
||||||
|
2026-05-20 21:56:49,670 INFO nové: 0 aktualizované: 4 chyby: 0
|
||||||
|
2026-05-20 21:56:49,711 INFO Importuji: downloads/2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv → edc.queries + queries_snapshots [2026-05-20]
|
||||||
|
2026-05-20 21:57:07,554 INFO nové: 0 aktualizované: 2091 chyby: 0
|
||||||
|
2026-05-20 21:57:07,554 INFO ============================================================
|
||||||
|
2026-05-20 21:57:07,554 INFO Celkem — nové: 0 aktualizované: 2095 chyby: 0
|
||||||
|
|||||||
+55
-4
@@ -170,6 +170,31 @@ def ensure_query_indexes(collection) -> None:
|
|||||||
collection.create_index([("openedDate", ASCENDING)])
|
collection.create_index([("openedDate", ASCENDING)])
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_snapshot_indexes(collection) -> None:
    """Create the indexes for queries_snapshots.

    The unique compound index on (queryId, snapshotDate) is created first,
    followed by single-field ascending indexes.
    """
    # At most one document per (queryId, snapshotDate) pair.
    unique_key = [("queryId", ASCENDING), ("snapshotDate", ASCENDING)]
    collection.create_index(unique_key, unique=True)

    # Single-field indexes, created in this order.
    for field_name in ("snapshotDate", "queryStatus", "site.number", "subject.label"):
        collection.create_index([(field_name, ASCENDING)])
|
||||||
|
|
||||||
|
|
||||||
|
def extract_snapshot_date(filename: str) -> str:
    """
    Extract the snapshot date from a file name.

    '2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv' → '2026-05-20'

    Only the base name is inspected, so any leading directory is ignored.
    Fallback: today's date (UTC) when the base name has no leading
    YYYY-MM-DD prefix, or when that prefix is not a real calendar date.
    """
    base_name = Path(filename).name
    match = re.match(r"(\d{4}-\d{2}-\d{2})", base_name)
    if match:
        candidate = match.group(1)
        try:
            # The regex only checks the digit layout; reject e.g. '2026-13-45'.
            datetime.strptime(candidate, "%Y-%m-%d")
        except ValueError:
            pass
        else:
            return candidate
    return datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
|
||||||
def parse_date(value: str) -> str | None:
|
def parse_date(value: str) -> str | None:
|
||||||
"""Pokusí se převést string na ISO 8601; jinak vrátí None."""
|
"""Pokusí se převést string na ISO 8601; jinak vrátí None."""
|
||||||
value = value.strip()
|
value = value.strip()
|
||||||
@@ -294,8 +319,16 @@ def ensure_indexes(collection) -> None:
|
|||||||
collection.create_index([("lastModified", ASCENDING)])
|
collection.create_index([("lastModified", ASCENDING)])
|
||||||
|
|
||||||
|
|
||||||
def import_file(csv_path: str, collection) -> tuple[int, int, int]:
|
def import_file(
|
||||||
"""Importuje jeden CSV soubor. Vrátí (inserted, updated, errors)."""
|
csv_path: str,
|
||||||
|
collection,
|
||||||
|
snapshot_col=None,
|
||||||
|
snapshot_date: str | None = None,
|
||||||
|
) -> tuple[int, int, int]:
|
||||||
|
"""
|
||||||
|
Importuje jeden CSV soubor. Vrátí (inserted, updated, errors).
|
||||||
|
snapshot_col: pokud je zadán, pro QueryDetails se zapíše i daily snapshot.
|
||||||
|
"""
|
||||||
inserted = updated = errors = 0
|
inserted = updated = errors = 0
|
||||||
source_file = Path(csv_path).name
|
source_file = Path(csv_path).name
|
||||||
|
|
||||||
@@ -308,6 +341,15 @@ def import_file(csv_path: str, collection) -> tuple[int, int, int]:
|
|||||||
if query_mode:
|
if query_mode:
|
||||||
doc = map_query_row(row, source_file)
|
doc = map_query_row(row, source_file)
|
||||||
upsert_key = {"queryId": doc["queryId"]}
|
upsert_key = {"queryId": doc["queryId"]}
|
||||||
|
|
||||||
|
# Snapshot — upsert na (queryId, snapshotDate)
|
||||||
|
if snapshot_col is not None and snapshot_date:
|
||||||
|
snap_doc = {**doc, "snapshotDate": snapshot_date}
|
||||||
|
snapshot_col.update_one(
|
||||||
|
{"queryId": doc["queryId"], "snapshotDate": snapshot_date},
|
||||||
|
{"$set": snap_doc},
|
||||||
|
upsert=True,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
doc = map_row(row, source_file)
|
doc = map_row(row, source_file)
|
||||||
record_id = doc.get("form", {}).get("recordId")
|
record_id = doc.get("form", {}).get("recordId")
|
||||||
@@ -377,13 +419,22 @@ def main() -> None:
|
|||||||
col_name = "queries"
|
col_name = "queries"
|
||||||
collection = db[col_name]
|
collection = db[col_name]
|
||||||
ensure_query_indexes(collection)
|
ensure_query_indexes(collection)
|
||||||
|
snapshot_col = db["queries_snapshots"]
|
||||||
|
ensure_snapshot_indexes(snapshot_col)
|
||||||
|
snapshot_date = extract_snapshot_date(csv_path)
|
||||||
|
log.info("Importuji: %s → %s.%s + queries_snapshots [%s]",
|
||||||
|
csv_path, args.db, col_name, snapshot_date)
|
||||||
else:
|
else:
|
||||||
col_name = collection_name_from_filename(csv_path)
|
col_name = collection_name_from_filename(csv_path)
|
||||||
collection = db[col_name]
|
collection = db[col_name]
|
||||||
ensure_indexes(collection)
|
ensure_indexes(collection)
|
||||||
|
snapshot_col = None
|
||||||
|
snapshot_date = None
|
||||||
|
log.info("Importuji: %s → %s.%s", csv_path, args.db, col_name)
|
||||||
|
|
||||||
log.info("Importuji: %s → %s.%s", csv_path, args.db, col_name)
|
inserted, updated, errors = import_file(
|
||||||
inserted, updated, errors = import_file(csv_path, collection)
|
csv_path, collection, snapshot_col, snapshot_date
|
||||||
|
)
|
||||||
total_inserted += inserted
|
total_inserted += inserted
|
||||||
total_updated += updated
|
total_updated += updated
|
||||||
total_errors += errors
|
total_errors += errors
|
||||||
|
|||||||
Reference in New Issue
Block a user