diff --git a/Medidata/edc_import.log b/Medidata/edc_import.log index 9fc2fa1..f4fe7f3 100644 --- a/Medidata/edc_import.log +++ b/Medidata/edc_import.log @@ -21,3 +21,9 @@ 2026-05-20 18:14:22,340 INFO nové: 2087 aktualizované: 4 chyby: 0 2026-05-20 18:14:22,340 INFO ============================================================ 2026-05-20 18:14:22,340 INFO Celkem — nové: 2091 aktualizované: 4 chyby: 0 +2026-05-20 21:56:49,619 INFO Importuji: downloads/2026-05-20_15-21_EDC_MDD3003_QueryDetails.csv → edc.queries + queries_snapshots [2026-05-20] +2026-05-20 21:56:49,670 INFO nové: 0 aktualizované: 4 chyby: 0 +2026-05-20 21:56:49,711 INFO Importuji: downloads/2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv → edc.queries + queries_snapshots [2026-05-20] +2026-05-20 21:57:07,554 INFO nové: 0 aktualizované: 2091 chyby: 0 +2026-05-20 21:57:07,554 INFO ============================================================ +2026-05-20 21:57:07,554 INFO Celkem — nové: 0 aktualizované: 2095 chyby: 0 diff --git a/Medidata/edc_import.py b/Medidata/edc_import.py index 9dda174..b53674a 100644 --- a/Medidata/edc_import.py +++ b/Medidata/edc_import.py @@ -170,6 +170,31 @@ def ensure_query_indexes(collection) -> None: collection.create_index([("openedDate", ASCENDING)]) +def ensure_snapshot_indexes(collection) -> None: + """Indexy pro queries_snapshots — unikátní kombinace queryId + snapshotDate.""" + collection.create_index( + [("queryId", ASCENDING), ("snapshotDate", ASCENDING)], + unique=True, + ) + collection.create_index([("snapshotDate", ASCENDING)]) + collection.create_index([("queryStatus", ASCENDING)]) + collection.create_index([("site.number", ASCENDING)]) + collection.create_index([("subject.label", ASCENDING)]) + + +def extract_snapshot_date(filename: str) -> str: + """ + Vytáhne datum ze jména souboru. + '2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv' → '2026-05-20' + Fallback: dnešní datum. + """ + stem = Path(filename).name + match = re.match(r"(\d{4}-\d{2}-\d{2})", stem) + if match: + return match.group(1) + return datetime.now(timezone.utc).strftime("%Y-%m-%d") + + def parse_date(value: str) -> str | None: """Pokusí se převést string na ISO 8601; jinak vrátí None.""" value = value.strip() @@ -294,8 +319,16 @@ def ensure_indexes(collection) -> None: collection.create_index([("lastModified", ASCENDING)]) -def import_file(csv_path: str, collection) -> tuple[int, int, int]: - """Importuje jeden CSV soubor. Vrátí (inserted, updated, errors).""" +def import_file( + csv_path: str, + collection, + snapshot_col=None, + snapshot_date: str | None = None, +) -> tuple[int, int, int]: + """ + Importuje jeden CSV soubor. Vrátí (inserted, updated, errors). + snapshot_col: pokud je zadán, pro QueryDetails se zapíše i daily snapshot. + """ inserted = updated = errors = 0 source_file = Path(csv_path).name @@ -308,6 +341,15 @@ def import_file(csv_path: str, collection) -> tuple[int, int, int]: if query_mode: doc = map_query_row(row, source_file) upsert_key = {"queryId": doc["queryId"]} + + # Snapshot — upsert na (queryId, snapshotDate) + if snapshot_col is not None and snapshot_date: + snap_doc = {**doc, "snapshotDate": snapshot_date} + snapshot_col.update_one( + {"queryId": doc["queryId"], "snapshotDate": snapshot_date}, + {"$set": snap_doc}, + upsert=True, + ) else: doc = map_row(row, source_file) record_id = doc.get("form", {}).get("recordId") @@ -377,13 +419,22 @@ def main() -> None: col_name = "queries" collection = db[col_name] ensure_query_indexes(collection) + snapshot_col = db["queries_snapshots"] + ensure_snapshot_indexes(snapshot_col) + snapshot_date = extract_snapshot_date(csv_path) + log.info("Importuji: %s → %s.%s + queries_snapshots [%s]", + csv_path, args.db, col_name, snapshot_date) else: col_name = collection_name_from_filename(csv_path) collection = db[col_name] ensure_indexes(collection) + snapshot_col = None + snapshot_date = None + log.info("Importuji: %s → %s.%s", csv_path, args.db, col_name) - log.info("Importuji: %s → %s.%s", csv_path, args.db, col_name) - inserted, updated, errors = import_file(csv_path, collection) + inserted, updated, errors = import_file( + csv_path, collection, snapshot_col, snapshot_date + ) total_inserted += inserted total_updated += updated total_errors += errors