This commit is contained in:
2026-06-15 16:10:47 +02:00
parent 36aa84aa02
commit 495cf8da21
34 changed files with 8012 additions and 8 deletions
+20 -8
View File
@@ -1,7 +1,7 @@
"""
import_to_mongo.py
Verze: 1.2
Datum: 2026-06-02
Verze: 1.3
Datum: 2026-06-15
Import Clario CSV do MongoDB (databáze: Clario).
@@ -11,7 +11,8 @@ Klíč: MayoDiary → Subject ID + Form Number
MayoScore → Participant ID + Visit
eCOA_DCRs → Data Correction ID
ECG_DCRs → Data Correction ID
Historie: při změně fields se stará verze uloží do pole history[]
Historie: při změně jakéhokoliv datového sloupce (fields + outcome cols) se stará
verze uloží do pole history[] spolu s outcome poli
Po importu přesune zpracované CSV do downloads/Zpracovano/
Použití:
@@ -119,6 +120,14 @@ def detect_collection_type(filename: str) -> str | None:
return None
def data_snapshot(doc: dict, outcome_cols: tuple) -> dict:
    """Build a comparable snapshot of a document's data columns.

    The snapshot combines the ``fields`` sub-document with every top-level
    column named in *outcome_cols*, so two documents can be compared with a
    single ``==`` to decide whether any data column differs.

    Args:
        doc: Document to snapshot; only read, never modified.
        outcome_cols: Names of the top-level keys to include next to
            ``fields``.

    Returns:
        A new dict with the key ``"fields"`` (an empty dict when *doc* has no
        such key) followed by one key per entry of *outcome_cols* (``None``
        when *doc* has no such key).
    """
    # The "fields" value is referenced, not copied: the snapshot is meant for
    # comparison, so callers should not mutate it.
    snap = {"fields": doc.get("fields", {})}
    # Missing outcome columns map to None so that "absent" and "explicitly
    # None" compare equal between two documents.
    snap.update((col, doc.get(col)) for col in outcome_cols)
    return snap
# ---------------------------------------------------------------------------
# CSV → dokument
# ---------------------------------------------------------------------------
@@ -176,6 +185,7 @@ def import_file(csv_path: str, db) -> dict:
cfg = COLLECTION_CONFIG[col_type]
col_name = cfg["collection"]
outcome_cols = tuple(cfg.get("outcome_cols", ()))
snapshot_date = extract_snapshot_date(filename)
collection = db[col_name]
@@ -207,11 +217,13 @@ def import_file(csv_path: str, db) -> dict:
collection.insert_one(doc)
inserted += 1
elif existing.get("fields") != doc["fields"]:
old_entry = {
"date": existing.get("lastSeen", snapshot_date),
"fields": existing["fields"],
}
elif data_snapshot(existing, outcome_cols) != data_snapshot(doc, outcome_cols):
# Uložíme kompletní snapshot starého stavu (fields + outcome cols)
old_entry = {"date": existing.get("lastSeen", snapshot_date)}
for col in outcome_cols:
old_entry[col] = existing.get(col)
old_entry["fields"] = existing.get("fields", {})
update_doc = {k: v for k, v in doc.items()}
update_doc["lastSeen"] = snapshot_date
collection.update_one(