This commit is contained in:
2026-06-02 17:16:47 +02:00
parent dd39339497
commit ec187e673a
18 changed files with 29 additions and 1396 deletions
+24 -7
View File
@@ -1,14 +1,16 @@
"""
import_to_mongo.py
Verze: 1.1
Datum: 2026-06-01
Verze: 1.2
Datum: 2026-06-02
Import Clario CSV do MongoDB (databáze: Clario).
Kolekce: Clario.MayoDiary / Clario.MayoScore (dle názvu souboru)
Kolekce: Clario.MayoDiary / Clario.MayoScore / Clario.eCOA_DCRs / Clario.ECG_DCRs
Filtr: pouze řádky, jejichž Country obsahuje "Czech Republic" (shoda podřetězce)
Klíč: MayoDiary → Subject ID + Form Number
MayoScore → Participant ID + Visit
Klíč: MayoDiary → Subject ID + Form Number
MayoScore → Participant ID + Visit
eCOA_DCRs → Data Correction ID
ECG_DCRs → Data Correction ID
Historie: při změně fields se stará verze uloží do pole history[]
Po importu přesune zpracované CSV do downloads/Zpracovano/
@@ -58,6 +60,16 @@ COLLECTION_CONFIG = {
"Partial Mayo Response for Clinical Non-Responders",
),
},
"eCOA DCRs": {
"collection": "Clario.eCOA_DCRs",
"subject_col": "Subject ID",
"key_cols": ("Data Correction ID",),
},
"ECG DCRs": {
"collection": "Clario.ECG_DCRs",
"subject_col": "Subject Number",
"key_cols": ("Data Correction ID",),
},
}
DATE_FORMATS = [
@@ -120,7 +132,9 @@ def map_row(row: dict, col_type: str) -> dict:
subject_col = cfg["subject_col"]
doc["subject"] = {"id": cleaned.get(subject_col, "")}
doc["site"] = {"name": cleaned.get("Site", "")}
# ECG DCRs používají "Site ID" místo "Site"
site_name = cleaned.get("Site") or cleaned.get("Site ID", "")
doc["site"] = {"name": site_name}
doc["country"] = cleaned.get("Country", "")
doc["study"] = cleaned.get("Protocol", "")
@@ -173,7 +187,7 @@ def import_file(csv_path: str, db) -> dict:
for row in reader:
cleaned_row = {clean_colname(k): v for k, v in row.items()}
country = cleaned_row.get("Country", "").strip()
if country != COUNTRY_FILTER:
if COUNTRY_FILTER not in country:
filtered_out += 1
continue
@@ -221,6 +235,9 @@ def import_file(csv_path: str, db) -> dict:
collection.create_index([("site.name", ASCENDING)])
if col_type == "MayoScore":
collection.create_index([("Site Action", ASCENDING)])
if col_type in ("eCOA DCRs", "ECG DCRs"):
collection.create_index([("fields.Status", ASCENDING)])
collection.create_index([("fields.Type", ASCENDING)])
stats = {
"collection": col_name,