diff --git a/EmailsImport/janssenpc_file_send_v2.1.py b/EmailsImport/janssenpc_file_send_v2.1.py index 3f50220..15bcc11 100644 --- a/EmailsImport/janssenpc_file_send_v2.1.py +++ b/EmailsImport/janssenpc_file_send_v2.1.py @@ -132,18 +132,22 @@ def prejmenuj(directory: Path) -> None: if 'PANORAMA Dashboard' in filename: log(f" Detekován PANORAMA Site Contacts: {filename}") try: - xl = pd.ExcelFile(file_path) - if 'Site Contacts' in xl.sheet_names: - df_a1 = pd.read_excel(file_path, sheet_name='Site Contacts', nrows=1, header=None) - a1 = str(df_a1.iloc[0, 0]) if not df_a1.empty else '' - if 'Title: Site Contacts' in a1: - new_name = f"{get_timestamp(file_path)} PANORAMA Site Contacts.xlsx" - f.rename(directory / new_name) - log(f" ÚSPĚCH: -> '{new_name}'") + with pd.ExcelFile(file_path) as xl: + sheet_names = xl.sheet_names + if 'Site Contacts' in sheet_names: + df_a1 = xl.parse('Site Contacts', nrows=1, header=None) + a1 = str(df_a1.iloc[0, 0]) if not df_a1.empty else '' else: - log(f" PŘESKOČENO: A1 neodpovídá vzoru ({a1[:50]})") - else: + a1 = None + # soubor je nyní zavřen — přejmenování proběhne bez chyby + if a1 is None: log(f" PŘESKOČENO: List 'Site Contacts' nenalezen.") + elif 'Title: Site Contacts' in a1: + new_name = f"{get_timestamp(file_path)} PANORAMA Site Contacts.xlsx" + f.rename(directory / new_name) + log(f" ÚSPĚCH: -> '{new_name}'") + else: + log(f" PŘESKOČENO: A1 neodpovídá vzoru ({a1[:50]})") except Exception as e: log(f" CHYBA: {e}") continue diff --git a/Panorama/Downloads/PANORAMA Dashboard (1).xlsx b/Panorama/Downloads/PANORAMA Dashboard (1).xlsx deleted file mode 100644 index 8fc49ea..0000000 Binary files a/Panorama/Downloads/PANORAMA Dashboard (1).xlsx and /dev/null differ diff --git a/Panorama/Downloads/PANORAMA Dashboard.xlsx b/Panorama/Downloads/PANORAMA Dashboard.xlsx deleted file mode 100644 index 79f1108..0000000 Binary files a/Panorama/Downloads/PANORAMA Dashboard.xlsx and /dev/null differ diff --git a/EmailsImport/PANORAMA Dashboard (1).xlsx b/Panorama/Downloads/Zpracovano/2026-05-28_11-32-29 PANORAMA Site Contacts.xlsx similarity index 99% rename from EmailsImport/PANORAMA Dashboard (1).xlsx rename to Panorama/Downloads/Zpracovano/2026-05-28_11-32-29 PANORAMA Site Contacts.xlsx index 8fc49ea..7ea7300 100644 Binary files a/EmailsImport/PANORAMA Dashboard (1).xlsx and b/Panorama/Downloads/Zpracovano/2026-05-28_11-32-29 PANORAMA Site Contacts.xlsx differ diff --git a/EmailsImport/PANORAMA Dashboard.xlsx b/Panorama/Downloads/Zpracovano/2026-05-28_11-40-23 PANORAMA Site Contacts.xlsx similarity index 99% rename from EmailsImport/PANORAMA Dashboard.xlsx rename to Panorama/Downloads/Zpracovano/2026-05-28_11-40-23 PANORAMA Site Contacts.xlsx index 79f1108..1fdb88b 100644 Binary files a/EmailsImport/PANORAMA Dashboard.xlsx and b/Panorama/Downloads/Zpracovano/2026-05-28_11-40-23 PANORAMA Site Contacts.xlsx differ diff --git a/Panorama/import_to_mongo.py b/Panorama/import_to_mongo.py index a4c2a36..76832f0 100644 --- a/Panorama/import_to_mongo.py +++ b/Panorama/import_to_mongo.py @@ -5,6 +5,7 @@ Podporované typy: - Issues & Deviations → kolekce IssuesAndDeviations (klíč: ID / fuzzy+hash) - Site Visit Details → kolekce Visits (klíč: Site Visit ID (Technical)) - FUL details → kolekce FUL (klíč: SVR Document Number) + - PANORAMA Site Contacts → kolekce contacts (klíč: Contact Identifier) Filtr: pouze řádky s Country Name == "Czechia" Historie: při změně fields se stará verze uloží do pole history[] @@ -67,6 +68,18 @@ REPORT_TYPES = { "fields.FUL Missing?", "fields.FUL Document Status", ], }, + "contacts": { + "pattern": re.compile(r"PANORAMA Site Contacts\.xlsx$", re.IGNORECASE), + "collection": "contacts", + "upsert_key": None, + "composite_keys": ["Contact Identifier", "Protocol ID", "Site ID", "Contact Role"], + "no_country_filter": True, + "indexes": [ + "fields.Country Name", "fields.Site ID", + "fields.Protocol ID", "fields.Contact Role", + "fields.Contact Email Address", "fields.Contact Identifier", + ], + }, } @@ -144,6 +157,7 @@ def import_file(xlsx_path: str, collection, report_cfg: dict) -> dict: upsert_key = report_cfg["upsert_key"] collection_name = report_cfg["collection"] use_fuzzy = (collection_name == "IssuesAndDeviations") + apply_country_filter = COUNTRY_FILTER and not report_cfg.get("no_country_filter") wb = openpyxl.load_workbook(xlsx_path, read_only=True) ws = wb[wb.sheetnames[0]] @@ -161,7 +175,7 @@ def import_file(xlsx_path: str, collection, report_cfg: dict) -> dict: raw = dict(zip(header, row)) country = (raw.get("Country Name") or "") - if COUNTRY_FILTER and country != COUNTRY_FILTER: + if apply_country_filter and country != COUNTRY_FILTER: filtered_out += 1 continue @@ -171,10 +185,16 @@ def import_file(xlsx_path: str, collection, report_cfg: dict) -> dict: continue fields[k] = clean_value(v) - record_id = raw.get(upsert_key) + composite_keys = report_cfg.get("composite_keys") + record_id = raw.get(upsert_key) if upsert_key else None has_id = record_id is not None - if has_id: + if composite_keys: + key_parts = [str(raw.get(k) or "").strip() for k in composite_keys] + h = hashlib.sha1("|".join(key_parts).encode("utf-8")).hexdigest()[:16] + record_id = f"C-{h}" + existing = collection.find_one({"record_id": record_id}) + elif has_id: record_id = str(int(record_id)) if isinstance(record_id, (int, float)) else str(record_id).strip() existing = collection.find_one({"record_id": record_id}) elif use_fuzzy: @@ -260,7 +280,7 @@ def import_file(xlsx_path: str, collection, report_cfg: dict) -> dict: if processed != xlsx_count: print(f" !!! VAROVANI: zpracovano {processed} radku, ale v XLSX je {xlsx_count} datovych radku") - if db_count is not None and db_count != expected_in_db: + if db_count is not None and db_count != expected_in_db and not report_cfg.get("composite_keys"): print(f" !!! VAROVANI: v DB je {db_count} dokumentu pro Protocol ID {protocol_id}, ocekavano {expected_in_db} (XLSX {xlsx_count} - filtered {filtered_out})") return stats