"""Import Czechia site contacts from a PANORAMA Dashboard workbook into MySQL.

Repository path: CTMS/PanoramaContacts/import_CZ_contacts.py

- Only rows whose ``Country Name`` equals ``Czechia`` are imported.
- ``file_date`` is taken from the xlsx document properties
  (``dcterms:created``).
- Before importing, existing rows with the same ``file_date`` and
  ``country_name == 'Czechia'`` are deleted from ``CTMS_contacts``.
"""

import os
import zipfile
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from pathlib import Path

import pandas as pd
import mysql.connector

# ── Configuration ──────────────────────────────────────────────────────────────
# Workbook to import. Exported dashboards are kept next to this script in
# CTMS/PanoramaContacts/SourceData/ (e.g. "PANORAMA Dashboard (32).xlsx",
# "PANORAMA Dashboard (33).xlsx").
SOURCE_FILE = Path(r"U:\PythonProject\Janssen\CTMS\PanoramaContacts\SourceData\PANORAMA Dashboard (33).xlsx")

# Connection settings for mysql.connector.connect(**DB_CONFIG).
# Credentials must not live in version control: the password is read from the
# CTMS_DB_PASSWORD environment variable. The remaining settings can be
# overridden through CTMS_DB_HOST / CTMS_DB_PORT / CTMS_DB_USER / CTMS_DB_NAME
# and otherwise keep their previous values.
DB_CONFIG = {
    "host": os.environ.get("CTMS_DB_HOST", "192.168.1.76"),
    "port": int(os.environ.get("CTMS_DB_PORT", "3306")),
    "user": os.environ.get("CTMS_DB_USER", "root"),
    "password": os.environ.get("CTMS_DB_PASSWORD", ""),
    "database": os.environ.get("CTMS_DB_NAME", "studie"),
    "charset": "utf8mb4",
}

TABLE = "CTMS_contacts"    # target table
COUNTRY = "Czechia"        # value of "Country Name" that is imported
SHEET = "Site Contacts"    # worksheet read from the workbook
HEADER_ROW = 5             # 0-based, i.e. row 6 in Excel
# NOTE: HEADER_ROW (defined above) is 0-based, i.e. it points at row 6 in Excel.

from datetime import date  # return type of get_file_created_date

# Excel column header → CTMS_contacts column name. The dict order is also the
# column order used in the INSERT statement built by main().
_COLUMN_MAP = {
    "Sector": "sector",
    "TA": "ta",
    "Protocol ID": "protocol_id",
    "GTL-GTM/CTM": "gtl_ctm",
    "Country Name": "country_name",
    "LTM Name": "ltm_name",
    "Site ID": "site_id",
    "SM Name": "sm_name",
    "PI Full Name": "pi_full_name",
    "Institution Name": "institution_name",
    "Contact Identifier": "contact_identifier",
    "Title": "contact_title",
    "Last Name": "last_name",
    "First Name": "first_name",
    "Contact Role": "contact_role",
    "Contact Type": "contact_type",
    "Pr St Cont Primary Indicator": "primary_indicator",
    "SUA Reporting Indicator": "sua_reporting_indicator",
    "Financial Disclosure Indicator": "financial_disclosure_indicator",
    "Contact Phone Number": "phone",
    "Alternative Phone Number": "phone_alt",
    "Mobile Phone Number": "phone_mobile",
    "Contact Fax Number": "fax",
    "Contact Email Address": "email",
    "SUA Reporting Email Address": "email_sua",
    "Contact Start Date": "contact_start_date",
    "Contact End Date": "contact_end_date",
    "Degree/qualification": "degree_qualification",
    "Job Title": "job_title",
    "Contact Address Line 1": "address_line1",
    "Contact Address Line 2": "address_line2",
    "Contact Address Line 3": "address_line3",
    "Contact City": "city",
    "Contact Addr State/Province": "state_province",
    "Contact Zip/Postal Code": "zip_postal_code",
}


# ── Helper functions ───────────────────────────────────────────────────────────
def get_file_created_date(xlsx_path: Path) -> date:
    """Return the workbook's creation date from ``docProps/core.xml``.

    The ``dcterms:created`` timestamp is parsed as ISO 8601 (a trailing ``Z``
    is accepted), converted to UTC and reduced to its calendar date.

    Args:
        xlsx_path: Path to the ``.xlsx`` file (a zip archive).

    Returns:
        The UTC calendar date on which the document was created.

    Raises:
        ValueError: If ``dcterms:created`` is missing or empty.
        KeyError: If the archive has no ``docProps/core.xml`` member
            (raised by ``zipfile``).
    """
    with zipfile.ZipFile(xlsx_path) as archive:
        with archive.open("docProps/core.xml") as core_xml:
            root = ET.parse(core_xml).getroot()

    created_el = root.find("{http://purl.org/dc/terms/}created")
    raw = (created_el.text or "").strip() if created_el is not None else ""
    if not raw:
        raise ValueError(
            f"dcterms:created not found in docProps/core.xml of {xlsx_path}"
        )

    # datetime.fromisoformat() only accepts the "Z" suffix from Python 3.11 on.
    created = datetime.fromisoformat(raw.replace("Z", "+00:00"))
    return created.astimezone(timezone.utc).date()


def clean_value(val):
    """Normalise one cell value so it can be bound as a MySQL parameter.

    Missing values (``None``, float NaN, ``pd.NaT``, ``pd.NA``) become
    ``None``; ``pd.Timestamp`` becomes a plain ``datetime.date``; every other
    value is returned unchanged.
    """
    # pd.isna() recognises every missing-value marker in one call. It returns
    # an array for list-likes, so it is only consulted for scalars. (Probing
    # for a private ``_value`` attribute is not a safe NaT test: real
    # Timestamps can expose it too, which would turn valid dates into NULL.)
    if pd.api.types.is_scalar(val) and pd.isna(val):
        return None
    if isinstance(val, pd.Timestamp):
        return val.date()
    return val


def _build_insert_rows(df_cz, file_date, db_cols):
    """Return one parameter tuple ``(file_date, *cleaned cells)`` per row."""
    # reindex() yields NaN for mapped columns absent from the sheet, which
    # clean_value() turns into NULL.
    frame = df_cz.rename(columns=_COLUMN_MAP).reindex(columns=db_cols)
    return [
        (file_date, *(clean_value(cell) for cell in record))
        for record in frame.itertuples(index=False, name=None)
    ]


# ── Main logic ─────────────────────────────────────────────────────────────────
def main():
    """Import the Czechia rows of SOURCE_FILE into the TABLE MySQL table.

    Steps: read the creation date from the document properties, load the
    SHEET worksheet, keep rows whose "Country Name" equals COUNTRY, delete
    rows already stored for the same file_date and country, then insert the
    new rows. DELETE and INSERT are committed together; on any error the
    transaction is rolled back and the exception is re-raised. Cursor and
    connection are always closed.
    """
    print(f"Soubor : {SOURCE_FILE}")

    # 1) Creation date from the document properties
    file_date = get_file_created_date(SOURCE_FILE)
    print(f"file_date (z docProps): {file_date}")

    # 2) Load the worksheet
    print("Načítám Excel…")
    df = pd.read_excel(SOURCE_FILE, sheet_name=SHEET, header=HEADER_ROW)

    # 3) Keep only the configured country
    df_cz = df[df["Country Name"] == COUNTRY].copy()
    print(f"Řádků CZ: {len(df_cz)}")

    # 4) Map Excel columns to DB columns and prepare the parameter tuples
    db_cols = list(_COLUMN_MAP.values())
    rows = _build_insert_rows(df_cz, file_date, db_cols)

    placeholders = ", ".join(["%s"] * (len(db_cols) + 1))  # +1 for file_date
    insert_cols = "file_date, " + ", ".join(db_cols)
    sql_insert = f"INSERT INTO {TABLE} ({insert_cols}) VALUES ({placeholders})"

    # 5) Connect to the database
    print("Připojuji se k MySQL…")
    conn = mysql.connector.connect(**DB_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            # 6) Remove rows of the same file_date + country so that the
            #    import can be re-run for the same file.
            cursor.execute(
                f"DELETE FROM {TABLE} WHERE file_date = %s AND country_name = %s",
                (file_date, COUNTRY),
            )
            deleted = cursor.rowcount
            print(f"Smazáno starých záznamů: {deleted}")

            # 7) Insert all rows in one batch
            if rows:
                cursor.executemany(sql_insert, rows)
            conn.commit()
        except Exception:
            # Leave the table exactly as it was when anything fails.
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()

    inserted = len(rows)
    print(f"Importováno záznamů : {inserted}")
    print("Hotovo OK")


if __name__ == "__main__":
    main()
-- ============================================================
-- File        : CTMS/PanoramaContacts/sql/create_CTMS_contacts.sql
-- Database    : studie
-- Table       : CTMS_contacts
-- Description : Site contacts from the PANORAMA system (CTMS),
--               study 42847922MDD3003 (Neuroscience)
-- Created     : 2026-05-07
-- ============================================================

USE studie;

CREATE TABLE IF NOT EXISTS CTMS_contacts (
    -- ── Internal keys ──────────────────────────────────────────
    id                              INT UNSIGNED    NOT NULL AUTO_INCREMENT,
    file_date                       DATE            NOT NULL                            COMMENT 'Datum vytvoření zdrojového souboru (PANORAMA export)',
    imported_at                     DATETIME        NOT NULL DEFAULT CURRENT_TIMESTAMP  COMMENT 'Datum a čas importu záznamu do DB',

    -- ── Study / organisation ───────────────────────────────────
    sector                          VARCHAR(20)     COMMENT 'Pharma / ...',
    ta                              VARCHAR(30)     COMMENT 'Therapeutic Area',
    protocol_id                     VARCHAR(20)     COMMENT 'Protocol ID (např. 42847922MDD3003)',
    gtl_ctm                         VARCHAR(50)     COMMENT 'GTL-GTM/CTM jméno',

    -- ── Location (site) ────────────────────────────────────────
    country_name                    VARCHAR(60)     COMMENT 'Název země',
    ltm_name                        VARCHAR(50)     COMMENT 'LTM Name',
    site_id                         VARCHAR(15)     COMMENT 'Identifikátor střediska (např. S10-CZ10008)',
    sm_name                         VARCHAR(60)     COMMENT 'Site Manager Name',
    pi_full_name                    VARCHAR(80)     COMMENT 'Principal Investigator – celé jméno',
    institution_name                VARCHAR(100)    COMMENT 'Název instituce / kliniky',

    -- ── Contact person ─────────────────────────────────────────
    contact_identifier              INT UNSIGNED    COMMENT 'PANORAMA interní ID kontaktu',
    contact_title                   VARCHAR(25)     COMMENT 'Titul (Mr, Ms, Dr, ...)',
    last_name                       VARCHAR(50)     COMMENT 'Příjmení',
    first_name                      VARCHAR(40)     COMMENT 'Jméno',
    contact_role                    VARCHAR(50)     COMMENT 'Role kontaktu (Study Coordinator, PI, ...)',
    contact_type                    VARCHAR(30)     COMMENT 'Typ kontaktu (Study-Site Staff, ...)',

    -- ── Indicators ─────────────────────────────────────────────
    primary_indicator               ENUM('Yes','No') COMMENT 'Pr St Cont Primary Indicator',
    sua_reporting_indicator         ENUM('Yes','No') COMMENT 'SUA Reporting Indicator',
    financial_disclosure_indicator  ENUM('Yes','No') COMMENT 'Financial Disclosure Indicator',

    -- ── Contact details ────────────────────────────────────────
    phone                           VARCHAR(40)     COMMENT 'Hlavní telefonní číslo',
    phone_alt                       VARCHAR(40)     COMMENT 'Alternativní telefonní číslo',
    phone_mobile                    VARCHAR(40)     COMMENT 'Mobilní číslo',
    fax                             VARCHAR(40)     COMMENT 'Faxové číslo',
    email                           VARCHAR(100)    COMMENT 'Hlavní e-mailová adresa',
    email_sua                       VARCHAR(100)    COMMENT 'SUA Reporting e-mail',

    -- ── Dates ──────────────────────────────────────────────────
    contact_start_date              DATE            COMMENT 'Datum začátku platnosti kontaktu',
    contact_end_date                DATE            COMMENT 'Datum konce platnosti kontaktu',

    -- ── Qualification ──────────────────────────────────────────
    degree_qualification            VARCHAR(30)     COMMENT 'Titul / kvalifikace',
    job_title                       VARCHAR(40)     COMMENT 'Pracovní pozice',

    -- ── Address ────────────────────────────────────────────────
    address_line1                   VARCHAR(100)    COMMENT 'Adresní řádek 1',
    address_line2                   VARCHAR(60)     COMMENT 'Adresní řádek 2',
    address_line3                   VARCHAR(100)    COMMENT 'Adresní řádek 3',
    city                            VARCHAR(50)     COMMENT 'Město',
    state_province                  VARCHAR(40)     COMMENT 'Stát / provincie',
    zip_postal_code                 VARCHAR(20)     COMMENT 'PSČ',

    -- ── Keys ───────────────────────────────────────────────────
    PRIMARY KEY (id),

    -- Fast lookups on the most frequently queried fields
    INDEX idx_file_date          (file_date),
    INDEX idx_country            (country_name),
    INDEX idx_site_id            (site_id),
    INDEX idx_protocol           (protocol_id),
    INDEX idx_contact_role       (contact_role),
    INDEX idx_email              (email),
    INDEX idx_contact_identifier (contact_identifier),

    -- Matches the import script's
    -- DELETE ... WHERE file_date = ? AND country_name = ?
    INDEX idx_file_date_country  (file_date, country_name)

) ENGINE=InnoDB
  DEFAULT CHARSET=utf8mb4
  COLLATE=utf8mb4_unicode_ci
  COMMENT='CTMS contacts – Site Contacts, studie 42847922MDD3003';