From afd9b3ef1715a7685989c2f73e26ca2abc1c8daa Mon Sep 17 00:00:00 2001
From: "vladimir.buzalka" <vladimir.buzalka@buzalka.cz>
Date: Tue, 5 May 2026 10:40:13 +0200
Subject: [PATCH] z230

---
 .../create_iwrs_tables.sql                    | 110 +++++
 IWRS/Přehledpacientůstručný/db_config.py      |   5 +
 .../Přehledpacientůstručný/import_to_mysql.py | 358 +++++++++++++++
 IWRS/Přehledpacientůstručný/run_all.py        | 422 ++++++++++++++++++
 4 files changed, 895 insertions(+)
 create mode 100644 IWRS/Přehledpacientůstručný/create_iwrs_tables.sql
 create mode 100644 IWRS/Přehledpacientůstručný/db_config.py
 create mode 100644 IWRS/Přehledpacientůstručný/import_to_mysql.py
 create mode 100644 IWRS/Přehledpacientůstručný/run_all.py

diff --git a/IWRS/Přehledpacientůstručný/create_iwrs_tables.sql b/IWRS/Přehledpacientůstručný/create_iwrs_tables.sql
new file mode 100644
index 0000000..b20e6c0
--- /dev/null
+++ b/IWRS/Přehledpacientůstručný/create_iwrs_tables.sql
@@ -0,0 +1,110 @@
+-- IWRS tables for the `studie` database
+-- Run once: mysql -h 192.168.1.76 -u root -p studie < create_iwrs_tables.sql
+
+USE studie;
+
+-- ── Import log ───────────────────────────────────────────────────────────────
+-- One row per import of one study; the data tables below point back to it
+-- through their import_id foreign key.
+CREATE TABLE IF NOT EXISTS iwrs_import (
+    import_id   INT AUTO_INCREMENT PRIMARY KEY,
+    study       VARCHAR(20)  NOT NULL,
+    imported_at DATETIME     NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    source_file VARCHAR(500) NOT NULL,
+    INDEX idx_study (study)
+);
+
+-- ── UCO3001 subject summary ───────────────────────────────────────────────────
+-- One row per report row per import, written by insert_uco3001_summary().
+-- Only import_id and subject are mandatory; every other column is nullable.
+CREATE TABLE IF NOT EXISTS iwrs_uco3001_subject_summary (
+    id                                  INT AUTO_INCREMENT PRIMARY KEY,
+    import_id                           INT          NOT NULL,
+    subject                             VARCHAR(20)  NOT NULL,
+    prior_subject_identifier            VARCHAR(20),
+    site                                VARCHAR(50),
+    investigator                        VARCHAR(100),
+    location                            VARCHAR(50),
+    cohort_per_irt                      VARCHAR(100),
+    informed_consent_date               DATE,
+    adolescent_assent_date              DATE,
+    age                                 SMALLINT,
+    weight                              DECIMAL(5,1),
+    rescreened_subject                  VARCHAR(10),
+    adt_ir                              VARCHAR(10),
+    three_or_more_advanced_therapies    VARCHAR(10),
+    only_oral_5asa_compounds            VARCHAR(10),
+    ustekinumab                         VARCHAR(10),
+    isolated_proctitis                  VARCHAR(10),
+    clinical_responder_status_i12_m0    VARCHAR(100),
+    irt_subject_status                  VARCHAR(50),
+    i0_rand_date_local                  DATE,
+    last_irt_transaction                VARCHAR(100),
+    last_irt_transaction_date_local     DATE,
+    last_irt_transaction_date_utc       DATE,
+    next_irt_transaction                VARCHAR(100),
+    next_irt_transaction_date_local     DATE,
+    most_recent_med_assignment_date     DATE,
+    days_since_last_med_assignment      SMALLINT,
+    patient_forecast_status             VARCHAR(50),
+    patient_forecast_status_changed_date DATE,
+    -- Ties every row to the import run that produced it.
+    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
+    INDEX idx_import (import_id),
+    INDEX idx_subject (subject)
+);
+
+-- ── MDD3003 subject summary ───────────────────────────────────────────────────
+-- One row per report row per import, written by insert_mdd3003_summary().
+-- Only import_id and subject are mandatory; every other column is nullable.
+CREATE TABLE IF NOT EXISTS iwrs_mdd3003_subject_summary (
+    id                                      INT AUTO_INCREMENT PRIMARY KEY,
+    import_id                               INT          NOT NULL,
+    subject                                 VARCHAR(20)  NOT NULL,
+    prior_subject_identifier                VARCHAR(20),
+    site                                    VARCHAR(50),
+    investigator                            VARCHAR(100),
+    location                                VARCHAR(50),
+    cohort_per_irt                          VARCHAR(50),
+    madrs_criteria_integrated               VARCHAR(50),
+    informed_consent_date                   DATE,
+    age                                     SMALLINT,
+    madrs_criteria_v15                      VARCHAR(10),
+    madrs_criteria_v16                      VARCHAR(10),
+    madrs_criteria_v17                      VARCHAR(10),
+    stratification_country                  VARCHAR(10),
+    age_group                               VARCHAR(20),
+    stable_remitters                        VARCHAR(50),
+    irt_subject_status                      VARCHAR(100),
+    last_irt_transaction                    VARCHAR(100),
+    last_irt_transaction_date_local         DATE,
+    last_irt_transaction_date_utc           DATE,
+    next_irt_transaction                    VARCHAR(100),
+    next_irt_transaction_date_local         DATE,
+    date_screened                           DATE,
+    date_screen_failed                      DATE,
+    date_randomized_part1                   DATE,
+    date_early_withdraw_randomized_part1    DATE,
+    date_open_label_induction               DATE,
+    date_early_withdraw_open_label_induction DATE,
+    date_randomized_part2                   DATE,
+    date_early_withdraw_randomized_part2    DATE,
+    date_completed                          DATE,
+    date_unblinded                          DATE,
+    -- Ties every row to the import run that produced it.
+    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
+    INDEX idx_import (import_id),
+    INDEX idx_subject (subject)
+);
+
+-- ── Subject visits / transactions (both studies) ─────────────────────────────
+-- One row per visit line of a Subject Detail report, written by insert_visits().
+CREATE TABLE IF NOT EXISTS iwrs_subject_visits (
+    id                          INT AUTO_INCREMENT PRIMARY KEY,
+    import_id                   INT          NOT NULL,
+    study                       VARCHAR(20)  NOT NULL,
+    subject                     VARCHAR(20)  NOT NULL,
+    -- Same two values the importer accepts; rows with any other visit type are skipped there.
+    visit_type                  ENUM('Past','Upcoming') NOT NULL,
+    scheduled_date              DATE,
+    window_days                 VARCHAR(20),
+    actual_date                 DATE,
+    irt_transaction_no          SMALLINT,
+    irt_transaction_description VARCHAR(200),
+    medication_assignment       VARCHAR(200),
+    quantity_assigned           SMALLINT,
+    medication_id               VARCHAR(20),
+    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
+    INDEX idx_import (import_id),
+    INDEX idx_study_subject (study, subject)
+);
diff --git a/IWRS/Přehledpacientůstručný/db_config.py b/IWRS/Přehledpacientůstručný/db_config.py
new file mode 100644
index 0000000..bfa5959
--- /dev/null
+++ b/IWRS/Přehledpacientůstručný/db_config.py
@@ -0,0 +1,5 @@
+"""Connection settings for the ``studie`` MySQL database.
+
+Every value can be overridden through an environment variable. The password
+has no built-in default so that it is never stored in version control; set
+IWRS_DB_PASSWORD before running the import scripts.
+"""
+import os
+
+DB_HOST     = os.environ.get("IWRS_DB_HOST", "192.168.1.76")
+DB_PORT     = int(os.environ.get("IWRS_DB_PORT", "3306"))
+DB_USER     = os.environ.get("IWRS_DB_USER", "root")
+# NOTE(review): the password that used to be hard-coded here was committed and
+# should be rotated on the server.
+DB_PASSWORD = os.environ.get("IWRS_DB_PASSWORD", "")
+DB_NAME     = os.environ.get("IWRS_DB_NAME", "studie")
diff --git a/IWRS/Přehledpacientůstručný/import_to_mysql.py b/IWRS/Přehledpacientůstručný/import_to_mysql.py
new file mode 100644
index 0000000..6a16cbe
--- /dev/null
+++ b/IWRS/Přehledpacientůstručný/import_to_mysql.py
@@ -0,0 +1,358 @@
+"""
+Importuje data z IWRS Excel reportů do MySQL (databáze studie).
+
+Pořadí spuštění:
+  1. download_subject_summary.py
+  2. download_subject_details.py
+  3. tento skript
+
+Každé spuštění vytvoří nový import_id v iwrs_import.
+Reportovací skripty pracují vždy s MAX(import_id) pro danou studii.
+"""
+
+import os
+import glob
+import datetime
+import re
+
+import pandas as pd
+import mysql.connector
+
+import db_config
+
+# Directory of this script; the report folders are resolved relative to it.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+# Folder with Subject Summary workbooks, and folder with per-study Subject Detail workbooks.
+INCOMING_DIR, DETAILS_DIR = (
+    os.path.join(BASE_DIR, folder)
+    for folder in ("IncomingSourceReports", "IncomingSourceReportsDetails")
+)
+
+# Studies handled by the import, in processing order.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+
+# ── helpers ──────────────────────────────────────────────────────────────────
+
+def get_conn():
+    """Open and return a new MySQL connection built from ``db_config``."""
+    settings = {
+        "host": db_config.DB_HOST,
+        "port": db_config.DB_PORT,
+        "user": db_config.DB_USER,
+        "password": db_config.DB_PASSWORD,
+        "database": db_config.DB_NAME,
+    }
+    return mysql.connector.connect(**settings)
+
+
+def to_date(val):
+    """Convert a spreadsheet cell to ``datetime.date``, or ``None`` if missing.
+
+    Args:
+        val: pandas Timestamp, datetime/date object, string, or a missing
+            marker (None, NaN, NaT).
+
+    Returns:
+        The calendar date, or ``None`` for missing values, blank strings and
+        text that matches none of the supported formats.
+    """
+    # pd.NaT is an instance of datetime.datetime (but not of pd.Timestamp) and
+    # NaT.date() returns NaT again, so it has to be rejected before the
+    # isinstance dispatch below.
+    if val is None or val is pd.NaT:
+        return None
+    if isinstance(val, float) and pd.isna(val):
+        return None
+    if isinstance(val, datetime.datetime):  # also covers pd.Timestamp
+        return val.date()
+    if isinstance(val, datetime.date):
+        return val
+    s = str(val).strip()
+    if s.lower() in ("nat", "nan", "none", ""):
+        return None
+    for fmt in ("%Y-%m-%d", "%d-%b-%Y", "%d-%m-%Y", "%Y-%m-%d %H:%M:%S"):
+        try:
+            return datetime.datetime.strptime(s, fmt).date()
+        except ValueError:
+            continue
+    return None
+
+
+def to_int(val):
+    """Convert ``val`` to ``int`` (truncating), or return ``None``.
+
+    ``None`` is returned for NaN, infinite values and anything ``float()``
+    cannot parse.
+    """
+    try:
+        number = float(val)
+        return None if pd.isna(number) else int(number)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError: int() of +/-inf; the other two: unparseable input.
+        return None
+
+
+def to_float(val):
+    """Return ``val`` as ``float``; ``None`` if it is NaN or not convertible."""
+    try:
+        number = float(val)
+    except (TypeError, ValueError):
+        return None
+    if pd.isna(number):
+        return None
+    return number
+
+
+def to_str(val):
+    """Return ``val`` as a stripped string, or ``None`` for missing values.
+
+    None, float NaN and text equal (case-insensitively) to "nan", "nat",
+    "none" or the empty string all count as missing.
+    """
+    if val is None:
+        return None
+    if isinstance(val, float) and pd.isna(val):
+        return None
+    text = str(val).strip()
+    if text.lower() in ("nan", "nat", "none", ""):
+        return None
+    return text
+
+
+def find_summary_file(study):
+    """Return the most recently modified Subject Summary Report for ``study``.
+
+    Excel lock files ("~$...") are ignored. A warning is printed when the
+    newest file name does not start with today's date.
+
+    Raises:
+        FileNotFoundError: no matching workbook exists in INCOMING_DIR.
+    """
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    candidates = [
+        path
+        for path in glob.glob(os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report.xlsx"))
+        if not os.path.basename(path).startswith("~$")
+    ]
+    candidates.sort(key=os.path.getmtime, reverse=True)
+    if not candidates:
+        raise FileNotFoundError(f"Nenalezen Subject Summary Report pro {study}")
+    newest = candidates[0]
+    newest_name = os.path.basename(newest)
+    if not newest_name.startswith(today):
+        print(f"  UPOZORNĚNÍ: nejnovější Summary Report pro {study} není z dnešního dne ({newest_name[:10]})")
+    return newest
+
+
+def read_summary_df(path):
+    """Load a Summary workbook as a DataFrame, starting at its header row.
+
+    The header row is the first row containing a cell equal to "Subject";
+    the workbook is then read again with that row as the column header.
+
+    Raises:
+        ValueError: no row contains a "Subject" cell.
+    """
+    raw = pd.read_excel(path, header=None)
+    header_row = next(
+        (idx for idx, cells in raw.iterrows()
+         if "Subject" in [str(cell).strip() for cell in cells]),
+        None,
+    )
+    if header_row is None:
+        raise ValueError(f"Hlavičkový řádek nenalezen v {path}")
+    return pd.read_excel(path, header=header_row)
+
+
+def find_detail_files(study):
+    """Return the sorted Subject Detail workbooks matching the newest Summary.
+
+    Only files whose name carries the same date prefix as the newest Summary
+    Report for ``study`` are returned; Excel lock files ("~$...") are skipped.
+    """
+    # The first ten characters of the Summary file name are its "YYYY-MM-DD" date.
+    file_date = os.path.basename(find_summary_file(study))[:10]
+    pattern = os.path.join(DETAILS_DIR, study, f"{file_date} {study} * Subject Detail.xlsx")
+    return sorted(
+        path for path in glob.glob(pattern)
+        if not os.path.basename(path).startswith("~$")
+    )
+
+
+def parse_detail_visits(path):
+    """Extract the visit rows from a Subject Detail workbook.
+
+    Reads sheet "patient_detail_report", locates the row containing a
+    "Visit Type" cell and turns every following row whose first cell is
+    "Past" or "Upcoming" into a dict. Columns are taken by position (0-8).
+
+    Returns:
+        A list of dicts, empty when no "Visit Type" header row exists.
+    """
+    sheet = pd.read_excel(path, sheet_name="patient_detail_report", header=None)
+
+    header_row = next(
+        (idx for idx, cells in sheet.iterrows()
+         if "Visit Type" in [str(cell).strip() for cell in cells]),
+        None,
+    )
+    if header_row is None:
+        return []
+
+    body = sheet.iloc[header_row + 1:].copy()
+    body.columns = range(body.shape[1])
+
+    # (result key, column position, converter) for everything after visit_type.
+    fields = (
+        ("scheduled_date", 1, to_date),
+        ("window_days", 2, to_str),
+        ("actual_date", 3, to_date),
+        ("irt_transaction_no", 4, to_int),
+        ("irt_transaction_description", 5, to_str),
+        ("medication_assignment", 6, to_str),
+        ("quantity_assigned", 7, to_int),
+        ("medication_id", 8, to_str),
+    )
+
+    visits = []
+    for _, cells in body.iterrows():
+        visit_type = to_str(cells.get(0))
+        if visit_type in ("Past", "Upcoming"):
+            record = {"visit_type": visit_type}
+            for key, position, convert in fields:
+                record[key] = convert(cells.get(position))
+            visits.append(record)
+    return visits
+
+
+# ── insert helpers ────────────────────────────────────────────────────────────
+
+def insert_import(cursor, study, source_file):
+    """Insert one row into ``iwrs_import`` and return its generated id.
+
+    Only the base name of ``source_file`` is stored; the timestamp is the
+    current local time.
+    """
+    params = (study, datetime.datetime.now(), os.path.basename(source_file))
+    cursor.execute(
+        "INSERT INTO iwrs_import (study, imported_at, source_file) VALUES (%s, %s, %s)",
+        params,
+    )
+    return cursor.lastrowid
+
+
+def insert_uco3001_summary(cursor, import_id, df):
+    """Insert every row of the UCO3001 Summary DataFrame.
+
+    Args:
+        cursor: DB-API cursor used for the inserts.
+        import_id: id of the owning ``iwrs_import`` row.
+        df: DataFrame whose column labels are the report headers.
+
+    Raises:
+        KeyError: a required report column is missing from ``df``.
+    """
+    sql = """
+        INSERT INTO iwrs_uco3001_subject_summary (
+            import_id, subject, prior_subject_identifier, site, investigator, location,
+            cohort_per_irt, informed_consent_date, adolescent_assent_date, age, weight,
+            rescreened_subject, adt_ir, three_or_more_advanced_therapies,
+            only_oral_5asa_compounds, ustekinumab, isolated_proctitis,
+            clinical_responder_status_i12_m0, irt_subject_status,
+            i0_rand_date_local, last_irt_transaction,
+            last_irt_transaction_date_local, last_irt_transaction_date_utc,
+            next_irt_transaction, next_irt_transaction_date_local,
+            most_recent_med_assignment_date, days_since_last_med_assignment,
+            patient_forecast_status, patient_forecast_status_changed_date
+        ) VALUES (
+            %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s
+        )
+    """
+    # (report header, converter, required) in the order of the SQL column list
+    # after import_id. Optional columns become NULL when the report lacks them.
+    columns = (
+        ("Subject", to_str, True),
+        ("Prior Subject Identifier", to_str, False),
+        ("Site", to_str, True),
+        ("Investigator", to_str, True),
+        ("Location", to_str, True),
+        ("Cohort per IRT", to_str, True),
+        ("Informed Consent Date", to_date, True),
+        ("Adolescent Assent Date", to_date, False),
+        ("Subject's age collection", to_int, True),
+        ("Subject's weight collection", to_float, False),
+        ("Rescreened Subject", to_str, False),
+        ("ADT-IR", to_str, False),
+        ("3 or More Advanced Therapies", to_str, False),
+        ("Only Oral 5-ASA Compounds", to_str, False),
+        ("Ustekinumab", to_str, False),
+        ("Isolated Proctitis", to_str, False),
+        ("Clinical Responder Status at I-12 / M-0", to_str, False),
+        ("IRT Subject Status", to_str, True),
+        ("I0_RAND_TIMESTAMP_LOCAL [Local]", to_date, False),
+        ("Last Recorded IRT Transaction", to_str, True),
+        ("Last Recorded IRT Transaction Date [Local]", to_date, True),
+        ("Last Recorded IRT Transaction Date (UTC)", to_date, True),
+        ("Next Expected IRT Transaction", to_str, True),
+        ("Next Expected IRT Transaction Date [Local]", to_date, True),
+        ("Most Recent Medication Assignment Transaction [Local]", to_date, False),
+        ("Days Since Last Medication Assignment Transaction", to_int, False),
+        ("Patient Forecast Status", to_str, False),
+        ("Patient Forecast Status Changed Date (UTC)", to_date, False),
+    )
+    present = set(df.columns)
+
+    for _, r in df.iterrows():
+        values = [import_id]
+        for header, convert, required in columns:
+            if required or header in present:
+                # Required headers are read unconditionally so a missing one
+                # surfaces as KeyError rather than a silent NULL.
+                values.append(convert(r[header]))
+            else:
+                values.append(None)
+        cursor.execute(sql, tuple(values))
+
+
+def insert_mdd3003_summary(cursor, import_id, df):
+    """Insert every row of the MDD3003 Summary DataFrame.
+
+    Required report columns are read directly (KeyError when absent);
+    optional ones are stored as NULL when the report does not contain them.
+    """
+    sql = """
+        INSERT INTO iwrs_mdd3003_subject_summary (
+            import_id, subject, prior_subject_identifier, site, investigator, location,
+            cohort_per_irt, madrs_criteria_integrated, informed_consent_date, age,
+            madrs_criteria_v15, madrs_criteria_v16, madrs_criteria_v17,
+            stratification_country, age_group, stable_remitters, irt_subject_status,
+            last_irt_transaction, last_irt_transaction_date_local,
+            last_irt_transaction_date_utc, next_irt_transaction,
+            next_irt_transaction_date_local, date_screened, date_screen_failed,
+            date_randomized_part1, date_early_withdraw_randomized_part1,
+            date_open_label_induction, date_early_withdraw_open_label_induction,
+            date_randomized_part2, date_early_withdraw_randomized_part2,
+            date_completed, date_unblinded
+        ) VALUES (
+            %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s
+        )
+    """
+    # (report header, converter, required) in SQL column order after import_id.
+    layout = (
+        ("Subject", to_str, True),
+        ("Prior Subject Identifier", to_str, False),
+        ("Site", to_str, True),
+        ("Investigator", to_str, True),
+        ("Location", to_str, True),
+        ("Cohort per IRT", to_str, True),
+        ("MADRS response criteria integrated or manually entered", to_str, False),
+        ("Informed Consent Date", to_date, True),
+        ("Subject's age collection", to_int, True),
+        ("MADRS response criteria v1.5 from RAVE", to_str, False),
+        ("MADRS response criteria v1.6 from RAVE", to_str, False),
+        ("MADRS response criteria v1.7 from RAVE", to_str, False),
+        ("Stratification Country", to_str, False),
+        ("Age Group", to_str, False),
+        ("Stable Remitters vs. Non Stable Remitters", to_str, False),
+        ("IRT Subject Status", to_str, True),
+        ("Last Recorded IRT Transaction", to_str, True),
+        ("Last Recorded IRT Transaction Date [Local]", to_date, True),
+        ("Last Recorded IRT Transaction Date (UTC)", to_date, True),
+        ("Next Expected IRT Transaction", to_str, True),
+        ("Next Expected IRT Transaction Date [Local]", to_date, True),
+        ("Date Screened [Local]", to_date, False),
+        ("Date Screen Failed [Local]", to_date, False),
+        ("Date Randomized Part 1 [Local]", to_date, False),
+        ("Date Early Withdraw Randomized Part 1 [Local]", to_date, False),
+        ("Date Open Label Induction [Local]", to_date, False),
+        ("Date Early Withdraw Open Label Induction [Local]", to_date, False),
+        ("Date Randomized Part 2 [Local]", to_date, False),
+        ("Date Early Withdraw Randomized Part 2 [Local]", to_date, False),
+        ("Date Completed [Local]", to_date, False),
+        ("Date Unblinded [Local]", to_date, False),
+    )
+    headers = df.columns.tolist()
+
+    for _, record in df.iterrows():
+        params = [import_id]
+        for header, convert, required in layout:
+            if required or header in headers:
+                params.append(convert(record[header]))
+            else:
+                params.append(None)
+        cursor.execute(sql, tuple(params))
+
+
+def insert_visits(cursor, import_id, study, subject, visits):
+    """Insert one ``iwrs_subject_visits`` row per visit dict.
+
+    Does nothing when ``visits`` is empty. Each dict must contain the keys
+    produced by ``parse_detail_visits``.
+    """
+    if not visits:
+        return
+    sql = """
+        INSERT INTO iwrs_subject_visits (
+            import_id, study, subject, visit_type, scheduled_date, window_days,
+            actual_date, irt_transaction_no, irt_transaction_description,
+            medication_assignment, quantity_assigned, medication_id
+        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
+    """
+    # Dict keys in the order of the SQL column list after (import_id, study, subject).
+    keys = (
+        "visit_type", "scheduled_date", "window_days",
+        "actual_date", "irt_transaction_no",
+        "irt_transaction_description", "medication_assignment",
+        "quantity_assigned", "medication_id",
+    )
+    for visit in visits:
+        params = (import_id, study, subject) + tuple(visit[key] for key in keys)
+        cursor.execute(sql, params)
+
+
+# ── main ──────────────────────────────────────────────────────────────────────
+
+def import_study(conn, study):
+    """Import the newest Summary and Detail reports of one study.
+
+    All inserts of one study form a single transaction: they are committed
+    together, and rolled back together if anything fails, so a failed import
+    cannot leave an orphan ``iwrs_import`` row or partial data that a later
+    commit on the same connection would persist.
+
+    Args:
+        conn: open MySQL connection.
+        study: study identifier, one of STUDIES.
+
+    Returns:
+        The ``import_id`` created for this run.
+
+    Raises:
+        Any exception from reading the reports or from the database; database
+        errors are re-raised after the rollback.
+    """
+    summary_path = find_summary_file(study)
+    print(f"  Summary: {os.path.basename(summary_path)}")
+
+    df_summary = read_summary_df(summary_path)
+    df_summary = df_summary.dropna(how="all")
+
+    detail_files = find_detail_files(study)
+    print(f"  Detail souborů: {len(detail_files)}")
+
+    cursor = conn.cursor()
+    try:
+        import_id = insert_import(cursor, study, summary_path)
+        print(f"  import_id = {import_id}")
+
+        if study == "77242113UCO3001":
+            insert_uco3001_summary(cursor, import_id, df_summary)
+        else:
+            insert_mdd3003_summary(cursor, import_id, df_summary)
+        print(f"  Summary řádků: {len(df_summary)}")
+
+        visited = 0
+        for path in detail_files:
+            fname = os.path.basename(path)
+            # File name: "2026-05-04 77242113UCO3001 CZ100012001 Subject Detail.xlsx"
+            m = re.search(r"\d{4}-\d{2}-\d{2} \S+ (\S+) Subject Detail\.xlsx", fname)
+            subject = m.group(1) if m else "UNKNOWN"
+            visits = parse_detail_visits(path)
+            insert_visits(cursor, import_id, study, subject, visits)
+            visited += len(visits)
+
+        conn.commit()
+    except Exception:
+        # Discard the partial import before handing the error to the caller.
+        conn.rollback()
+        raise
+    finally:
+        cursor.close()
+
+    print(f"  Transakce uloženo: {visited}")
+    return import_id
+
+
+def main():
+    """Import all STUDIES over one connection, reporting each study's result.
+
+    A failure in one study is printed and does not stop the remaining ones.
+    """
+    conn = get_conn()
+    print("Připojeno k MySQL.\n")
+
+    try:
+        for study in STUDIES:
+            print(f"[{study}]")
+            try:
+                import_id = import_study(conn, study)
+                print(f"  OK — import_id {import_id}\n")
+            except Exception as e:
+                # Top-level boundary: drop whatever the failed study left
+                # pending so the next study's commit cannot persist it.
+                conn.rollback()
+                print(f"  CHYBA: {e}\n")
+    finally:
+        conn.close()
+    print("Hotovo.")
+
+
+# Run the import only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
diff --git a/IWRS/Přehledpacientůstručný/run_all.py b/IWRS/Přehledpacientůstručný/run_all.py
new file mode 100644
index 0000000..9453955
--- /dev/null
+++ b/IWRS/Přehledpacientůstručný/run_all.py
@@ -0,0 +1,422 @@
+"""
+Kompletní pipeline:
+  1. Stažení Subject Summary Reportů (obě studie)
+  2. Stažení Subject Detail Reportů (obě studie)
+  3. Import do MySQL
+
+Spusť tento skript místo tří samostatných skriptů.
+"""
+
+import os
+import datetime
+import glob
+import re
+
+from playwright.sync_api import sync_playwright
+import pandas as pd
+
+import db_config
+import mysql.connector
+
+# ── CONFIG ───────────────────────────────────────────────────────────────────
+BASE_URL = "https://janssen.4gclinical.com"
+# Portal credentials are taken from the environment (IWRS_EMAIL / IWRS_PASSWORD)
+# so they are not stored in version control.
+# NOTE(review): the password previously hard-coded here was committed and
+# should be changed.
+EMAIL    = os.environ.get("IWRS_EMAIL", "")
+PASSWORD = os.environ.get("IWRS_PASSWORD", "")
+
+# Studies handled by the pipeline, in processing order.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+# Report folders are resolved relative to this script.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
+DETAILS_DIR  = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
+
+
+# ── helpers ───────────────────────────────────────────────────────────────────
+
+def unique_path(directory, stem):
+    """Return ``<directory>/<stem>.xlsx``, or a time-tagged variant if taken.
+
+    When the plain name already exists, the current time as "HHMM" is
+    appended to the stem (separated by a space).
+    """
+    candidate = os.path.join(directory, f"{stem}.xlsx")
+    if os.path.exists(candidate):
+        time_tag = datetime.datetime.now().strftime("%H%M")
+        candidate = os.path.join(directory, f"{stem} {time_tag}.xlsx")
+    return candidate
+
+
+def login(page, study):
+    """Sign in at BASE_URL with EMAIL / PASSWORD and select ``study``.
+
+    Args:
+        page: Playwright page that performs the interaction.
+        study: name of the option to pick in the "Study *" field.
+    """
+    page.goto(BASE_URL)
+    page.wait_for_load_state("networkidle")
+    page.get_by_label("Email *").fill(EMAIL)
+    page.get_by_label("Password *").fill(PASSWORD)
+    page.locator("#login__submit").click()
+    page.wait_for_load_state("networkidle")
+    # Study selection: open the "Study *" field, pick the option, confirm with SELECT.
+    page.get_by_label("Study *").click()
+    page.get_by_role("option", name=study).click()
+    page.get_by_role("button", name="SELECT").click()
+    page.wait_for_load_state("networkidle")
+
+
+# ── KROK 1: Subject Summary ───────────────────────────────────────────────────
+
+def download_summary(page, study, today):
+    """Download the Subject Summary Report and return the saved file path.
+
+    Args:
+        page: Playwright page; presumably already logged in to ``study`` — TODO confirm with caller.
+        study: study identifier, used in the file name and log output.
+        today: date prefix for the file name.
+
+    Returns:
+        Path of the saved workbook inside INCOMING_DIR.
+    """
+    print(f"  [{study}] Stahuji Subject Summary Report...")
+    page.goto(f"{BASE_URL}/report/patient_summary_report")
+    page.wait_for_load_state("networkidle", timeout=120000)
+    # unique_path() avoids overwriting a report already saved under the plain name.
+    filename = unique_path(INCOMING_DIR, f"{today} {study} Subject Summary Report")
+    with page.expect_download(timeout=120000) as dl:
+        page.get_by_role("button", name="Download XLS").click()
+    dl.value.save_as(filename)
+    print(f"  [{study}] Summary OK -> {os.path.basename(filename)}")
+    return filename
+
+
+# ── KROK 2: Subject Details ───────────────────────────────────────────────────
+
+def get_subjects_from_summary(summary_path):
+    """Return the non-empty "Subject" values of a Summary workbook as strings.
+
+    The header row is the first row containing a cell equal to "Subject".
+
+    Raises:
+        ValueError: no such header row exists.
+    """
+    raw = pd.read_excel(summary_path, header=None)
+    header_row = next(
+        (idx for idx, cells in raw.iterrows()
+         if "Subject" in [str(cell).strip() for cell in cells]),
+        None,
+    )
+    if header_row is None:
+        raise ValueError("Hlavičkový řádek nenalezen")
+    subjects = pd.read_excel(summary_path, header=header_row)["Subject"]
+    return subjects.dropna().astype(str).str.strip().tolist()
+
+
+def download_details(page, study, summary_path, today):
+    """Download one Subject Detail workbook per subject listed in the Summary.
+
+    Args:
+        page: Playwright page; presumably already logged in to ``study`` — TODO confirm with caller.
+        study: study identifier; also the name of the output sub-folder.
+        summary_path: Summary workbook from which the subject list is read.
+        today: date prefix for the file names.
+    """
+    out_dir = os.path.join(DETAILS_DIR, study)
+    os.makedirs(out_dir, exist_ok=True)
+
+    subjects = get_subjects_from_summary(summary_path)
+    print(f"  [{study}] Subjektů k stažení: {len(subjects)}")
+
+    page.goto(f"{BASE_URL}/report/patient_detail_report")
+    page.wait_for_load_state("networkidle", timeout=120000)
+
+    for subject in subjects:
+        filename = os.path.join(out_dir, f"{today} {study} {subject} Subject Detail.xlsx")
+        # Type the subject into the search field and pick the first offered option.
+        input_field = page.locator('input[placeholder="search"], input[type="text"]').first
+        input_field.click()
+        input_field.fill(subject)
+        # Fixed 500 ms pause before clicking the first option.
+        page.wait_for_timeout(500)
+        page.locator("mat-option").first.dispatch_event("click")
+        page.wait_for_load_state("networkidle", timeout=120000)
+
+        with page.expect_download(timeout=120000) as dl:
+            page.get_by_role("button", name="Download XLS").click()
+        dl.value.save_as(filename)
+        print(f"  [{study}] Detail {subject} OK")
+
+        # Reset the report filter before the next subject.
+        page.get_by_role("button", name="Clear").click()
+        page.wait_for_load_state("networkidle", timeout=120000)
+
+
+# ── KROK 3: Import do MySQL ───────────────────────────────────────────────────
+
+def get_conn():
+    """Open and return a new MySQL connection built from ``db_config``."""
+    settings = {
+        "host": db_config.DB_HOST,
+        "port": db_config.DB_PORT,
+        "user": db_config.DB_USER,
+        "password": db_config.DB_PASSWORD,
+        "database": db_config.DB_NAME,
+    }
+    return mysql.connector.connect(**settings)
+
+
+def to_date(val):
+    """Convert a spreadsheet cell to ``datetime.date``, or ``None`` if missing.
+
+    Args:
+        val: pandas Timestamp, datetime/date object, string, or a missing
+            marker (None, NaN, NaT).
+
+    Returns:
+        The calendar date, or ``None`` for missing values, blank strings and
+        text that matches none of the supported formats.
+    """
+    # pd.NaT is an instance of datetime.datetime (but not of pd.Timestamp) and
+    # NaT.date() returns NaT again, so it has to be rejected before the
+    # isinstance dispatch below.
+    if val is None or val is pd.NaT:
+        return None
+    if isinstance(val, float) and pd.isna(val):
+        return None
+    if isinstance(val, datetime.datetime):  # also covers pd.Timestamp
+        return val.date()
+    if isinstance(val, datetime.date):
+        return val
+    s = str(val).strip()
+    if s.lower() in ("nat", "nan", "none", ""):
+        return None
+    for fmt in ("%Y-%m-%d", "%d-%b-%Y", "%d-%m-%Y", "%Y-%m-%d %H:%M:%S"):
+        try:
+            return datetime.datetime.strptime(s, fmt).date()
+        except ValueError:
+            continue
+    return None
+
+
+def to_int(val):
+    """Convert ``val`` to ``int`` (truncating), or return ``None``.
+
+    ``None`` is returned for NaN, infinite values and anything ``float()``
+    cannot parse.
+    """
+    try:
+        number = float(val)
+        return None if pd.isna(number) else int(number)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError: int() of +/-inf; the other two: unparseable input.
+        return None
+
+
+def to_float(val):
+    """Return ``val`` as ``float``; ``None`` if it is NaN or not convertible."""
+    try:
+        number = float(val)
+    except (TypeError, ValueError):
+        return None
+    if pd.isna(number):
+        return None
+    return number
+
+
+def to_str(val):
+    """Return ``val`` as a stripped string, or ``None`` for missing values.
+
+    None, float NaN and text equal (case-insensitively) to "nan", "nat",
+    "none" or the empty string all count as missing.
+    """
+    if val is None:
+        return None
+    if isinstance(val, float) and pd.isna(val):
+        return None
+    text = str(val).strip()
+    if text.lower() in ("nan", "nat", "none", ""):
+        return None
+    return text
+
+
+def read_summary_df(path):
+    """Load a Summary workbook from its header row on, without all-empty rows.
+
+    The header row is the first row containing a cell equal to "Subject".
+
+    Raises:
+        ValueError: no such header row exists.
+    """
+    raw = pd.read_excel(path, header=None)
+    header_row = next(
+        (idx for idx, cells in raw.iterrows()
+         if "Subject" in [str(cell).strip() for cell in cells]),
+        None,
+    )
+    if header_row is None:
+        raise ValueError(f"Hlavičkový řádek nenalezen v {path}")
+    table = pd.read_excel(path, header=header_row)
+    return table.dropna(how="all")
+
+
+def parse_detail_visits(path):
+    """Extract the visit rows from a Subject Detail workbook.
+
+    Reads sheet "patient_detail_report", locates the row containing a
+    "Visit Type" cell and turns every following row whose first cell is
+    "Past" or "Upcoming" into a dict. Columns are taken by position (0-8).
+
+    Returns:
+        A list of dicts, empty when no "Visit Type" header row exists.
+    """
+    sheet = pd.read_excel(path, sheet_name="patient_detail_report", header=None)
+    header_row = next(
+        (idx for idx, cells in sheet.iterrows()
+         if "Visit Type" in [str(cell).strip() for cell in cells]),
+        None,
+    )
+    if header_row is None:
+        return []
+    body = sheet.iloc[header_row + 1:].copy()
+    body.columns = range(body.shape[1])
+    # (result key, column position, converter) for everything after visit_type.
+    fields = (
+        ("scheduled_date", 1, to_date),
+        ("window_days", 2, to_str),
+        ("actual_date", 3, to_date),
+        ("irt_transaction_no", 4, to_int),
+        ("irt_transaction_description", 5, to_str),
+        ("medication_assignment", 6, to_str),
+        ("quantity_assigned", 7, to_int),
+        ("medication_id", 8, to_str),
+    )
+    visits = []
+    for _, cells in body.iterrows():
+        visit_type = to_str(cells.get(0))
+        if visit_type in ("Past", "Upcoming"):
+            record = {"visit_type": visit_type}
+            for key, position, convert in fields:
+                record[key] = convert(cells.get(position))
+            visits.append(record)
+    return visits
+
+
+def insert_import(cursor, study, source_file):
+    """Insert one row into ``iwrs_import`` and return its generated id.
+
+    Only the base name of ``source_file`` is stored; the timestamp is the
+    current local time.
+    """
+    params = (study, datetime.datetime.now(), os.path.basename(source_file))
+    cursor.execute(
+        "INSERT INTO iwrs_import (study, imported_at, source_file) VALUES (%s, %s, %s)",
+        params,
+    )
+    return cursor.lastrowid
+
+
+def insert_uco3001_summary(cursor, import_id, df):
+    """Insert every row of the UCO3001 Summary DataFrame.
+
+    Required report columns are read directly (KeyError when absent);
+    optional ones are stored as NULL when the report does not contain them.
+    """
+    sql = """INSERT INTO iwrs_uco3001_subject_summary (
+        import_id, subject, prior_subject_identifier, site, investigator, location,
+        cohort_per_irt, informed_consent_date, adolescent_assent_date, age, weight,
+        rescreened_subject, adt_ir, three_or_more_advanced_therapies,
+        only_oral_5asa_compounds, ustekinumab, isolated_proctitis,
+        clinical_responder_status_i12_m0, irt_subject_status,
+        i0_rand_date_local, last_irt_transaction,
+        last_irt_transaction_date_local, last_irt_transaction_date_utc,
+        next_irt_transaction, next_irt_transaction_date_local,
+        most_recent_med_assignment_date, days_since_last_med_assignment,
+        patient_forecast_status, patient_forecast_status_changed_date
+    ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
+    # (report header, converter, required) in SQL column order after import_id.
+    layout = (
+        ("Subject", to_str, True),
+        ("Prior Subject Identifier", to_str, False),
+        ("Site", to_str, True),
+        ("Investigator", to_str, True),
+        ("Location", to_str, True),
+        ("Cohort per IRT", to_str, True),
+        ("Informed Consent Date", to_date, True),
+        ("Adolescent Assent Date", to_date, False),
+        ("Subject's age collection", to_int, True),
+        ("Subject's weight collection", to_float, False),
+        ("Rescreened Subject", to_str, False),
+        ("ADT-IR", to_str, False),
+        ("3 or More Advanced Therapies", to_str, False),
+        ("Only Oral 5-ASA Compounds", to_str, False),
+        ("Ustekinumab", to_str, False),
+        ("Isolated Proctitis", to_str, False),
+        ("Clinical Responder Status at I-12 / M-0", to_str, False),
+        ("IRT Subject Status", to_str, True),
+        ("I0_RAND_TIMESTAMP_LOCAL [Local]", to_date, False),
+        ("Last Recorded IRT Transaction", to_str, True),
+        ("Last Recorded IRT Transaction Date [Local]", to_date, True),
+        ("Last Recorded IRT Transaction Date (UTC)", to_date, True),
+        ("Next Expected IRT Transaction", to_str, True),
+        ("Next Expected IRT Transaction Date [Local]", to_date, True),
+        ("Most Recent Medication Assignment Transaction [Local]", to_date, False),
+        ("Days Since Last Medication Assignment Transaction", to_int, False),
+        ("Patient Forecast Status", to_str, False),
+        ("Patient Forecast Status Changed Date (UTC)", to_date, False),
+    )
+    headers = df.columns.tolist()
+    for _, record in df.iterrows():
+        params = [import_id]
+        for header, convert, required in layout:
+            if required or header in headers:
+                params.append(convert(record[header]))
+            else:
+                params.append(None)
+        cursor.execute(sql, tuple(params))
+
+
+def insert_mdd3003_summary(cursor, import_id, df):
+    """Insert every MDD3003 Subject Summary row under ``import_id``.
+
+    ``cursor`` is an open DB-API cursor using ``%s`` placeholders; ``df`` keeps the
+    report's original headers. Optional columns absent from ``df`` (MADRS
+    criteria, stratification fields, milestone dates, ...) are passed as None; a
+    missing required column raises KeyError.
+    """
+    # (db column, report header, converter, required). The INSERT text and the value
+    # tuple are both derived from this table, so they cannot drift out of step.
+    spec = (
+        ("subject", "Subject", to_str, True),
+        ("prior_subject_identifier", "Prior Subject Identifier", to_str, False),
+        ("site", "Site", to_str, True),
+        ("investigator", "Investigator", to_str, True),
+        ("location", "Location", to_str, True),
+        ("cohort_per_irt", "Cohort per IRT", to_str, True),
+        ("madrs_criteria_integrated", "MADRS response criteria integrated or manually entered", to_str, False),
+        ("informed_consent_date", "Informed Consent Date", to_date, True),
+        ("age", "Subject's age collection", to_int, True),
+        ("madrs_criteria_v15", "MADRS response criteria v1.5 from RAVE", to_str, False),
+        ("madrs_criteria_v16", "MADRS response criteria v1.6 from RAVE", to_str, False),
+        ("madrs_criteria_v17", "MADRS response criteria v1.7 from RAVE", to_str, False),
+        ("stratification_country", "Stratification Country", to_str, False),
+        ("age_group", "Age Group", to_str, False),
+        ("stable_remitters", "Stable Remitters vs. Non Stable Remitters", to_str, False),
+        ("irt_subject_status", "IRT Subject Status", to_str, True),
+        ("last_irt_transaction", "Last Recorded IRT Transaction", to_str, True),
+        ("last_irt_transaction_date_local", "Last Recorded IRT Transaction Date [Local]", to_date, True),
+        ("last_irt_transaction_date_utc", "Last Recorded IRT Transaction Date (UTC)", to_date, True),
+        ("next_irt_transaction", "Next Expected IRT Transaction", to_str, True),
+        ("next_irt_transaction_date_local", "Next Expected IRT Transaction Date [Local]", to_date, True),
+        ("date_screened", "Date Screened [Local]", to_date, False),
+        ("date_screen_failed", "Date Screen Failed [Local]", to_date, False),
+        ("date_randomized_part1", "Date Randomized Part 1 [Local]", to_date, False),
+        ("date_early_withdraw_randomized_part1", "Date Early Withdraw Randomized Part 1 [Local]", to_date, False),
+        ("date_open_label_induction", "Date Open Label Induction [Local]", to_date, False),
+        ("date_early_withdraw_open_label_induction", "Date Early Withdraw Open Label Induction [Local]", to_date, False),
+        ("date_randomized_part2", "Date Randomized Part 2 [Local]", to_date, False),
+        ("date_early_withdraw_randomized_part2", "Date Early Withdraw Randomized Part 2 [Local]", to_date, False),
+        ("date_completed", "Date Completed [Local]", to_date, False),
+        ("date_unblinded", "Date Unblinded [Local]", to_date, False),
+    )
+    names = ", ".join(db_col for db_col, _, _, _ in spec)
+    marks = ",".join(["%s"] * (len(spec) + 1))  # one extra placeholder for import_id
+    sql = f"INSERT INTO iwrs_mdd3003_subject_summary (import_id, {names}) VALUES ({marks})"
+    present = set(df.columns)
+    for _, r in df.iterrows():
+        cursor.execute(sql, (import_id, *(
+            conv(r[hdr]) if req or hdr in present else None for _, hdr, conv, req in spec)))
+
+
+def insert_visits(cursor, import_id, study, subject, visits):
+    """Insert one iwrs_subject_visits row per parsed visit dict; no-op when ``visits`` is empty."""
+    # Dict keys in the same order as the SQL columns that follow import_id/study/subject.
+    keys = (
+        "visit_type", "scheduled_date", "window_days", "actual_date",
+        "irt_transaction_no", "irt_transaction_description",
+        "medication_assignment", "quantity_assigned", "medication_id",
+    )
+    sql = """INSERT INTO iwrs_subject_visits (
+        import_id, study, subject, visit_type, scheduled_date, window_days,
+        actual_date, irt_transaction_no, irt_transaction_description,
+        medication_assignment, quantity_assigned, medication_id
+    ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
+    for visit in visits or ():
+        # Indexing (not .get) keeps the KeyError for a visit missing any field.
+        cursor.execute(sql, (import_id, study, subject, *(visit[k] for k in keys)))
+
+
+def import_to_mysql(summary_path, detail_files, study):
+    """Import one study's summary + detail reports in one transaction; return its import_id."""
+    print(f"\n  [MySQL] Importuji {study}...")
+    df_summary = read_summary_df(summary_path)
+    # Compiled once: the subject id is the token just before "Subject Detail.xlsx".
+    subject_re = re.compile(r"\d{4}-\d{2}-\d{2} \S+ (\S+) Subject Detail\.xlsx")
+    conn = get_conn()
+    try:
+        cursor = conn.cursor()
+        import_id = insert_import(cursor, study, summary_path)
+        # Any study other than UCO3001 is loaded with the MDD3003 layout.
+        load = insert_uco3001_summary if study == "77242113UCO3001" else insert_mdd3003_summary
+        load(cursor, import_id, df_summary)
+        total_visits = 0
+        for path in detail_files:
+            m = subject_re.search(os.path.basename(path))
+            visits = parse_detail_visits(path)
+            insert_visits(cursor, import_id, study, m.group(1) if m else "UNKNOWN", visits)
+            total_visits += len(visits)
+        conn.commit()
+    except Exception:
+        conn.rollback()  # discard the partial import before the error propagates
+        raise
+    finally:
+        conn.close()  # always release the connection, even when an insert raised
+    print(f"  [MySQL] import_id={import_id} | pacientů={len(df_summary)} | transakcí={total_visits}")
+    return import_id
+
+
+# ── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Download today's IWRS reports for every study, then import them into MySQL.
+
+    Each study is handled in isolation: a failed download or import is printed
+    and the remaining studies still run.
+    """
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    rule = "=" * 60
+    os.makedirs(INCOMING_DIR, exist_ok=True)
+    os.makedirs(DETAILS_DIR, exist_ok=True)
+    summary_paths = {}
+
+    # ── Steps 1 + 2: downloads (Playwright, one browser per study because of the session) ──
+    with sync_playwright() as p:
+        for study in STUDIES:
+            print(f"\n{rule}\n[{study}] KROK 1: Subject Summary Report\n{rule}")
+            browser = p.chromium.launch(headless=False)
+            try:
+                # Created inside the try so a failure here also closes the browser.
+                page = browser.new_context(accept_downloads=True).new_page()
+                login(page, study)
+                summary_path = download_summary(page, study, today)
+                summary_paths[study] = summary_path
+
+                print(f"\n[{study}] KROK 2: Subject Detail Reports")
+                download_details(page, study, summary_path, today)
+            except Exception as e:
+                print(f"  [{study}] CHYBA při stahování: {e}")
+                summary_paths[study] = None  # step 3 skips studies marked None
+            finally:
+                browser.close()
+
+    # ── Step 3: import into MySQL ──
+    print(f"\n{rule}\nKROK 3: Import do MySQL\n{rule}")
+    for study in STUDIES:
+        summary_path = summary_paths.get(study)
+        if not summary_path:
+            print(f"  [{study}] PŘESKOČENO — stahování selhalo")
+            continue
+
+        # Only the detail reports whose file name starts with today's date.
+        detail_files = sorted(glob.glob(
+            os.path.join(DETAILS_DIR, study, f"{today} {study} * Subject Detail.xlsx")
+        ))
+
+        try:
+            import_to_mysql(summary_path, detail_files, study)
+        except Exception as e:
+            print(f"  [{study}] CHYBA při importu: {e}")
+
+    print(f"\n{rule}\nVše hotovo.\n{rule}")
+
+
+# Guarded so that importing this module does not start the browser automation.
+if __name__ == "__main__":
+    main()