z230

2026-06-10 11:59:19 +02:00
parent a41f97b86b
commit 7b2f69ad85
275 changed files with 16726 additions and 0 deletions
@@ -0,0 +1,56 @@
+# Kontext práce — IWRS Notifications Pipeline
+## Datum: 2026-06-01
+
+## Co bylo uděláno
+
+### Nové soubory
+- `download_subject_notifications.py` — standalone skript pro stažení notifikací (referenční, nepoužívaný v pipeline)
+- `test_notifications.py` — testovací skript pro jednoho pacienta (CZ100222003 / UCO3001)
+- `create_iwrs_tables.py` — jednorázový skript pro vytvoření MySQL tabulek
+
+### Upravené soubory
+- `download_subject_details.py` — přidáno stahování notifikací (PDF + JSON) pro každý subjekt přímo ve smyčce přes subjekty
+- `import_to_mysql.py` — přidána funkce `import_notifications()`, která importuje JSON + PDF do DB a přesouvá soubory do `Zpracováno/`
+- `create_iwrs_tables.sql` — přidána tabulka `iwrs_notifications`
+- `run_all.py` — krok 2 nyní volá `dsd.run()` z `download_subject_details.py`
+
+## Jak to funguje
+
+### Stahování notifikací (v `download_subject_details.py`)
+1. Při výběru subjektu se zachytí `table_1` API response (obsahuje notifikace s `pk`, `et_title`, `label`, `body`, `actual_date_raw`)
+2. Porovná `pk` s DB (`iwrs_notifications`) — stahuje jen nové
+3. Stáhne PDF přes `page.request.get()` s Bearer tokenem (JWT se načítá čerstvě před každým requestem)
+4. Uloží PDF + JSON do `IncomingSourceReportsDetails/{study}/`
+5. Název souboru: `{actual_date_raw}_{label_s_podtržítky}.pdf` (při kolizi přidá `_pk{pk}`)
+
+### API endpointy
+- **Notifikace data**: `POST /_/p/{instance_id}/api/v1/reports_api/report_data?path=patient_detail_report&id={subject}&key=table_1&unblinded=false`
+- **PDF download**: `GET /_/p/{instance_id}/api/v1/meta_api/pdfnotification?pk={pk}&title={et_title}&html=true`
+- **app_instances** (pro zjištění instance_id): `GET /_/api/dispatch/app_instances/`
+- Headers: `Authorization: Bearer {JWT}`, `lang: en`, `prancer_study: {study_code}`
+
+### Instance ID mapping
+- `77242113UCO3001` → `/_/p/106`
+- `42847922MDD3003` → `/_/p/70`
+- `77242113CRD3001` → `/_/p/103`
+
+### Import (`import_to_mysql.py`)
+- Čte všechny `.json` soubory z `IncomingSourceReportsDetails/{study}/`
+- Načte příslušné `.pdf` jako binární data
+- Uloží do tabulky `iwrs_notifications` (UNIQUE KEY na `pk` — bez duplikátů)
+- Přesune soubory do `IncomingSourceReportsDetails/{study}/Zpracováno/`
+
+## MySQL tabulka `iwrs_notifications`
+```sql
+id, study, subject, pk (UNIQUE), title, label, event, actual_date, text (TEXT), pdf (MEDIUMBLOB), source_file, imported_at
+```
+
+## Aktuální stav
+- UCO3001: ~76 notifikací importováno
+- MDD3003: ~119 notifikací importováno (část stahování skončila chybou 403 — JWT expiroval; opraveno načítáním čerstvého JWT před každým requestem)
+- MDD3003 notifikace s 403 čekají na příští `run_all.py` (soubory nejsou v `Zpracováno`, takže se znovu stáhnou)
+
+## Co zbývá / možná vylepšení
+- Ověřit, že chyby 403 u MDD3003 jsou opraveny (JWT refresh)
+- `CZ100132003` UCO3001 — timeout při stahování XLS (subjekt přeskočen, zkusit znovu)
+- Případně přidat retry logiku pro timeout
@@ -0,0 +1,39 @@
+"""
+Jednorázový skript — vytvoří/aktualizuje tabulky v MySQL.
+Spusť jednou: python create_iwrs_tables.py
+"""
+import os
+import mysql.connector
+import db_config
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+SQL_FILE = os.path.join(BASE_DIR, "create_iwrs_tables.sql")
+
+conn = mysql.connector.connect(
+    host=db_config.DB_HOST,
+    port=db_config.DB_PORT,
+    user=db_config.DB_USER,
+    password=db_config.DB_PASSWORD,
+    database=db_config.DB_NAME,
+)
+try:
+    cursor = conn.cursor()
+    try:
+        # Read the whole DDL script; the context manager closes the file handle.
+        with open(SQL_FILE, encoding="utf-8") as sql_file:
+            sql = sql_file.read()
+
+        # Naive statement split on ";" -- sufficient as long as the script has
+        # no semicolons inside string literals or comments.
+        for raw_stmt in sql.split(";"):
+            # Drop full-line "--" comments so they do not hide the statement keyword.
+            code_lines = [
+                line for line in raw_stmt.splitlines()
+                if not line.strip().startswith("--")
+            ]
+            stmt = "\n".join(code_lines).strip()
+            # Skip empty fragments and USE statements (the database is already
+            # selected by the connection parameters above).
+            if not stmt or stmt.upper().startswith("USE"):
+                continue
+            try:
+                cursor.execute(stmt)
+                print(f"OK: {stmt[:80]}")
+            except mysql.connector.Error as e:
+                # Best effort: report the failed statement and carry on with the rest.
+                print(f"SKIP: {e}")
+
+        conn.commit()
+    finally:
+        cursor.close()
+finally:
+    # Always release the connection, even when reading the file or a commit fails.
+    conn.close()
+print("\nHotovo.")
@@ -0,0 +1,128 @@
+-- IWRS tabulky pro databázi studie
+-- Spustit jednou: mysql -h 192.168.1.76 -u root -p studie < create_iwrs_tables.sql
+
+USE studie;
+
+-- ── Import log ───────────────────────────────────────────────────────────────
+-- Import log table: each row stores a study code, a source file name and an
+-- import timestamp that defaults to the insertion time. The subject summary
+-- and visit tables in this script reference it through import_id.
+CREATE TABLE IF NOT EXISTS iwrs_import (
+    import_id   INT AUTO_INCREMENT PRIMARY KEY,
+    study       VARCHAR(20)  NOT NULL,
+    imported_at DATETIME     NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    source_file VARCHAR(500) NOT NULL,
+    INDEX idx_study (study)
+);
+
+-- ── UCO3001 subject summary ───────────────────────────────────────────────────
+-- Subject summary rows for study UCO3001. Every row belongs to one import
+-- (import_id -> iwrs_import) and is indexed by import and by subject.
+-- Presumably the columns mirror the columns of the Subject Summary Report
+-- export -- verify against the importer.
+CREATE TABLE IF NOT EXISTS iwrs_uco3001_subject_summary (
+    id                                  INT AUTO_INCREMENT PRIMARY KEY,
+    import_id                           INT          NOT NULL,
+    subject                             VARCHAR(20)  NOT NULL,
+    prior_subject_identifier            VARCHAR(20),
+    site                                VARCHAR(50),
+    investigator                        VARCHAR(100),
+    location                            VARCHAR(50),
+    cohort_per_irt                      VARCHAR(100),
+    informed_consent_date               DATE,
+    adolescent_assent_date              DATE,
+    age                                 SMALLINT,
+    weight                              DECIMAL(5,1),
+    rescreened_subject                  VARCHAR(10),
+    adt_ir                              VARCHAR(10),
+    three_or_more_advanced_therapies    VARCHAR(10),
+    only_oral_5asa_compounds            VARCHAR(10),
+    ustekinumab                         VARCHAR(10),
+    isolated_proctitis                  VARCHAR(10),
+    clinical_responder_status_i12_m0    VARCHAR(100),
+    irt_subject_status                  VARCHAR(50),
+    i0_rand_date_local                  DATE,
+    last_irt_transaction                VARCHAR(100),
+    last_irt_transaction_date_local     DATE,
+    last_irt_transaction_date_utc       DATE,
+    next_irt_transaction                VARCHAR(100),
+    next_irt_transaction_date_local     DATE,
+    most_recent_med_assignment_date     DATE,
+    days_since_last_med_assignment      SMALLINT,
+    patient_forecast_status             VARCHAR(50),
+    patient_forecast_status_changed_date DATE,
+    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
+    INDEX idx_import (import_id),
+    INDEX idx_subject (subject)
+);
+
+-- ── MDD3003 subject summary ───────────────────────────────────────────────────
+-- Subject summary rows for study MDD3003. Every row belongs to one import
+-- (import_id -> iwrs_import) and is indexed by import and by subject.
+-- Presumably the columns mirror the columns of the Subject Summary Report
+-- export -- verify against the importer.
+CREATE TABLE IF NOT EXISTS iwrs_mdd3003_subject_summary (
+    id                                      INT AUTO_INCREMENT PRIMARY KEY,
+    import_id                               INT          NOT NULL,
+    subject                                 VARCHAR(20)  NOT NULL,
+    prior_subject_identifier                VARCHAR(20),
+    site                                    VARCHAR(50),
+    investigator                            VARCHAR(100),
+    location                                VARCHAR(50),
+    cohort_per_irt                          VARCHAR(50),
+    madrs_criteria_integrated               VARCHAR(50),
+    informed_consent_date                   DATE,
+    age                                     SMALLINT,
+    madrs_criteria_v15                      VARCHAR(10),
+    madrs_criteria_v16                      VARCHAR(10),
+    madrs_criteria_v17                      VARCHAR(10),
+    stratification_country                  VARCHAR(10),
+    age_group                               VARCHAR(20),
+    stable_remitters                        VARCHAR(50),
+    irt_subject_status                      VARCHAR(100),
+    last_irt_transaction                    VARCHAR(100),
+    last_irt_transaction_date_local         DATE,
+    last_irt_transaction_date_utc           DATE,
+    next_irt_transaction                    VARCHAR(100),
+    next_irt_transaction_date_local         DATE,
+    date_screened                           DATE,
+    date_screen_failed                      DATE,
+    date_randomized_part1                   DATE,
+    date_early_withdraw_randomized_part1    DATE,
+    date_open_label_induction               DATE,
+    date_early_withdraw_open_label_induction DATE,
+    date_randomized_part2                   DATE,
+    date_early_withdraw_randomized_part2    DATE,
+    date_completed                          DATE,
+    date_unblinded                          DATE,
+    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
+    INDEX idx_import (import_id),
+    INDEX idx_subject (subject)
+);
+
+-- ── Notifications ────────────────────────────────────────────────────────────
+-- Notifications per study and subject, including the PDF stored as a blob.
+-- Unlike the summary tables, this table has no import_id link to iwrs_import;
+-- it carries its own source_file and imported_at columns.
+-- NOTE(review): pk is UNIQUE across all studies, while each study is served by
+-- a different app instance -- confirm that pk values cannot collide between
+-- instances, otherwise a (study, pk) key would be needed.
+CREATE TABLE IF NOT EXISTS iwrs_notifications (
+    id           INT AUTO_INCREMENT PRIMARY KEY,
+    study        VARCHAR(20)   NOT NULL,
+    subject      VARCHAR(20)   NOT NULL,
+    pk           INT           NOT NULL,
+    title        VARCHAR(100),
+    label        VARCHAR(500),
+    event        VARCHAR(50),
+    actual_date  DATE,
+    text         TEXT,
+    -- MEDIUMBLOB holds up to 16 MiB per PDF.
+    pdf          MEDIUMBLOB,
+    source_file  VARCHAR(500),
+    imported_at  DATETIME      NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    UNIQUE KEY uq_pk (pk),
+    INDEX idx_study_subject (study, subject)
+);
+
+-- ── Subject visits / transactions (obě studie) ───────────────────────────────
+-- Visit / transaction rows shared by both studies (the study code is a column
+-- here instead of being part of the table name). Every row belongs to one
+-- import (import_id -> iwrs_import).
+CREATE TABLE IF NOT EXISTS iwrs_subject_visits (
+    id                          INT AUTO_INCREMENT PRIMARY KEY,
+    import_id                   INT          NOT NULL,
+    study                       VARCHAR(20)  NOT NULL,
+    subject                     VARCHAR(20)  NOT NULL,
+    -- Only the two literal values 'Past' and 'Upcoming' are accepted.
+    visit_type                  ENUM('Past','Upcoming') NOT NULL,
+    scheduled_date              DATE,
+    -- Stored as text, not as a number.
+    window_days                 VARCHAR(20),
+    actual_date                 DATE,
+    irt_transaction_no          SMALLINT,
+    irt_transaction_description VARCHAR(200),
+    medication_assignment       VARCHAR(200),
+    quantity_assigned           SMALLINT,
+    medication_id               VARCHAR(20),
+    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
+    INDEX idx_import (import_id),
+    INDEX idx_study_subject (study, subject)
+);
@@ -0,0 +1,310 @@
+import os
+import glob
+import datetime
+import pandas as pd
+from openpyxl import Workbook
+from openpyxl.styles import (
+    Font, PatternFill, Alignment, Border, Side, GradientFill
+)
+from openpyxl.utils import get_column_letter
+
+# Directories are resolved relative to the folder containing this script.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
+CREATED_DIR  = os.path.join(BASE_DIR, "CreatedReports")
+
+# Study codes for which a report is generated.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+# Column names of the source workbook that feed the overview sheet.
+# NOTE(review): this list is not referenced by the functions visible in this
+# script; the same names are spelled out again in write_prehled.
+SOURCE_COLS = [
+    "Subject",
+    "Investigator",
+    "Subject's age collection",
+    "Cohort per IRT",
+    "IRT Subject Status",
+    "Last Recorded IRT Transaction",
+    "Next Expected IRT Transaction",
+    "Next Expected IRT Transaction Date [Local]",
+]
+
+# Header captions of the overview sheet, in column order A..H.
+DISPLAY_HEADERS = [
+    "Subject",
+    "Investigator",
+    "Věk",
+    "Cohort",
+    "Status",
+    "Last IRT",
+    "Next Visit",
+    "Next Date",
+]
+
+# Column widths of the overview sheet, paired positionally with DISPLAY_HEADERS.
+COL_WIDTHS = [14, 22, 6, 12, 14, 12, 12, 13]
+
+# ── Styles ───────────────────────────────────────────────────────────────────
+HEADER_FILL  = PatternFill("solid", fgColor="1F4E79")
+HEADER_FONT  = Font(name="Arial", bold=True, color="FFFFFF", size=10)
+NORMAL_FONT  = Font(name="Arial", size=10)
+BOLD_FONT    = Font(name="Arial", bold=True, size=10)
+# Grey strike-through font used for failed / discontinued subjects.
+STRIKE_FONT  = Font(name="Arial", size=10, strike=True, color="999999")
+# Currently identical to BOLD_FONT; kept as a separate name.
+ADOLESC_FONT = Font(name="Arial", bold=True, size=10)
+
+# Thin light-grey border applied to all four sides of a cell.
+THIN = Side(style="thin", color="CCCCCC")
+BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
+
+# Alternating row background colours.
+EVEN_FILL = PatternFill("solid", fgColor="EBF3FB")
+ODD_FILL  = PatternFill("solid", fgColor="FFFFFF")
+
+CENTER = Alignment(horizontal="center", vertical="center", wrap_text=False)
+LEFT   = Alignment(horizontal="left",   vertical="center", wrap_text=False)
+
+
+def unique_path(directory, stem):
+    """Return a path for ``{stem}.xlsx`` in ``directory`` that does not exist yet.
+
+    The plain name is preferred. If it is taken, the current time (``HHMM``)
+    is appended. If that name is taken as well (a second run within the same
+    minute), a numeric suffix is added, so an existing file is never
+    returned and overwritten.
+    """
+    path = os.path.join(directory, f"{stem}.xlsx")
+    if not os.path.exists(path):
+        return path
+    time_tag = datetime.datetime.now().strftime("%H%M")
+    candidate = os.path.join(directory, f"{stem} {time_tag}.xlsx")
+    counter = 2
+    while os.path.exists(candidate):
+        candidate = os.path.join(directory, f"{stem} {time_tag} ({counter}).xlsx")
+        counter += 1
+    return candidate
+
+
+def find_latest_source(study):
+    """Return the most recently modified Subject Summary Report for ``study``.
+
+    The glob also accepts a suffix after "Report" so that time-tagged
+    duplicates (e.g. "... Subject Summary Report 1405.xlsx", the naming used
+    when a file for the same day already exists) are found as well.
+    Excel lock files (names starting with "~$") are ignored.
+
+    Raises:
+        FileNotFoundError: when no matching workbook exists in INCOMING_DIR.
+    """
+    pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report*.xlsx")
+    candidates = [
+        f for f in glob.glob(pattern)
+        if not os.path.basename(f).startswith("~$")
+    ]
+    if not candidates:
+        raise FileNotFoundError(f"Nenalezen zdrojový soubor pro {study} v {INCOMING_DIR}")
+    # Newest file by modification time wins.
+    return max(candidates, key=os.path.getmtime)
+
+
+def load_source(path):
+    """Load the workbook at ``path`` using its real header row as column names.
+
+    The sheet is first read without a header to locate the first row that
+    contains a cell equal to "Subject" (after stripping whitespace); the
+    sheet is then read again with that row as the header.
+
+    Raises:
+        ValueError: when no row contains a "Subject" cell.
+    """
+    preview = pd.read_excel(path, header=None)
+    header_row = next(
+        (
+            idx
+            for idx, cells in preview.iterrows()
+            if "Subject" in [str(cell).strip() for cell in cells]
+        ),
+        None,
+    )
+    if header_row is None:
+        raise ValueError("Hlavičkový řádek nenalezen")
+    return pd.read_excel(path, header=header_row)
+
+
+def simplify_cohort(val):
+    """Collapse a cohort label to a short display value.
+
+    Missing values become "", anything containing "dolescent" becomes
+    "Adolescent", anything starting with "Adult" becomes "Adult"; every
+    other label (e.g. "Part 1", "Part 2") is returned unchanged as a string.
+    """
+    if pd.isna(val):
+        return ""
+    text = str(val)
+    if "dolescent" in text:
+        return "Adolescent"
+    return "Adult" if text.startswith("Adult") else text
+
+
+def format_date(val):
+    """Render ``val`` as ``YYYY-MM-DD``.
+
+    Missing values give "". Objects with ``strftime`` are formatted directly;
+    anything else is converted to a string and cut to its first 10 characters.
+    """
+    if pd.isna(val):
+        return ""
+    if not hasattr(val, "strftime"):
+        return str(val)[:10]
+    return val.strftime("%Y-%m-%d")
+
+
+def write_zdroj(wb, df_raw, source_path):
+    """Append a sheet holding the unmodified source data as a plain table.
+
+    The sheet is named "ZDROJ (<DDMONYYYY>)" after the modification time of
+    ``source_path``. Row 1 carries the column names of ``df_raw``; the data
+    rows follow with alternating fills. Missing values are written as empty
+    strings and values with ``strftime`` as ``YYYY-MM-DD`` text.
+    """
+    mtime = datetime.datetime.fromtimestamp(os.path.getmtime(source_path))
+    sheet_name = f"ZDROJ ({mtime.strftime('%d%b%Y').upper()})"
+    ws = wb.create_sheet(sheet_name)
+    ws.sheet_view.showGridLines = True
+
+    # write raw headers + data as plain table
+    headers = list(df_raw.columns)
+    for c, h in enumerate(headers, 1):
+        cell = ws.cell(row=1, column=c, value=h)
+        cell.font = Font(name="Arial", bold=True, size=9, color="FFFFFF")
+        cell.fill = PatternFill("solid", fgColor="404040")
+        cell.alignment = LEFT
+        cell.border = BORDER
+        # every source column gets the same fixed width
+        ws.column_dimensions[get_column_letter(c)].width = 20
+
+    # data starts in Excel row 2, directly below the header
+    for r, (_, row) in enumerate(df_raw.iterrows(), 2):
+        fill = EVEN_FILL if r % 2 == 0 else ODD_FILL
+        for c, col in enumerate(headers, 1):
+            val = row[col]
+            if pd.isna(val):
+                val = ""
+            elif hasattr(val, "strftime"):
+                val = val.strftime("%Y-%m-%d")
+            cell = ws.cell(row=r, column=c, value=val)
+            cell.font = Font(name="Arial", size=9)
+            cell.fill = fill
+            cell.border = BORDER
+            cell.alignment = LEFT
+
+    # keep the header row visible and put the filter on it
+    ws.freeze_panes = "A2"
+    ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}1"
+
+
+def write_prehled(wb, df_raw, study):
+    """Append the "Přehled" overview sheet: one styled row per subject.
+
+    Row 1 is a merged title with the study code and today's date, row 2 the
+    header (DISPLAY_HEADERS / COL_WIDTHS), and the data rows start at row 3,
+    sorted by subject. Rows whose status contains "Screen Failed" or
+    "Discontinued" are rendered in the strike-through font.
+    """
+    ws = wb.create_sheet("Přehled")
+    ws.sheet_view.showGridLines = False
+    ws.sheet_view.showRowColHeaders = True
+
+    # ── title row ────────────────────────────────────────────────────────────
+    ws.merge_cells("A1:H1")
+    title = ws["A1"]
+    title.value = f"Subject Summary — {study}   ({datetime.date.today().strftime('%d-%b-%Y')})"
+    title.font = Font(name="Arial", bold=True, size=12, color="1F4E79")
+    title.alignment = Alignment(horizontal="left", vertical="center")
+    ws.row_dimensions[1].height = 22
+
+    # ── header row ───────────────────────────────────────────────────────────
+    for c, (h, w) in enumerate(zip(DISPLAY_HEADERS, COL_WIDTHS), 1):
+        cell = ws.cell(row=2, column=c, value=h)
+        cell.font = HEADER_FONT
+        cell.fill = HEADER_FILL
+        cell.alignment = CENTER
+        cell.border = BORDER
+        ws.column_dimensions[get_column_letter(c)].width = w
+    ws.row_dimensions[2].height = 18
+
+    # ── build display dataframe ───────────────────────────────────────────────
+    display = pd.DataFrame()
+    display["Subject"]     = df_raw["Subject"].fillna("")
+    display["Investigator"]= df_raw["Investigator"].fillna("")
+    # NOTE(review): int(v) raises for a non-numeric age value -- confirm the
+    # source column is always numeric or empty.
+    display["Věk"]         = df_raw["Subject's age collection"].apply(
+                                 lambda v: "" if pd.isna(v) else int(v))
+    display["Cohort"]      = df_raw["Cohort per IRT"].apply(simplify_cohort)
+    display["Status"]      = df_raw["IRT Subject Status"].fillna("")
+    display["Last IRT"]    = df_raw["Last Recorded IRT Transaction"].fillna("—")
+    display["Next Visit"]  = df_raw["Next Expected IRT Transaction"].fillna("—")
+    display["Next Date"]   = df_raw["Next Expected IRT Transaction Date [Local]"].apply(format_date)
+
+    # the reset index doubles as the zero-based row counter below
+    display = display.sort_values("Subject").reset_index(drop=True)
+
+    # ── data rows ────────────────────────────────────────────────────────────
+    for r_idx, row in display.iterrows():
+        excel_row = r_idx + 3  # row 1=title, row 2=header
+        status = str(row["Status"])
+        is_failed     = "Screen Failed" in status or "Discontinued" in status
+        is_randomized = "Randomized" in status
+        is_adolescent = row["Cohort"] == "Adolescent"
+        fill = EVEN_FILL if r_idx % 2 == 0 else ODD_FILL
+
+        values = [
+            row["Subject"], row["Investigator"], row["Věk"],
+            row["Cohort"], row["Status"], row["Last IRT"],
+            row["Next Visit"], row["Next Date"],
+        ]
+
+        for c_idx, val in enumerate(values, 1):
+            # empty strings are written as truly empty cells
+            cell = ws.cell(row=excel_row, column=c_idx, value=val if val != "" else None)
+            cell.fill  = fill
+            cell.border = BORDER
+
+            # alignment (only the age column, column 3, is centred)
+            cell.alignment = CENTER if c_idx in (3,) else LEFT
+
+            # font logic: the failed state wins over the bold highlights, so a
+            # failed row is struck through in every column
+            if is_failed:
+                cell.font = STRIKE_FONT
+            elif c_idx == 5 and is_randomized:
+                cell.font = BOLD_FONT
+            elif c_idx == 4 and is_adolescent:
+                cell.font = ADOLESC_FONT
+            else:
+                cell.font = NORMAL_FONT
+
+        ws.row_dimensions[excel_row].height = 16
+
+    # keep title + header visible; the filter spans header and all data rows
+    ws.freeze_panes = "A3"
+    last_data_row = len(display) + 2
+    ws.auto_filter.ref = f"A2:H{last_data_row}"
+
+
+def write_next_visits(wb, df_raw, study):
+    """Append the "Next Visits" sheet listing upcoming visits by date.
+
+    Only subjects with a next-transaction date are listed, and subjects whose
+    status contains "Screen Failed" or "Discontinued" are left out. Rows are
+    sorted by that date. Layout: row 1 title, row 2 header, data from row 3.
+    """
+    ws = wb.create_sheet("Next Visits")
+    ws.sheet_view.showGridLines = False
+
+    # title
+    ws.merge_cells("A1:D1")
+    title = ws["A1"]
+    title.value = f"Next Expected Visits — {study}   ({datetime.date.today().strftime('%d-%b-%Y')})"
+    title.font = Font(name="Arial", bold=True, size=12, color="1F4E79")
+    title.alignment = Alignment(horizontal="left", vertical="center")
+    ws.row_dimensions[1].height = 22
+
+    # headers
+    nv_headers = ["Subject", "Investigator", "Next Visit", "Datum"]
+    nv_widths   = [14, 22, 26, 13]
+    for c, (h, w) in enumerate(zip(nv_headers, nv_widths), 1):
+        cell = ws.cell(row=2, column=c, value=h)
+        cell.font = HEADER_FONT
+        cell.fill = HEADER_FILL
+        cell.alignment = CENTER
+        cell.border = BORDER
+        ws.column_dimensions[get_column_letter(c)].width = w
+    ws.row_dimensions[2].height = 18
+
+    # data — only rows with a Next Date, exclude Screen Failed / Discontinued
+    df = pd.DataFrame()
+    df["Subject"]     = df_raw["Subject"].fillna("")
+    df["Investigator"]= df_raw["Investigator"].fillna("")
+    df["Next Visit"]  = df_raw["Next Expected IRT Transaction"].fillna("")
+    df["Datum"]       = df_raw["Next Expected IRT Transaction Date [Local]"]
+    # Status is used for filtering only; it is not written to the sheet.
+    df["Status"]      = df_raw["IRT Subject Status"].fillna("")
+
+    df = df[df["Datum"].notna()]
+    df = df[~df["Status"].str.contains("Screen Failed|Discontinued", na=False)]
+    # NOTE(review): the sort assumes the date column holds mutually comparable
+    # values (all dates or all strings) -- confirm against the source export.
+    df = df.sort_values("Datum").reset_index(drop=True)
+
+    for r_idx, row in df.iterrows():
+        excel_row = r_idx + 3
+        fill = EVEN_FILL if r_idx % 2 == 0 else ODD_FILL
+        datum_val = row["Datum"]
+        # the date is written as YYYY-MM-DD text, not as an Excel date value
+        datum_str = datum_val.strftime("%Y-%m-%d") if hasattr(datum_val, "strftime") else str(datum_val)[:10]
+
+        values = [row["Subject"], row["Investigator"], row["Next Visit"], datum_str]
+        for c_idx, val in enumerate(values, 1):
+            cell = ws.cell(row=excel_row, column=c_idx, value=val if val != "" else None)
+            cell.fill = fill
+            cell.border = BORDER
+            cell.font = NORMAL_FONT
+            cell.alignment = LEFT
+        ws.row_dimensions[excel_row].height = 16
+
+    ws.freeze_panes = "A3"
+    last_data_row = len(df) + 2
+    ws.auto_filter.ref = f"A2:D{last_data_row}"
+
+
+def create_report(study):
+    """Build and save the summary workbook for ``study``; return its path.
+
+    The newest source workbook is loaded, the three sheets are written in
+    the order overview, next visits, raw source, and the result is saved
+    into CREATED_DIR under a name that is not yet taken.
+    """
+    source_path = find_latest_source(study)
+    print(f"[{study}] Čtu: {os.path.basename(source_path)}")
+    frame = load_source(source_path)
+
+    workbook = Workbook()
+    # A fresh Workbook comes with one default sheet; drop it.
+    workbook.remove(workbook.active)
+
+    write_prehled(workbook, frame, study)
+    write_next_visits(workbook, frame, study)
+    write_zdroj(workbook, frame, source_path)
+
+    date_tag = datetime.date.today().strftime("%Y-%m-%d")
+    stem = f"{date_tag} {study} Subject Summary"
+    out_path = unique_path(CREATED_DIR, stem)
+    workbook.save(out_path)
+    print(f"[{study}] Uloženo: {out_path}")
+    return out_path
+
+
+def main():
+    """Create the summary report for every study in STUDIES.
+
+    A study without a source workbook is skipped. A workbook whose header
+    row cannot be located (``ValueError`` from loading the source) is
+    reported as an error; in both cases the remaining studies are still
+    processed instead of aborting the whole run.
+    """
+    os.makedirs(CREATED_DIR, exist_ok=True)
+    for study in STUDIES:
+        try:
+            create_report(study)
+        except FileNotFoundError as e:
+            print(f"[{study}] PŘESKOČENO: {e}")
+        except ValueError as e:
+            # Malformed source workbook: report it and go on with the next study.
+            print(f"[{study}] CHYBA: {e}")
+    print("\nHotovo.")
+
+
+# NOTE(review): this call is not guarded by `if __name__ == "__main__":`, so
+# importing this module runs the whole report generation.
+main()
@@ -0,0 +1,90 @@
+"""
+Stažení reportů z IWRS portálu — vše do jednoho adresáře `Incoming/`.
+
+  1. Subject Summary Report (per studie)
+  2. Subject Detail Reports + notifikace (per subjekt)
+
+Import se spouští samostatně skriptem `import_all.py`.
+"""
+
+import os
+import datetime
+
+from playwright.sync_api import sync_playwright
+
+import download_subject_details as dsd
+
+# ── CONFIG ───────────────────────────────────────────────────────────────────
+BASE_URL = "https://janssen.4gclinical.com"
+# Credentials can be supplied through the IWRS_EMAIL / IWRS_PASSWORD
+# environment variables; the literals are only the fallback so existing
+# setups keep working.
+# NOTE(review): a real password committed to version control should be
+# rotated and the fallback removed once the environment variables are set.
+EMAIL    = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
+PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")
+
+# Study codes to download.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+# All downloads go into the "Incoming" folder next to this script.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "Incoming")
+
+
+def unique_path(directory, stem, ext=".xlsx"):
+    """Return a path for ``{stem}{ext}`` in ``directory`` that does not exist yet.
+
+    The plain name is preferred. If it is taken, the current time (``HHMM``)
+    is appended. If that name is taken as well (a second run within the same
+    minute), a numeric suffix is added, so an existing file is never
+    returned and overwritten.
+    """
+    path = os.path.join(directory, f"{stem}{ext}")
+    if not os.path.exists(path):
+        return path
+    time_tag = datetime.datetime.now().strftime("%H%M")
+    candidate = os.path.join(directory, f"{stem} {time_tag}{ext}")
+    counter = 2
+    while os.path.exists(candidate):
+        candidate = os.path.join(directory, f"{stem} {time_tag} ({counter}){ext}")
+        counter += 1
+    return candidate
+
+
+def login(page, study):
+    """Log in at BASE_URL with EMAIL / PASSWORD and select ``study``.
+
+    ``page`` is driven through the login form and then the study picker;
+    after each step the function waits until the network is idle.
+    """
+    page.goto(BASE_URL)
+    page.wait_for_load_state("networkidle")
+    # credentials form
+    page.get_by_label("Email *").fill(EMAIL)
+    page.get_by_label("Password *").fill(PASSWORD)
+    page.locator("#login__submit").click()
+    page.wait_for_load_state("networkidle")
+    # study picker: open the dropdown, choose the study, confirm
+    page.get_by_label("Study *").click()
+    page.get_by_role("option", name=study).click()
+    page.get_by_role("button", name="SELECT").click()
+    page.wait_for_load_state("networkidle")
+
+
+def download_summary(page, study, today):
+    """Download the Subject Summary Report of ``study`` into INCOMING_DIR.
+
+    ``today`` is the date prefix used in the file name. Returns the path the
+    download was saved to. Page load and download each wait up to 120 s.
+    """
+    print(f"  [{study}] Stahuji Subject Summary Report...")
+    page.goto(f"{BASE_URL}/report/patient_summary_report")
+    page.wait_for_load_state("networkidle", timeout=120000)
+    filename = unique_path(INCOMING_DIR, f"{today} {study} Subject Summary Report")
+    # the click triggers the download that expect_download waits for
+    with page.expect_download(timeout=120000) as dl:
+        page.get_by_role("button", name="Download XLS").click()
+    dl.value.save_as(filename)
+    print(f"  [{study}] Summary OK -> {os.path.basename(filename)}")
+    return filename
+
+
+def main():
+    """Download the summary and detail reports for every study in STUDIES.
+
+    Each study gets its own browser instance, which is closed even when the
+    download fails; a failure is printed and the next study is processed.
+    """
+    date_tag = datetime.date.today().strftime("%Y-%m-%d")
+    os.makedirs(INCOMING_DIR, exist_ok=True)
+    rule = "=" * 60
+
+    with sync_playwright() as playwright:
+        for study in STUDIES:
+            print("\n" + rule)
+            print(f"[{study}] Stažení reportů")
+            print(rule)
+            browser = playwright.chromium.launch(headless=False)
+            page = browser.new_context(accept_downloads=True).new_page()
+            try:
+                login(page, study)
+                download_summary(page, study, date_tag)
+                # detail XLSX files and notifications go straight into Incoming/
+                dsd.run(page, study, out_dir=INCOMING_DIR, subjects_source_dir=INCOMING_DIR)
+            except Exception as err:
+                print(f"  [{study}] CHYBA: {err}")
+            finally:
+                browser.close()
+
+    print("\n" + rule)
+    print(f"Stahování hotovo. Soubory v: {INCOMING_DIR}")
+    print("Pro import spusť: python import_all.py")
+    print(rule)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,201 @@
+from playwright.sync_api import sync_playwright
+import os
+import glob
+import datetime
+import requests
+
+import pandas as pd
+
+# ── CONFIG ──────────────────────────────────────────────────────────────────
+BASE_URL = "https://janssen.4gclinical.com"
+# Credentials can be supplied through the IWRS_EMAIL / IWRS_PASSWORD
+# environment variables; the literals are only the fallback so existing
+# setups keep working.
+# NOTE(review): a real password committed to version control should be
+# rotated and the fallback removed once the environment variables are set.
+EMAIL    = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
+PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")
+
+# Study codes to process.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+# Summary reports are read from INCOMING_DIR; notification PDFs are written
+# below DETAILS_DIR.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
+DETAILS_DIR  = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
+# ────────────────────────────────────────────────────────────────────────────
+
+
+def get_subjects(study):
+    """Return the subject identifiers listed in today's Subject Summary Report.
+
+    The newest report for ``study`` in INCOMING_DIR is used, and it must carry
+    today's date prefix. The glob also accepts a suffix after "Report" so
+    that time-tagged duplicates (e.g. "... Subject Summary Report 1405.xlsx")
+    from a repeated download on the same day are found as well.
+
+    Raises:
+        FileNotFoundError: no report exists, or the newest one is not from today.
+        ValueError: the workbook has no row containing a "Subject" cell.
+    """
+    pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report*.xlsx")
+    files = sorted(
+        # skip Excel lock files
+        [f for f in glob.glob(pattern) if not os.path.basename(f).startswith("~$")],
+        key=os.path.getmtime,
+        reverse=True,
+    )
+    if not files:
+        raise FileNotFoundError(f"Nenalezen Subject Summary Report pro {study}")
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    if not os.path.basename(files[0]).startswith(today):
+        raise FileNotFoundError(
+            f"Dnešní Subject Summary Report pro {study} neexistuje — spusť nejdříve download_subject_summary.py"
+        )
+    path = files[0]
+    print(f"  Čtu subjekty z: {os.path.basename(path)}")
+
+    # Locate the real header row: the first row with a "Subject" cell.
+    raw = pd.read_excel(path, header=None)
+    header_row = None
+    for i, row in raw.iterrows():
+        if "Subject" in [str(v).strip() for v in row]:
+            header_row = i
+            break
+    if header_row is None:
+        raise ValueError("Hlavičkový řádek nenalezen")
+
+    df = pd.read_excel(path, header=header_row)
+    return df["Subject"].dropna().astype(str).str.strip().tolist()
+
+
+def get_jwt_and_api_base(page, study):
+    """Return ``(jwt, api_base_url)`` for ``study``.
+
+    The token is read from the page's localStorage; the app instances are
+    fetched inside the page with that token, and the first instance whose
+    label contains the study code supplies the API base URL.
+
+    Raises:
+        ValueError: no token in localStorage, or no instance matches ``study``.
+    """
+    token = page.evaluate("localStorage.getItem('JWT.access')")
+    if not token:
+        raise ValueError("JWT token nenalezen v localStorage")
+
+    fetch_instances_js = """async (jwt) => {
+        const res = await fetch('/_/api/dispatch/app_instances/', {
+            headers: { 'Authorization': `Bearer ${jwt}` }
+        });
+        return res.json();
+    }"""
+    app_instances = page.evaluate(fetch_instances_js, token)
+
+    matched = None
+    for candidate in app_instances:
+        if study in candidate.get("label", ""):
+            matched = candidate
+            break
+    if not matched:
+        raise ValueError(f"app_instance pro studii {study} nenalezena")
+
+    return token, matched["api_base_url"]
+
+
+def get_notifications(jwt, api_base, study, subject, timeout=120):
+    """Fetch the notification list of one subject from the report_data API.
+
+    Args:
+        jwt: bearer token sent in the Authorization header.
+        api_base: instance-specific path prefix appended to BASE_URL.
+        study: study code sent in the request body.
+        subject: subject identifier.
+        timeout: seconds to wait for the server before giving up; without a
+            timeout a stalled connection would block the run indefinitely.
+
+    Returns:
+        A list of dicts with the keys "pk", "title" and "event"; entries
+        lacking a pk or a title are left out.
+
+    Raises:
+        requests.HTTPError: the server answered with an error status.
+        requests.Timeout: no answer within ``timeout`` seconds.
+    """
+    url = f"{BASE_URL}{api_base}/api/v1/reports_api/report_data"
+    params = {
+        "path": "patient_detail_report",
+        "id": subject,
+        "key": "table_1",
+        "unblinded": "false",
+    }
+    payload = {
+        "path": "patient_detail_report",
+        "study": study,
+        "id": subject,
+        "key": "table_1",
+        "fields": {},
+        "filters": [{"tableId": "table_1", "tableFilters": {}}],
+        "pagination_details": {"order": "type", "reverseOrder": False, "page": 1, "limit": 500},
+        # a fresh cache key per call
+        "cache_key": f"py_{subject}_{datetime.datetime.now().timestamp()}",
+    }
+    headers = {
+        "Authorization": f"Bearer {jwt}",
+        "Content-Type": "application/json",
+        "lang": "en",
+    }
+    resp = requests.post(url, params=params, json=payload, headers=headers, timeout=timeout)
+    resp.raise_for_status()
+    data = resp.json()
+
+    notifications = []
+    # "or" guards against keys that are present but null in the JSON.
+    for row in data.get("data") or []:
+        for notif in row.get("notification") or []:
+            item = notif.get("item") or {}
+            pk = item.get("pk")
+            title = item.get("et_title")
+            if pk and title:
+                notifications.append({"pk": pk, "title": title, "event": row.get("event_event_id", "")})
+    return notifications
+
+
+def download_pdf(jwt, api_base, pk, title, out_path, timeout=120):
+    """Download one notification PDF and write it to ``out_path``.
+
+    Args:
+        jwt: bearer token sent in the Authorization header.
+        api_base: instance-specific path prefix appended to BASE_URL.
+        pk: notification identifier.
+        title: notification title passed to the endpoint.
+        out_path: destination file; only written after a successful response.
+        timeout: seconds to wait for the server before giving up.
+
+    Raises:
+        requests.HTTPError: the server answered with an error status.
+        requests.Timeout: no answer within ``timeout`` seconds.
+    """
+    url = f"{BASE_URL}{api_base}/api/v1/meta_api/pdfnotification"
+    params = {"pk": pk, "title": title, "html": "true"}
+    headers = {
+        "Authorization": f"Bearer {jwt}",
+        "lang": "en",
+        "Accept": "*/*",
+    }
+    resp = requests.get(url, params=params, headers=headers, timeout=timeout)
+    resp.raise_for_status()
+    with open(out_path, "wb") as f:
+        f.write(resp.content)
+
+
+def _safe_filename_part(text):
+    """Replace characters that are not allowed in file names with "_"."""
+    return "".join("_" if ch in '<>:"/\\|?*' or ord(ch) < 32 else ch for ch in str(text))
+
+
+def run(page, study):
+    """Download all notification PDFs of ``study`` into DETAILS_DIR/<study>.
+
+    For every subject of today's summary report the notification list is
+    fetched and each PDF not yet on disk is downloaded. A failure for one
+    subject is printed and the next subject is processed.
+
+    A notification counts as already downloaded when any file in the output
+    folder ends with the same "<study> <subject> Notification <title> pk<pk>.pdf"
+    suffix, regardless of its date prefix, so a run on a later day does not
+    fetch the same PDF again.
+    """
+    out_dir = os.path.join(DETAILS_DIR, study)
+    os.makedirs(out_dir, exist_ok=True)
+
+    subjects = get_subjects(study)
+    print(f"  Nalezeno {len(subjects)} subjektů")
+    today = datetime.date.today().strftime("%Y-%m-%d")
+
+    # Load the page so that a valid session context exists.
+    page.goto(f"{BASE_URL}/report/patient_detail_report")
+    page.wait_for_load_state("networkidle", timeout=120000)
+
+    jwt, api_base = get_jwt_and_api_base(page, study)
+    print(f"  API base: {api_base}")
+
+    # File names already present; extended as new PDFs are written.
+    existing = set(os.listdir(out_dir))
+
+    for subject in subjects:
+        print(f"  [{subject}] Stahuji notifikace...")
+        try:
+            # Re-read the token for every subject: a token read once at the
+            # start can expire during a long run and cause 403 responses.
+            # Presumably the page keeps the stored token current -- if the
+            # read returns nothing, the previous token is kept.
+            jwt = page.evaluate("localStorage.getItem('JWT.access')") or jwt
+
+            notifications = get_notifications(jwt, api_base, study, subject)
+            if not notifications:
+                print(f"  [{subject}] Žádné notifikace")
+                continue
+
+            for notif in notifications:
+                pk = notif["pk"]
+                title = notif["title"]
+                suffix = f" {study} {subject} Notification {_safe_filename_part(title)} pk{pk}.pdf"
+                if any(name.endswith(suffix) for name in existing):
+                    print(f"  [{subject}] {title} (pk={pk}) — již existuje, přeskakuji")
+                    continue
+                filename = os.path.join(out_dir, f"{today}{suffix}")
+                download_pdf(jwt, api_base, pk, title, filename)
+                existing.add(os.path.basename(filename))
+                print(f"  [{subject}] {title} (pk={pk}) OK")
+
+        except Exception as e:
+            print(f"  [{subject}] CHYBA při notifikacích: {e}")
+
+    print(f"  [{study}] Notifikace hotovo.")
+
+
+def main():
+    """Log in once per study and download its notification PDFs.
+
+    Each study gets its own browser instance. Login and download run inside
+    one try block, so a failure in either step is printed, the browser is
+    still closed, and the next study is processed.
+    """
+    os.makedirs(DETAILS_DIR, exist_ok=True)
+
+    with sync_playwright() as p:
+        for study in STUDIES:
+            print(f"\n[{study}] Přihlášení...")
+            browser = p.chromium.launch(headless=False)
+            try:
+                context = browser.new_context(accept_downloads=True)
+                page = context.new_page()
+
+                # credentials form
+                page.goto(BASE_URL)
+                page.wait_for_load_state("networkidle")
+                page.get_by_label("Email *").fill(EMAIL)
+                page.get_by_label("Password *").fill(PASSWORD)
+                page.locator("#login__submit").click()
+                page.wait_for_load_state("networkidle")
+
+                # study picker
+                page.get_by_label("Study *").click()
+                page.get_by_role("option", name=study).click()
+                page.get_by_role("button", name="SELECT").click()
+                page.wait_for_load_state("networkidle")
+
+                run(page, study)
+            except Exception as e:
+                print(f"  [{study}] CHYBA: {e}")
+            finally:
+                # Release the browser even when login or download failed.
+                browser.close()
+
+    print("\nVše hotovo.")
+
+
+# NOTE(review): this call is not guarded by `if __name__ == "__main__":`, so
+# importing this module starts the download.
+main()
@@ -0,0 +1,76 @@
+from playwright.sync_api import sync_playwright
+import os
+import datetime
+
+# ── CONFIG ──────────────────────────────────────────────────────────────────
+# Portal address and the account used for the automated login.
+# NOTE(review): the password is stored in plain text in the source file;
+# consider loading the credentials from the environment or a secret store.
+BASE_URL = "https://janssen.4gclinical.com"
+EMAIL    = "vbuzalka@its.jnj.com"
+PASSWORD = "Vlado123++-+"
+
+# Studies processed by main(); each one gets its own browser session.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+# Folders created by main(), resolved relative to this script's location.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
+CREATED_DIR  = os.path.join(BASE_DIR, "CreatedReports")
+# ────────────────────────────────────────────────────────────────────────────
+
+
+def unique_path(directory, stem):
+    """Return a path for ``{stem}.xlsx`` in *directory* that does not exist yet.
+
+    The plain name is preferred.  If it is taken, the current time (``HHMM``)
+    is appended.  If that name is taken as well (another collision within the
+    same minute), a running number is appended until a free name is found, so
+    an earlier download is never overwritten.
+    """
+    path = os.path.join(directory, f"{stem}.xlsx")
+    if not os.path.exists(path):
+        return path
+    time_tag = datetime.datetime.now().strftime("%H%M")
+    tagged_stem = f"{stem} {time_tag}"
+    path = os.path.join(directory, f"{tagged_stem}.xlsx")
+    counter = 2
+    while os.path.exists(path):
+        path = os.path.join(directory, f"{tagged_stem} ({counter}).xlsx")
+        counter += 1
+    return path
+
+
+def download_study(page, study, today):
+    """Log in, select *study* and save its Subject Summary Report.
+
+    The report is downloaded through the "Download XLS" button and stored in
+    ``INCOMING_DIR`` under a name built from *today* and *study*.  Returns
+    the path the file was saved to.
+    """
+    long_timeout = 120000
+
+    print(f"\n[{study}] Prihlaseni...")
+    page.goto(BASE_URL)
+    page.wait_for_load_state("networkidle")
+    for label, value in (("Email *", EMAIL), ("Password *", PASSWORD)):
+        page.get_by_label(label).fill(value)
+    page.locator("#login__submit").click()
+    page.wait_for_load_state("networkidle")
+
+    print(f"[{study}] Vyber studie...")
+    page.get_by_label("Study *").click()
+    page.get_by_role("option", name=study).click()
+    page.get_by_role("button", name="SELECT").click()
+    page.wait_for_load_state("networkidle")
+
+    print(f"[{study}] Stahuji Subject Summary Report...")
+    page.goto(f"{BASE_URL}/report/patient_summary_report")
+    page.wait_for_load_state("networkidle", timeout=long_timeout)
+
+    target = unique_path(INCOMING_DIR, f"{today} {study} Subject Summary Report")
+    with page.expect_download(timeout=long_timeout) as pending:
+        page.get_by_role("button", name="Download XLS").click()
+    pending.value.save_as(target)
+    print(f"[{study}] OK -> {target}")
+    return target
+
+
+def main():
+    """Download the Subject Summary Report for every study in ``STUDIES``.
+
+    Each study is handled in its own browser instance.  The browser is closed
+    on every exit path, so a failing download does not leave it running.
+    A summary of the saved files is printed at the end.
+    """
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    os.makedirs(INCOMING_DIR, exist_ok=True)
+    os.makedirs(CREATED_DIR, exist_ok=True)
+
+    downloaded = []
+
+    with sync_playwright() as p:
+        for study in STUDIES:
+            browser = p.chromium.launch(headless=False)
+            try:
+                context = browser.new_context(accept_downloads=True)
+                page = context.new_page()
+
+                filename = download_study(page, study, today)
+                downloaded.append((study, filename))
+            finally:
+                # Close the browser even when download_study raises.
+                browser.close()
+
+    print("\nVse stazeno:")
+    for study, path in downloaded:
+        print(f"  {study}: {path}")
+
+
+# NOTE(review): runs on import as well; consider an
+# `if __name__ == "__main__":` guard if this module is ever imported.
+main()
@@ -0,0 +1,107 @@
+"""
+Import všech čekajících reportů z `Incoming/` do MongoDB.
+
+Pořadí zpracování per typ + studie: nejstarší soubor podle mtime první
+(důležité pro chronologickou správnost snapshotů).
+
+Po úspěšném importu se soubor přesune do `Incoming/Zpracováno/`.
+Při chybě zůstane soubor v `Incoming/`.
+"""
+
+import os
+import sys
+import glob
+import shutil
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from common.mongo_writer import ensure_indexes
+
+import import_to_mongo
+import import_notifications_to_mongo
+
+# Folder scanned for pending report files, and the sub-folder that
+# successfully imported files are moved to.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "Incoming")
+DONE_DIR     = os.path.join(INCOMING_DIR, "Zpracováno")
+
+# Studies whose files are imported by main().
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+
+def _move_done(path):
+    """Move *path* into ``DONE_DIR``, replacing a same-named file there."""
+    os.makedirs(DONE_DIR, exist_ok=True)
+    target = os.path.join(DONE_DIR, os.path.basename(path))
+    # Name clash -> overwrite (Mongo already holds the current data, the
+    # file is only an archive copy).
+    already_archived = os.path.exists(target)
+    if already_archived:
+        os.remove(target)
+    shutil.move(path, target)
+
+
+def _sorted_by_mtime(paths):
+    """Return *paths* ordered oldest-first by modification time.
+
+    Entries whose file name starts with ``~$`` are left out.
+    """
+    candidates = []
+    for candidate in paths:
+        if os.path.basename(candidate).startswith("~$"):
+            continue
+        candidates.append(candidate)
+    candidates.sort(key=os.path.getmtime)
+    return candidates
+
+
+def import_summaries(study):
+    """Import every pending Subject Summary Report of *study*, oldest first.
+
+    A file is moved to the done folder only after its import succeeded; on
+    an error the problem is printed and the file stays where it is.
+    """
+    found = glob.glob(
+        os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report*.xlsx")
+    )
+    pending = _sorted_by_mtime(found)
+    if len(pending) == 0:
+        print(f"  [{study}] summary: nic ke zpracování")
+        return
+    print(f"  [{study}] summary: {len(pending)} soubor(ů) (oldest first)")
+    for report in pending:
+        try:
+            import_to_mongo.import_subject_summary(study, report)
+            _move_done(report)
+        except Exception as err:
+            print(f"  [{study}] CHYBA summary {os.path.basename(report)}: {err}")
+
+
+def import_details(study):
+    """Import every pending Subject Detail file of *study*, oldest first.
+
+    Files whose name cannot be parsed are reported and skipped; files whose
+    parsed study differs from *study* are skipped silently.  A file is moved
+    to the done folder only after its import succeeded.
+    """
+    found = glob.glob(
+        os.path.join(INCOMING_DIR, f"* {study} * Subject Detail.xlsx")
+    )
+    pending = _sorted_by_mtime(found)
+    if len(pending) == 0:
+        print(f"  [{study}] detail: nic ke zpracování")
+        return
+    print(f"  [{study}] detail: {len(pending)} soubor(ů) (oldest first)")
+    for detail in pending:
+        name_parts = import_to_mongo.parse_detail_filename(detail)
+        if not name_parts:
+            print(f"  [{study}] PŘESKAKUJI (nelze parsovat název): {os.path.basename(detail)}")
+            continue
+        _, file_study, subject = name_parts
+        if file_study != study:
+            # belongs to a different study
+            continue
+        try:
+            import_to_mongo.import_visits_single_file(study, subject, detail)
+            _move_done(detail)
+        except Exception as err:
+            print(f"  [{study}] CHYBA detail {os.path.basename(detail)}: {err}")
+
+
+def main():
+    """Import all pending summaries, detail files and notifications.
+
+    Does nothing except print a message when ``INCOMING_DIR`` is missing.
+    """
+    if not os.path.isdir(INCOMING_DIR):
+        print(f"Adresář neexistuje: {INCOMING_DIR}")
+        return
+    ensure_indexes()
+
+    rule = "=" * 60
+
+    print(rule)
+    print("Import Subject Summary + Visits")
+    print(rule)
+    for study in STUDIES:
+        import_summaries(study)
+        import_details(study)
+
+    print("\n" + rule)
+    print("Import notifikací")
+    print(rule)
+    import_notifications_to_mongo.import_from_dir(INCOMING_DIR, DONE_DIR, STUDIES)
+
+    print("\n" + rule)
+    print(f"Hotovo. Zpracované soubory: {DONE_DIR}")
+    print(rule)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,453 @@
+"""
+Importuje data z IWRS Excel reportů do MySQL (databáze studie).
+
+Pořadí spuštění:
+  1. download_subject_summary.py
+  2. download_subject_details.py
+  3. tento skript
+
+Každé spuštění vytvoří nový import_id v iwrs_import.
+Reportovací skripty pracují vždy s MAX(import_id) pro danou studii.
+"""
+
+import os
+import glob
+import datetime
+import re
+
+import numpy as np
+import pandas as pd
+import mysql.connector
+
+import db_config
+
+# Folders the importer reads from, resolved relative to this script:
+# summary reports live in INCOMING_DIR, per-study detail files and
+# notification JSON/PDF files in DETAILS_DIR/<study>.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
+DETAILS_DIR  = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
+
+# Studies imported by main().
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+
+# ── helpers ──────────────────────────────────────────────────────────────────
+
+def get_conn():
+    """Open a new MySQL connection using the settings in ``db_config``."""
+    settings = {
+        "host": db_config.DB_HOST,
+        "port": db_config.DB_PORT,
+        "user": db_config.DB_USER,
+        "password": db_config.DB_PASSWORD,
+        "database": db_config.DB_NAME,
+    }
+    return mysql.connector.connect(**settings)
+
+
+def _py(val):
+    """Return a NumPy scalar as a native Python object; pass anything else through."""
+    return val.item() if isinstance(val, np.generic) else val
+
+
+def to_date(val):
+    """Convert a pandas Timestamp / datetime / date / string to ``date``.
+
+    Returns ``None`` for missing values (None, NaN, NaT, empty or
+    "nan"/"nat"/"none" strings) and for strings that match none of the
+    supported formats.
+    """
+    val = _py(val)
+    if val is None:
+        return None
+    if isinstance(val, float) and val != val:  # only NaN differs from itself
+        return None
+    try:
+        if pd.isna(val):
+            return None
+    except (TypeError, ValueError):
+        # pd.isna gave no single truth value for this input; fall through
+        pass
+    if isinstance(val, datetime.datetime):  # pd.Timestamp is a datetime too
+        return val.date()
+    if isinstance(val, datetime.date):
+        return val
+    text = str(val).strip()
+    if text.lower() in ("", "nat", "nan", "none"):
+        return None
+    for fmt in ("%Y-%m-%d", "%d-%b-%Y", "%d-%m-%Y", "%Y-%m-%d %H:%M:%S"):
+        try:
+            parsed = datetime.datetime.strptime(text, fmt)
+        except ValueError:
+            continue
+        return parsed.date()
+    return None
+
+
+def to_int(val):
+    """Convert *val* to ``int``, or return ``None`` when that is not possible.
+
+    The value is parsed through ``float`` first, so numeric strings and
+    floats are accepted and a fractional part is truncated.  NaN, infinite,
+    missing and unparsable values all yield ``None``.
+    """
+    val = _py(val)
+    try:
+        v = float(val)
+        return None if (v != v) else int(v)  # v != v is True only for NaN
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError: int() of an infinite float, or float() of a huge int.
+        return None
+
+
+def to_float(val):
+    """Convert *val* to ``float``; NaN and unconvertible values become ``None``."""
+    val = _py(val)
+    try:
+        number = float(val)
+    except (TypeError, ValueError):
+        return None
+    if number != number:  # NaN
+        return None
+    return number
+
+
+def to_str(val):
+    """Return *val* as a stripped string, or ``None`` for missing values.
+
+    None, NaN and strings equal (case-insensitively) to "nan", "nat", "none"
+    or the empty string are treated as missing.
+    """
+    val = _py(val)
+    if val is None or (isinstance(val, float) and val != val):
+        return None
+    text = str(val).strip()
+    if text.lower() in ("nan", "nat", "none", ""):
+        return None
+    return text
+
+
+def find_summary_file(study):
+    """Return the most recently modified Subject Summary Report of *study*.
+
+    The glob also accepts a suffix after "Subject Summary Report", because a
+    repeated download on the same day is saved with a time tag appended
+    (``... Subject Summary Report HHMM.xlsx``); without the wildcard that
+    newer file would be ignored.  Files whose name starts with ``~$`` are
+    skipped.  A warning is printed when the newest file's name does not start
+    with today's date.
+
+    Raises:
+        FileNotFoundError: no matching report exists in ``INCOMING_DIR``.
+    """
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report*.xlsx")
+    files = sorted(
+        [f for f in glob.glob(pattern) if not os.path.basename(f).startswith("~$")],
+        key=os.path.getmtime,
+        reverse=True,
+    )
+    if not files:
+        raise FileNotFoundError(f"Nenalezen Subject Summary Report pro {study}")
+    if not os.path.basename(files[0]).startswith(today):
+        print(f"  UPOZORNĚNÍ: nejnovější Summary Report pro {study} není z dnešního dne ({os.path.basename(files[0])[:10]})")
+    return files[0]
+
+
+def read_summary_df(path):
+    """Read a Summary xlsx and return a DataFrame starting at its header row.
+
+    The header row is the first row containing a cell equal to "Subject"
+    (after stripping whitespace).
+
+    Raises:
+        ValueError: no such row exists.
+    """
+    raw = pd.read_excel(path, header=None)
+    header_row = next(
+        (
+            idx
+            for idx, cells in raw.iterrows()
+            if "Subject" in [str(cell).strip() for cell in cells]
+        ),
+        None,
+    )
+    if header_row is None:
+        raise ValueError(f"Hlavičkový řádek nenalezen v {path}")
+    return pd.read_excel(path, header=header_row)
+
+
+def find_detail_files(study):
+    """Return the sorted Subject Detail files of *study* for the summary's date.
+
+    The date is the first ten characters ("YYYY-MM-DD") of the file name of
+    the newest Summary Report; names starting with ``~$`` are skipped.
+    """
+    file_date = os.path.basename(find_summary_file(study))[:10]
+    pattern = os.path.join(
+        DETAILS_DIR, study, f"{file_date} {study} * Subject Detail.xlsx"
+    )
+    return sorted(
+        candidate
+        for candidate in glob.glob(pattern)
+        if not os.path.basename(candidate).startswith("~$")
+    )
+
+
+def parse_detail_visits(path):
+    """Return the visit rows of a Detail xlsx as a list of dicts.
+
+    The table starts below the first row containing a "Visit Type" cell on
+    the "patient_detail_report" sheet.  Only rows whose first column is
+    "Past" or "Upcoming" are kept; the remaining columns are read by
+    position.  An empty list is returned when no header row is found.
+    """
+    sheet = pd.read_excel(path, sheet_name="patient_detail_report", header=None)
+
+    header_row = next(
+        (
+            idx
+            for idx, cells in sheet.iterrows()
+            if "Visit Type" in [str(cell).strip() for cell in cells]
+        ),
+        None,
+    )
+    if header_row is None:
+        return []
+
+    body = sheet.iloc[header_row + 1:].copy()
+    body.columns = range(body.shape[1])
+
+    # (result key, column position, converter) for the columns after Visit Type
+    columns = (
+        ("scheduled_date", 1, to_date),
+        ("window_days", 2, to_str),
+        ("actual_date", 3, to_date),
+        ("irt_transaction_no", 4, to_int),
+        ("irt_transaction_description", 5, to_str),
+        ("medication_assignment", 6, to_str),
+        ("quantity_assigned", 7, to_int),
+        ("medication_id", 8, to_str),
+    )
+
+    visits = []
+    for _, record in body.iterrows():
+        visit_type = to_str(record.get(0))
+        if visit_type not in ("Past", "Upcoming"):
+            continue
+        visit = {"visit_type": visit_type}
+        for key, position, convert in columns:
+            visit[key] = convert(record.get(position))
+        visits.append(visit)
+    return visits
+
+
+# ── insert helpers ────────────────────────────────────────────────────────────
+
+def insert_import(cursor, study, source_file):
+    """Insert a new ``iwrs_import`` row and return ``cursor.lastrowid``.
+
+    The row stores *study*, the current local time and the base name of
+    *source_file*.
+    """
+    statement = "INSERT INTO iwrs_import (study, imported_at, source_file) VALUES (%s, %s, %s)"
+    params = (study, datetime.datetime.now(), os.path.basename(source_file))
+    cursor.execute(statement, params)
+    return cursor.lastrowid
+
+
+def insert_uco3001_summary(cursor, import_id, df):
+    """Insert every row of *df* into ``iwrs_uco3001_subject_summary``.
+
+    Each DataFrame row becomes one table row tagged with *import_id*.
+    Columns guarded by an ``in col`` test are optional and stored as
+    ``None`` when the report does not contain them; all other columns are
+    read unconditionally.  The order of the parameter tuple must match the
+    column list of the INSERT statement.
+    """
+    sql = """
+        INSERT INTO iwrs_uco3001_subject_summary (
+            import_id, subject, prior_subject_identifier, site, investigator, location,
+            cohort_per_irt, informed_consent_date, adolescent_assent_date, age, weight,
+            rescreened_subject, adt_ir, three_or_more_advanced_therapies,
+            only_oral_5asa_compounds, ustekinumab, isolated_proctitis,
+            clinical_responder_status_i12_m0, irt_subject_status,
+            i0_rand_date_local, last_irt_transaction,
+            last_irt_transaction_date_local, last_irt_transaction_date_utc,
+            next_irt_transaction, next_irt_transaction_date_local,
+            most_recent_med_assignment_date, days_since_last_med_assignment,
+            patient_forecast_status, patient_forecast_status_changed_date
+        ) VALUES (
+            %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s
+        )
+    """
+    col = df.columns.tolist()
+
+    for _, r in df.iterrows():
+        cursor.execute(sql, (
+            import_id,
+            to_str(r["Subject"]),
+            to_str(r["Prior Subject Identifier"]) if "Prior Subject Identifier" in col else None,
+            to_str(r["Site"]),
+            to_str(r["Investigator"]),
+            to_str(r["Location"]),
+            to_str(r["Cohort per IRT"]),
+            to_date(r["Informed Consent Date"]),
+            to_date(r["Adolescent Assent Date"]) if "Adolescent Assent Date" in col else None,
+            to_int(r["Subject's age collection"]),
+            to_float(r["Subject's weight collection"]) if "Subject's weight collection" in col else None,
+            to_str(r["Rescreened Subject"]) if "Rescreened Subject" in col else None,
+            to_str(r["ADT-IR"]) if "ADT-IR" in col else None,
+            to_str(r["3 or More Advanced Therapies"]) if "3 or More Advanced Therapies" in col else None,
+            to_str(r["Only Oral 5-ASA Compounds"]) if "Only Oral 5-ASA Compounds" in col else None,
+            to_str(r["Ustekinumab"]) if "Ustekinumab" in col else None,
+            to_str(r["Isolated Proctitis"]) if "Isolated Proctitis" in col else None,
+            to_str(r["Clinical Responder Status at I-12 / M-0"]) if "Clinical Responder Status at I-12 / M-0" in col else None,
+            to_str(r["IRT Subject Status"]),
+            to_date(r["I0_RAND_TIMESTAMP_LOCAL [Local]"]) if "I0_RAND_TIMESTAMP_LOCAL [Local]" in col else None,
+            to_str(r["Last Recorded IRT Transaction"]),
+            to_date(r["Last Recorded IRT Transaction Date [Local]"]),
+            to_date(r["Last Recorded IRT Transaction Date (UTC)"]),
+            to_str(r["Next Expected IRT Transaction"]),
+            to_date(r["Next Expected IRT Transaction Date [Local]"]),
+            to_date(r["Most Recent Medication Assignment Transaction [Local]"]) if "Most Recent Medication Assignment Transaction [Local]" in col else None,
+            to_int(r["Days Since Last Medication Assignment Transaction"]) if "Days Since Last Medication Assignment Transaction" in col else None,
+            to_str(r["Patient Forecast Status"]) if "Patient Forecast Status" in col else None,
+            to_date(r["Patient Forecast Status Changed Date (UTC)"]) if "Patient Forecast Status Changed Date (UTC)" in col else None,
+        ))
+
+
+def insert_mdd3003_summary(cursor, import_id, df):
+    """Insert every row of *df* into ``iwrs_mdd3003_subject_summary``.
+
+    Each DataFrame row becomes one table row tagged with *import_id*.
+    Columns guarded by an ``in col`` test are optional and stored as
+    ``None`` when the report does not contain them; all other columns are
+    read unconditionally.  The order of the parameter tuple must match the
+    column list of the INSERT statement.
+    """
+    sql = """
+        INSERT INTO iwrs_mdd3003_subject_summary (
+            import_id, subject, prior_subject_identifier, site, investigator, location,
+            cohort_per_irt, madrs_criteria_integrated, informed_consent_date, age,
+            madrs_criteria_v15, madrs_criteria_v16, madrs_criteria_v17,
+            stratification_country, age_group, stable_remitters, irt_subject_status,
+            last_irt_transaction, last_irt_transaction_date_local,
+            last_irt_transaction_date_utc, next_irt_transaction,
+            next_irt_transaction_date_local, date_screened, date_screen_failed,
+            date_randomized_part1, date_early_withdraw_randomized_part1,
+            date_open_label_induction, date_early_withdraw_open_label_induction,
+            date_randomized_part2, date_early_withdraw_randomized_part2,
+            date_completed, date_unblinded
+        ) VALUES (
+            %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s
+        )
+    """
+    # Column names present in this report, used for the optional-column checks.
+    col = df.columns.tolist()
+
+    for _, r in df.iterrows():
+        cursor.execute(sql, (
+            import_id,
+            to_str(r["Subject"]),
+            to_str(r["Prior Subject Identifier"]) if "Prior Subject Identifier" in col else None,
+            to_str(r["Site"]),
+            to_str(r["Investigator"]),
+            to_str(r["Location"]),
+            to_str(r["Cohort per IRT"]),
+            to_str(r["MADRS response criteria integrated or manually entered"]) if "MADRS response criteria integrated or manually entered" in col else None,
+            to_date(r["Informed Consent Date"]),
+            to_int(r["Subject's age collection"]),
+            to_str(r["MADRS response criteria v1.5 from RAVE"]) if "MADRS response criteria v1.5 from RAVE" in col else None,
+            to_str(r["MADRS response criteria v1.6 from RAVE"]) if "MADRS response criteria v1.6 from RAVE" in col else None,
+            to_str(r["MADRS response criteria v1.7 from RAVE"]) if "MADRS response criteria v1.7 from RAVE" in col else None,
+            to_str(r["Stratification Country"]) if "Stratification Country" in col else None,
+            to_str(r["Age Group"]) if "Age Group" in col else None,
+            to_str(r["Stable Remitters vs. Non Stable Remitters"]) if "Stable Remitters vs. Non Stable Remitters" in col else None,
+            to_str(r["IRT Subject Status"]),
+            to_str(r["Last Recorded IRT Transaction"]),
+            to_date(r["Last Recorded IRT Transaction Date [Local]"]),
+            to_date(r["Last Recorded IRT Transaction Date (UTC)"]),
+            to_str(r["Next Expected IRT Transaction"]),
+            to_date(r["Next Expected IRT Transaction Date [Local]"]),
+            to_date(r["Date Screened [Local]"]) if "Date Screened [Local]" in col else None,
+            to_date(r["Date Screen Failed [Local]"]) if "Date Screen Failed [Local]" in col else None,
+            to_date(r["Date Randomized Part 1 [Local]"]) if "Date Randomized Part 1 [Local]" in col else None,
+            to_date(r["Date Early Withdraw Randomized Part 1 [Local]"]) if "Date Early Withdraw Randomized Part 1 [Local]" in col else None,
+            to_date(r["Date Open Label Induction [Local]"]) if "Date Open Label Induction [Local]" in col else None,
+            to_date(r["Date Early Withdraw Open Label Induction [Local]"]) if "Date Early Withdraw Open Label Induction [Local]" in col else None,
+            to_date(r["Date Randomized Part 2 [Local]"]) if "Date Randomized Part 2 [Local]" in col else None,
+            to_date(r["Date Early Withdraw Randomized Part 2 [Local]"]) if "Date Early Withdraw Randomized Part 2 [Local]" in col else None,
+            to_date(r["Date Completed [Local]"]) if "Date Completed [Local]" in col else None,
+            to_date(r["Date Unblinded [Local]"]) if "Date Unblinded [Local]" in col else None,
+        ))
+
+
+def insert_visits(cursor, import_id, study, subject, visits):
+    """Insert one ``iwrs_subject_visits`` row per dict in *visits*.
+
+    Every row is tagged with *import_id*, *study* and *subject*.  Nothing is
+    executed when *visits* is empty.
+    """
+    if not visits:
+        return
+    sql = """
+        INSERT INTO iwrs_subject_visits (
+            import_id, study, subject, visit_type, scheduled_date, window_days,
+            actual_date, irt_transaction_no, irt_transaction_description,
+            medication_assignment, quantity_assigned, medication_id
+        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
+    """
+    # Visit keys in the order of the columns that follow `subject` above.
+    fields = (
+        "visit_type",
+        "scheduled_date",
+        "window_days",
+        "actual_date",
+        "irt_transaction_no",
+        "irt_transaction_description",
+        "medication_assignment",
+        "quantity_assigned",
+        "medication_id",
+    )
+    for visit in visits:
+        params = (import_id, study, subject) + tuple(visit[name] for name in fields)
+        cursor.execute(sql, params)
+
+
+# ── notifications ─────────────────────────────────────────────────────────────
+
+def find_notification_json_files(study):
+    """Return the sorted ``*.json`` files in the details folder of *study*."""
+    pattern = os.path.join(DETAILS_DIR, study, "*.json")
+    matches = glob.glob(pattern)
+    matches.sort()
+    return matches
+
+
+def import_notifications(conn, study):
+    """Import the notification JSON/PDF pairs of *study* into ``iwrs_notifications``.
+
+    For each JSON file the metadata and, if present, the PDF with the same
+    base name are written with an upsert.  The row is committed first and
+    only then are the files moved to the ``Zpracováno`` sub-folder, so a
+    file is never archived while its row is still uncommitted.  A failure on
+    one file is printed and the remaining files are still processed.
+
+    Returns the number of notifications stored.
+    """
+    import json as json_lib
+    import shutil
+
+    json_files = find_notification_json_files(study)
+    if not json_files:
+        print(f"  Žádné notifikace k importu pro {study}")
+        return 0
+
+    sql = """
+        INSERT INTO iwrs_notifications
+            (study, subject, pk, title, label, event, actual_date, text, pdf, source_file)
+        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+        ON DUPLICATE KEY UPDATE
+            label       = VALUES(label),
+            text        = VALUES(text),
+            pdf         = VALUES(pdf),
+            source_file = VALUES(source_file)
+    """
+
+    done_dir = os.path.join(os.path.join(DETAILS_DIR, study), "Zpracováno")
+    os.makedirs(done_dir, exist_ok=True)
+
+    cursor = conn.cursor()
+    count = 0
+    try:
+        for json_path in json_files:
+            try:
+                with open(json_path, "r", encoding="utf-8") as f:
+                    meta = json_lib.load(f)
+
+                # splitext swaps only the extension; str.replace would also
+                # rewrite a ".json" occurring elsewhere in the path.
+                pdf_path = os.path.splitext(json_path)[0] + ".pdf"
+                pdf_data = None
+                if os.path.exists(pdf_path):
+                    with open(pdf_path, "rb") as f:
+                        pdf_data = f.read()
+
+                cursor.execute(sql, (
+                    meta.get("study", study),
+                    meta.get("subject"),
+                    meta.get("pk"),
+                    meta.get("title"),
+                    meta.get("label"),
+                    meta.get("event"),
+                    to_date(meta.get("actual_date")),
+                    meta.get("text"),
+                    pdf_data,
+                    os.path.basename(json_path),
+                ))
+                # Commit before archiving: if the commit fails the files
+                # stay in place and are picked up again on the next run.
+                conn.commit()
+                count += 1
+
+                # Move to the "Zpracováno" (processed) folder.
+                shutil.move(json_path, os.path.join(done_dir, os.path.basename(json_path)))
+                if os.path.exists(pdf_path):
+                    shutil.move(pdf_path, os.path.join(done_dir, os.path.basename(pdf_path)))
+
+            except Exception as e:
+                print(f"  CHYBA při importu {os.path.basename(json_path)}: {e}")
+    finally:
+        cursor.close()
+
+    print(f"  Notifikací uloženo/přesunuto: {count}")
+    return count
+
+
+# ── main ──────────────────────────────────────────────────────────────────────
+
+def import_study(conn, study):
+    """Import the newest summary and its detail files of *study* as one import.
+
+    A new ``iwrs_import`` row is created, the summary rows and all visit
+    rows are inserted under its id, and everything is committed together.
+    If any step fails the transaction is rolled back before the exception
+    is re-raised, so a later commit on the same connection cannot persist a
+    half-finished import.
+
+    Returns the new import id.
+    """
+    summary_path = find_summary_file(study)
+    print(f"  Summary: {os.path.basename(summary_path)}")
+
+    df_summary = read_summary_df(summary_path)
+    df_summary = df_summary.dropna(how="all")
+
+    detail_files = find_detail_files(study)
+    print(f"  Detail souborů: {len(detail_files)}")
+
+    cursor = conn.cursor()
+    try:
+        import_id = insert_import(cursor, study, summary_path)
+        print(f"  import_id = {import_id}")
+
+        if study == "77242113UCO3001":
+            insert_uco3001_summary(cursor, import_id, df_summary)
+        else:
+            insert_mdd3003_summary(cursor, import_id, df_summary)
+        print(f"  Summary řádků: {len(df_summary)}")
+
+        visited = 0
+        for path in detail_files:
+            fname = os.path.basename(path)
+            # name: "2026-05-04 77242113UCO3001 CZ100012001 Subject Detail.xlsx"
+            m = re.search(r"\d{4}-\d{2}-\d{2} \S+ (\S+) Subject Detail\.xlsx", fname)
+            subject = m.group(1) if m else "UNKNOWN"
+            visits = parse_detail_visits(path)
+            insert_visits(cursor, import_id, study, subject, visits)
+            visited += len(visits)
+
+        conn.commit()
+    except Exception:
+        # Discard the partial import; the caller reports the error.
+        conn.rollback()
+        raise
+    finally:
+        cursor.close()
+    print(f"  Transakce uloženo: {visited}")
+    return import_id
+
+
+def main():
+    """Import summaries, visits and notifications of every study into MySQL.
+
+    Errors of a single study are printed and do not stop the remaining
+    studies.  The connection is closed on every exit path.
+    """
+    conn = get_conn()
+    print("Připojeno k MySQL.\n")
+
+    try:
+        for study in STUDIES:
+            print(f"[{study}]")
+            try:
+                import_id = import_study(conn, study)
+                print(f"  OK — import_id {import_id}")
+            except Exception as e:
+                print(f"  CHYBA: {e}")
+            try:
+                import_notifications(conn, study)
+            except Exception as e:
+                print(f"  CHYBA notifikace: {e}")
+            print()
+    finally:
+        # Also reached when something other than Exception (for example
+        # KeyboardInterrupt) interrupts the loop.
+        conn.close()
+    print("Hotovo.")
+
+
+# NOTE(review): runs on import as well; consider an
+# `if __name__ == "__main__":` guard if this module is ever imported.
+main()
@@ -0,0 +1,175 @@
+"""
+Kompletní pipeline:
+  1. Stažení Subject Summary Reportů (obě studie)
+  2. Stažení Subject Detail Reportů + notifikací (obě studie)
+  3. Import do MongoDB (subject_summary + visits + notifications)
+
+Spusť tento skript místo samostatných skriptů.
+"""
+
+import os
+import sys
+import datetime
+import glob
+
+from playwright.sync_api import sync_playwright
+
+import download_subject_details as dsd
+import import_to_mongo
+import import_notifications_to_mongo
+
+# ── CONFIG ───────────────────────────────────────────────────────────────────
+# Portal address and the account used for the automated login.
+# NOTE(review): the password is stored in plain text in the source file;
+# consider loading the credentials from the environment or a secret store.
+BASE_URL = "https://janssen.4gclinical.com"
+EMAIL    = "vbuzalka@its.jnj.com"
+PASSWORD = "Vlado123++-+"
+
+# Studies processed by main(); each one gets its own browser session.
+STUDIES = ["77242113UCO3001", "42847922MDD3003"]
+
+# Download folders, resolved relative to this script's location.
+BASE_DIR     = os.path.dirname(os.path.abspath(__file__))
+INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
+DETAILS_DIR  = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
+
+
+# ── helpers ───────────────────────────────────────────────────────────────────
+
+def unique_path(directory, stem):
+    """Return a path for ``{stem}.xlsx`` in *directory* that does not exist yet.
+
+    The plain name is preferred.  If it is taken, the current time (``HHMM``)
+    is appended.  If that name is taken as well (another collision within the
+    same minute), a running number is appended until a free name is found, so
+    an earlier download is never overwritten.
+    """
+    path = os.path.join(directory, f"{stem}.xlsx")
+    if not os.path.exists(path):
+        return path
+    time_tag = datetime.datetime.now().strftime("%H%M")
+    tagged_stem = f"{stem} {time_tag}"
+    path = os.path.join(directory, f"{tagged_stem}.xlsx")
+    counter = 2
+    while os.path.exists(path):
+        path = os.path.join(directory, f"{tagged_stem} ({counter}).xlsx")
+        counter += 1
+    return path
+
+
+def login(page, study):
+    """Open the portal, submit the login form and select *study*.
+
+    After each navigation step the function waits for the page to reach the
+    "networkidle" load state.
+    """
+    page.goto(BASE_URL)
+    page.wait_for_load_state("networkidle")
+
+    for label, value in (("Email *", EMAIL), ("Password *", PASSWORD)):
+        page.get_by_label(label).fill(value)
+    page.locator("#login__submit").click()
+    page.wait_for_load_state("networkidle")
+
+    page.get_by_label("Study *").click()
+    page.get_by_role("option", name=study).click()
+    page.get_by_role("button", name="SELECT").click()
+    page.wait_for_load_state("networkidle")
+
+
+# ── KROK 1: Subject Summary ───────────────────────────────────────────────────
+
+def download_summary(page, study, today):
+    """Download the Subject Summary Report of *study* and return its path.
+
+    The report page is opened on the already logged-in *page*, the
+    "Download XLS" button is clicked and the file is saved in
+    ``INCOMING_DIR`` under a name built from *today* and *study*.
+    """
+    long_timeout = 120000
+
+    print(f"  [{study}] Stahuji Subject Summary Report...")
+    page.goto(f"{BASE_URL}/report/patient_summary_report")
+    page.wait_for_load_state("networkidle", timeout=long_timeout)
+
+    target = unique_path(INCOMING_DIR, f"{today} {study} Subject Summary Report")
+    with page.expect_download(timeout=long_timeout) as pending:
+        page.get_by_role("button", name="Download XLS").click()
+    pending.value.save_as(target)
+
+    print(f"  [{study}] Summary OK -> {os.path.basename(target)}")
+    return target
+
+
+# ── KROK 2: Subject Details ───────────────────────────────────────────────────
+
+def get_subjects_from_summary(summary_path):
+    """Return the subject identifiers listed in a Summary xlsx.
+
+    The header row is the first row containing a cell equal to "Subject";
+    the values of that column are returned as stripped strings with missing
+    entries dropped.
+
+    Raises:
+        ValueError: no header row was found.
+    """
+    import pandas as pd
+
+    raw = pd.read_excel(summary_path, header=None)
+    header_row = next(
+        (
+            idx
+            for idx, cells in raw.iterrows()
+            if "Subject" in [str(cell).strip() for cell in cells]
+        ),
+        None,
+    )
+    if header_row is None:
+        raise ValueError("Hlavičkový řádek nenalezen")
+
+    table = pd.read_excel(summary_path, header=header_row)
+    subjects = table["Subject"].dropna().astype(str).str.strip()
+    return subjects.tolist()
+
+
+def download_details(page, study, summary_path, today):
+    """Download one Subject Detail xlsx per subject listed in the summary.
+
+    Subjects are read from *summary_path*; each file is saved as
+    ``{today} {study} {subject} Subject Detail.xlsx`` in the study's folder
+    under ``DETAILS_DIR``.
+
+    NOTE(review): main() in this file calls ``dsd.run`` for step 2 and does
+    not call this function — confirm whether it is still needed.
+    """
+    out_dir = os.path.join(DETAILS_DIR, study)
+    os.makedirs(out_dir, exist_ok=True)
+
+    subjects = get_subjects_from_summary(summary_path)
+    print(f"  [{study}] Subjektů k stažení: {len(subjects)}")
+
+    page.goto(f"{BASE_URL}/report/patient_detail_report")
+    page.wait_for_load_state("networkidle", timeout=120000)
+
+    for subject in subjects:
+        filename = os.path.join(out_dir, f"{today} {study} {subject} Subject Detail.xlsx")
+        # Type the subject into the search box and pick the first suggestion.
+        input_field = page.locator('input[placeholder="search"], input[type="text"]').first
+        input_field.click()
+        input_field.fill(subject)
+        # Short fixed pause before the first option is clicked.
+        page.wait_for_timeout(500)
+        page.locator("mat-option").first.dispatch_event("click")
+        page.wait_for_load_state("networkidle", timeout=120000)
+
+        with page.expect_download(timeout=120000) as dl:
+            page.get_by_role("button", name="Download XLS").click()
+        dl.value.save_as(filename)
+        print(f"  [{study}] Detail {subject} OK")
+
+        # Reset the selection before the next subject is searched.
+        page.get_by_role("button", name="Clear").click()
+        page.wait_for_load_state("networkidle", timeout=120000)
+
+
+# ── KROK 3: Import do MongoDB ────────────────────────────────────────────────
+
+def main():
+    """Run the whole pipeline: downloads per study, then the imports.
+
+    Steps 1 and 2 (summary download, then ``dsd.run``) use one browser per
+    study.  If either step fails for a study, that study is skipped in the
+    summary/visits import of step 3.  Notifications are imported afterwards
+    for all studies.
+    """
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    for folder in (INCOMING_DIR, DETAILS_DIR):
+        os.makedirs(folder, exist_ok=True)
+
+    rule = "=" * 60
+    summary_paths = {}
+
+    # Steps 1 + 2: downloads (Playwright, one session per study)
+    with sync_playwright() as p:
+        for study in STUDIES:
+            print("\n" + rule)
+            print(f"[{study}] KROK 1: Subject Summary Report")
+            print(rule)
+            browser = p.chromium.launch(headless=False)
+            context = browser.new_context(accept_downloads=True)
+            page = context.new_page()
+
+            try:
+                login(page, study)
+                summary_paths[study] = download_summary(page, study, today)
+
+                print(f"\n[{study}] KROK 2: Subject Detail Reports + notifikace")
+                dsd.run(page, study)
+
+            except Exception as err:
+                print(f"  [{study}] CHYBA při stahování: {err}")
+                summary_paths[study] = None
+            finally:
+                browser.close()
+
+    # Step 3: import into MongoDB
+    print("\n" + rule)
+    print("KROK 3: Import do MongoDB")
+    print(rule)
+
+    for study in STUDIES:
+        summary_path = summary_paths.get(study)
+        if summary_path:
+            try:
+                import_to_mongo.run(study, summary_path, DETAILS_DIR, today)
+            except Exception as err:
+                print(f"  [{study}] CHYBA při importu summary/visits: {err}")
+        else:
+            print(f"  [{study}] PŘESKOČENO — stahování selhalo")
+
+    # Notifications: PDF/JSON from disk straight into Mongo iwrs_notifications
+    print("\n  [notifikace] import PDF/JSON do Mongo...")
+    try:
+        import_notifications_to_mongo.main(STUDIES)
+    except Exception as err:
+        print(f"  CHYBA při importu notifikací: {err}")
+
+    print("\n" + rule)
+    print("Vše hotovo.")
+    print(rule)
+
+
+main()
@@ -0,0 +1,172 @@
+from playwright.sync_api import sync_playwright
+import re
+import os
+import datetime
+import mysql.connector
+import db_config
+
+
+def get_existing_pks(study):
+    """Return the set of notification pks already stored for ``study``.
+
+    Reads ``pk`` from the ``iwrs_notifications`` table using the connection
+    settings in ``db_config``.
+
+    This is deliberately best-effort: on any error (connection, query,
+    close) a warning is printed and an empty set is returned, so the caller
+    downloads everything instead of aborting.
+
+    Args:
+        study: study code used to filter ``iwrs_notifications.study``.
+
+    Returns:
+        Set of ``pk`` values; empty when the table has no rows for the study
+        or the database could not be read.
+    """
+    try:
+        conn = mysql.connector.connect(
+            host=db_config.DB_HOST, port=db_config.DB_PORT,
+            user=db_config.DB_USER, password=db_config.DB_PASSWORD,
+            database=db_config.DB_NAME,
+        )
+        # Nested try/finally so the cursor and the connection are released
+        # even when the query or the fetch raises.
+        try:
+            cursor = conn.cursor()
+            try:
+                cursor.execute(
+                    "SELECT pk FROM iwrs_notifications WHERE study = %s",
+                    (study,),
+                )
+                return {row[0] for row in cursor.fetchall()}
+            finally:
+                cursor.close()
+        finally:
+            conn.close()
+    except Exception as e:
+        print(f"  UPOZORNĚNÍ: nelze načíst existující pk z DB ({e}), stahuji vše")
+        return set()
+
+# Base URL of the IWRS web application; used for login, the report page and
+# the PDF download endpoint.
+BASE_URL = "https://janssen.4gclinical.com"
+# NOTE(review): login credentials are hard-coded in version-controlled source.
+# Consider loading them from the environment or a secrets store and rotating
+# the password.
+EMAIL    = "vbuzalka@its.jnj.com"
+PASSWORD = "Vlado123++-+"
+
+# Study code and subject this script is pinned to.
+STUDY   = "77242113UCO3001"
+SUBJECT = "CZ100222003"
+
+# Output root next to this file; notification files go to DETAILS_DIR/<study>/.
+BASE_DIR    = os.path.dirname(os.path.abspath(__file__))
+DETAILS_DIR = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
+
+
+def strip_html(html):
+    """Convert an HTML fragment to plain text.
+
+    ``<br>`` variants become newlines, every other tag is removed, runs of
+    three or more newlines collapse to a single blank line, and the result
+    is stripped of leading/trailing whitespace. HTML entities are left
+    untouched.
+
+    Args:
+        html: HTML string; ``None`` or an empty string yields ``""``.
+
+    Returns:
+        The plain-text rendering of ``html``.
+    """
+    # A JSON ``null`` body arrives here as None; treat it like an empty body
+    # instead of letting re.sub raise TypeError.
+    if not html:
+        return ""
+    text = re.sub(r"<br\s*/?>", "\n", html, flags=re.IGNORECASE)
+    text = re.sub(r"<[^>]+>", "", text)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip()
+
+
+def main():
+    """Download new notification PDFs (plus JSON metadata) for SUBJECT.
+
+    Logs in through the web UI and selects STUDY, resolves the study's API
+    base path from the ``app_instances`` endpoint, then selects SUBJECT on the
+    patient detail report so the ``table_1`` response can be captured. Every
+    notification in that response is printed; those whose ``pk`` is not yet
+    in the database are saved as ``<date>_<label>.pdf`` with a ``.json``
+    sidecar under ``DETAILS_DIR/STUDY``.
+
+    Raises:
+        ValueError: if no JWT is found in localStorage after login, or no
+            app instance label contains STUDY.
+    """
+    import json
+    from urllib.parse import quote, urlencode
+
+    existing_pks = get_existing_pks(STUDY)
+    print(f"V DB již existuje {len(existing_pks)} notifikací pro {STUDY}")
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=False, args=["--start-maximized"])
+        # Close the browser even when login or a download step raises.
+        try:
+            context = browser.new_context(no_viewport=True)
+            page = context.new_page()
+
+            print("Přihlašuji se...")
+            page.goto(BASE_URL)
+            page.wait_for_load_state("networkidle")
+            page.get_by_label("Email *").fill(EMAIL)
+            page.get_by_label("Password *").fill(PASSWORD)
+            page.locator("#login__submit").click()
+            page.wait_for_load_state("networkidle")
+
+            page.get_by_label("Study *").click()
+            page.get_by_role("option", name=STUDY).click()
+            page.get_by_role("button", name="SELECT").click()
+            page.wait_for_load_state("networkidle")
+
+            page.goto(f"{BASE_URL}/report/patient_detail_report")
+            page.wait_for_load_state("networkidle", timeout=60000)
+
+            # JWT + api_base
+            jwt = page.evaluate("localStorage.getItem('JWT.access')")
+            if not jwt:
+                # Fail with a clear message instead of a TypeError on jwt[:30].
+                raise ValueError("JWT nenalezen v localStorage")
+            print(f"JWT: {jwt[:30]}...")
+            instances = page.evaluate("""async (jwt) => {
+                const res = await fetch('/_/api/dispatch/app_instances/', {
+                    headers: { 'Authorization': `Bearer ${jwt}` }
+                });
+                return res.json();
+            }""", jwt)
+            instance = next(
+                (i for i in instances if STUDY in (i.get("label") or "")),
+                None,
+            )
+            if not instance:
+                raise ValueError(f"Instance pro {STUDY} nenalezena")
+            api_base = instance["api_base_url"]
+            print(f"API base: {api_base}")
+
+            # Select the subject and capture the table_1 response directly
+            print(f"Vybírám subjekt {SUBJECT}...")
+            input_field = page.locator('input[placeholder="search"], input[type="text"]').first
+            input_field.click()
+            input_field.fill(SUBJECT)
+            page.wait_for_timeout(1000)
+
+            with page.expect_response(
+                lambda r: "report_data" in r.url and "table_1" in r.url,
+                timeout=60000
+            ) as resp_info:
+                page.locator("mat-option").first.dispatch_event("click")
+
+            data = resp_info.value.json()
+
+            out_dir = os.path.join(DETAILS_DIR, STUDY)
+            os.makedirs(out_dir, exist_ok=True)
+
+            print(f"\n{'='*60}")
+            print(f"Subjekt: {SUBJECT}  |  Studie: {STUDY}")
+            print(f"{'='*60}")
+
+            count = 0
+            for row in data.get("data", []):
+                for notif in (row.get("notification") or []):
+                    # ``or`` guards: a JSON null must not crash the loop.
+                    item = notif.get("item") or {}
+                    pk = item.get("pk")
+                    title = item.get("et_title")
+                    label = (notif.get("label") or title or "").strip()
+                    # Whole label, spaces -> underscores, characters that are
+                    # not allowed in file names removed
+                    safe_label = re.sub(r'[\\/*?:"<>|]', "", label).replace(" ", "_")
+                    text = strip_html(item.get("body") or "")
+                    count += 1
+                    print(f"\n--- Notifikace #{count}: {safe_label} (pk={pk}) | event: {row.get('event_event_id')} ---")
+                    print(text)
+
+                    if pk in existing_pks:
+                        print(f"  → pk={pk} již v DB, přeskakuji")
+                        continue
+
+                    actual_date = row.get("actual_date_raw") or "0000-00-00"
+                    pdf_filename = os.path.join(out_dir, f"{actual_date}_{safe_label}.pdf")
+                    if os.path.exists(pdf_filename):
+                        # Name collision: disambiguate with the pk.
+                        pdf_filename = os.path.join(out_dir, f"{actual_date}_{safe_label}_pk{pk}.pdf")
+
+                    # Percent-encode the query so a title containing spaces,
+                    # '&' or '#' cannot break or truncate the URL.
+                    query = urlencode(
+                        {"pk": pk, "title": title or "", "html": "true"},
+                        quote_via=quote,
+                    )
+                    pdf_url = f"{BASE_URL}{api_base}/api/v1/meta_api/pdfnotification?{query}"
+                    pdf_resp = page.request.get(pdf_url, headers={
+                        "Authorization": f"Bearer {jwt}",
+                        "lang": "en",
+                        "prancer_study": STUDY,
+                        "Accept": "application/json, text/plain, */*",
+                    })
+                    if pdf_resp.ok:
+                        with open(pdf_filename, "wb") as f:
+                            f.write(pdf_resp.body())
+                        print(f"  → PDF uloženo: {os.path.basename(pdf_filename)}")
+                        # Swap only the extension; str.replace would also
+                        # rewrite ".pdf" inside the directory or the label.
+                        json_filename = os.path.splitext(pdf_filename)[0] + ".json"
+                        with open(json_filename, "w", encoding="utf-8") as f:
+                            json.dump({
+                                "pk": pk,
+                                "title": title,
+                                "label": label,
+                                "event": row.get("event_event_id"),
+                                "actual_date": actual_date,
+                                "subject": SUBJECT,
+                                "study": STUDY,
+                                "text": text,
+                            }, f, ensure_ascii=False, indent=2)
+                        print(f"  → JSON uloženo: {os.path.basename(json_filename)}")
+                    else:
+                        print(f"  → PDF chyba: {pdf_resp.status}")
+                    # Short pause between PDF requests.
+                    page.wait_for_timeout(300)
+
+            if count == 0:
+                print("Žádné notifikace nalezeny.")
+            else:
+                print(f"\n{'='*60}")
+                print(f"Celkem notifikací: {count}")
+        finally:
+            browser.close()
+
+
+# Run only when executed as a script, so that importing this module (e.g. by
+# a test collector or another tool) does not start a live browser login.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,5 @@
+# MySQL connection settings, read by scripts as db_config.DB_*.
+# NOTE(review): the database password is hard-coded in version-controlled
+# source and the account is "root" — consider moving the credentials to the
+# environment or a secrets store and using a least-privilege user.
+DB_HOST     = "192.168.1.76"
+DB_PORT     = 3306
+DB_USER     = "root"
+DB_PASSWORD = "Vlado9674+"
+DB_NAME     = "studie"