Add Outlook/Soubory/Clario/Feasibility scripts and reports; ignore Incoming, Outlook downloads & profile
This commit is contained in:
@@ -0,0 +1,39 @@
|
||||
"""
|
||||
Jednorázový skript — vytvoří/aktualizuje tabulky v MySQL.
|
||||
Spusť jednou: python create_iwrs_tables.py
|
||||
"""
|
||||
import os
|
||||
import mysql.connector
|
||||
import db_config
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
SQL_FILE = os.path.join(BASE_DIR, "create_iwrs_tables.sql")


def split_sql_statements(sql):
    """Split the text of a SQL script into individual executable statements.

    Whole-line ``--`` comments are removed *before* the text is split on
    ``;`` so that a semicolon inside a comment cannot cut a statement in two.
    Empty chunks and statements starting with ``USE`` are dropped, because the
    connection already selects the database.

    The splitter is deliberately simple: it does not understand semicolons
    inside string literals or trailing inline comments.

    Args:
        sql: Full text of the SQL script.

    Returns:
        List of statements without the terminating semicolon.
    """
    code = "\n".join(
        line for line in sql.splitlines()
        if not line.strip().startswith("--")
    )
    statements = []
    for chunk in code.split(";"):
        stmt = chunk.strip()
        if not stmt or stmt.upper().startswith("USE"):
            continue
        statements.append(stmt)
    return statements


def main():
    """Execute every statement of SQL_FILE against the configured database.

    A statement that fails with a database error is reported as ``SKIP`` and
    the script continues with the next one (best effort, as before).
    """
    # Read the script first so a missing file fails before a connection opens.
    with open(SQL_FILE, encoding="utf-8") as f:
        sql = f.read()

    conn = mysql.connector.connect(
        host=db_config.DB_HOST,
        port=db_config.DB_PORT,
        user=db_config.DB_USER,
        password=db_config.DB_PASSWORD,
        database=db_config.DB_NAME,
    )
    try:
        cursor = conn.cursor()
        try:
            for stmt in split_sql_statements(sql):
                try:
                    cursor.execute(stmt)
                    print(f"OK: {stmt[:80]}")
                except mysql.connector.Error as e:
                    print(f"SKIP: {e}")
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()

    print("\nHotovo.")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,128 @@
|
||||
-- IWRS tabulky pro databázi studie
|
||||
-- Spustit jednou: mysql -h 192.168.1.76 -u root -p studie < create_iwrs_tables.sql
|
||||
|
||||
USE studie;
|
||||
|
||||
-- ── Import log ───────────────────────────────────────────────────────────────
-- One row per import run; import_id is the parent key referenced by the
-- per-import tables in this script.
CREATE TABLE IF NOT EXISTS iwrs_import (
    import_id    INT           AUTO_INCREMENT PRIMARY KEY,
    study        VARCHAR(20)   NOT NULL,
    imported_at  DATETIME      NOT NULL DEFAULT CURRENT_TIMESTAMP,
    source_file  VARCHAR(500)  NOT NULL,
    INDEX idx_study (study)
);
|
||||
|
||||
-- ── UCO3001 subject summary ───────────────────────────────────────────────────
-- One row per subject and import; every column except id, import_id and
-- subject is nullable.
CREATE TABLE IF NOT EXISTS iwrs_uco3001_subject_summary (
    id                                    INT           AUTO_INCREMENT PRIMARY KEY,
    import_id                             INT           NOT NULL,
    subject                               VARCHAR(20)   NOT NULL,
    prior_subject_identifier              VARCHAR(20),
    site                                  VARCHAR(50),
    investigator                          VARCHAR(100),
    location                              VARCHAR(50),
    cohort_per_irt                        VARCHAR(100),
    informed_consent_date                 DATE,
    adolescent_assent_date                DATE,
    age                                   SMALLINT,
    weight                                DECIMAL(5,1),
    rescreened_subject                    VARCHAR(10),
    adt_ir                                VARCHAR(10),
    three_or_more_advanced_therapies      VARCHAR(10),
    only_oral_5asa_compounds              VARCHAR(10),
    ustekinumab                           VARCHAR(10),
    isolated_proctitis                    VARCHAR(10),
    clinical_responder_status_i12_m0      VARCHAR(100),
    irt_subject_status                    VARCHAR(50),
    i0_rand_date_local                    DATE,
    last_irt_transaction                  VARCHAR(100),
    last_irt_transaction_date_local       DATE,
    last_irt_transaction_date_utc         DATE,
    next_irt_transaction                  VARCHAR(100),
    next_irt_transaction_date_local       DATE,
    most_recent_med_assignment_date       DATE,
    days_since_last_med_assignment        SMALLINT,
    patient_forecast_status               VARCHAR(50),
    patient_forecast_status_changed_date  DATE,
    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
    INDEX idx_import (import_id),
    INDEX idx_subject (subject)
);
|
||||
|
||||
-- ── MDD3003 subject summary ───────────────────────────────────────────────────
-- One row per subject and import; every column except id, import_id and
-- subject is nullable.
CREATE TABLE IF NOT EXISTS iwrs_mdd3003_subject_summary (
    id                                        INT           AUTO_INCREMENT PRIMARY KEY,
    import_id                                 INT           NOT NULL,
    subject                                   VARCHAR(20)   NOT NULL,
    prior_subject_identifier                  VARCHAR(20),
    site                                      VARCHAR(50),
    investigator                              VARCHAR(100),
    location                                  VARCHAR(50),
    cohort_per_irt                            VARCHAR(50),
    madrs_criteria_integrated                 VARCHAR(50),
    informed_consent_date                     DATE,
    age                                       SMALLINT,
    madrs_criteria_v15                        VARCHAR(10),
    madrs_criteria_v16                        VARCHAR(10),
    madrs_criteria_v17                        VARCHAR(10),
    stratification_country                    VARCHAR(10),
    age_group                                 VARCHAR(20),
    stable_remitters                          VARCHAR(50),
    irt_subject_status                        VARCHAR(100),
    last_irt_transaction                      VARCHAR(100),
    last_irt_transaction_date_local           DATE,
    last_irt_transaction_date_utc             DATE,
    next_irt_transaction                      VARCHAR(100),
    next_irt_transaction_date_local           DATE,
    date_screened                             DATE,
    date_screen_failed                        DATE,
    date_randomized_part1                     DATE,
    date_early_withdraw_randomized_part1      DATE,
    date_open_label_induction                 DATE,
    date_early_withdraw_open_label_induction  DATE,
    date_randomized_part2                     DATE,
    date_early_withdraw_randomized_part2      DATE,
    date_completed                            DATE,
    date_unblinded                            DATE,
    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
    INDEX idx_import (import_id),
    INDEX idx_subject (subject)
);
|
||||
|
||||
-- ── Notifications ────────────────────────────────────────────────────────────
-- One row per notification, keyed by pk (unique); the pdf column holds the
-- document bytes.
-- NOTE(review): uq_pk assumes pk values never collide between studies —
-- confirm, otherwise the key should be (study, pk).
CREATE TABLE IF NOT EXISTS iwrs_notifications (
    id           INT           AUTO_INCREMENT PRIMARY KEY,
    study        VARCHAR(20)   NOT NULL,
    subject      VARCHAR(20)   NOT NULL,
    pk           INT           NOT NULL,
    title        VARCHAR(100),
    label        VARCHAR(500),
    event        VARCHAR(50),
    actual_date  DATE,
    text         TEXT,
    pdf          MEDIUMBLOB,
    source_file  VARCHAR(500),
    imported_at  DATETIME      NOT NULL DEFAULT CURRENT_TIMESTAMP,
    UNIQUE KEY uq_pk (pk),
    INDEX idx_study_subject (study, subject)
);
|
||||
|
||||
-- ── Subject visits / transactions (both studies) ─────────────────────────────
-- One row per visit/transaction of a subject within one import;
-- visit_type is restricted to 'Past' or 'Upcoming'.
CREATE TABLE IF NOT EXISTS iwrs_subject_visits (
    id                           INT           AUTO_INCREMENT PRIMARY KEY,
    import_id                    INT           NOT NULL,
    study                        VARCHAR(20)   NOT NULL,
    subject                      VARCHAR(20)   NOT NULL,
    visit_type                   ENUM('Past','Upcoming') NOT NULL,
    scheduled_date               DATE,
    window_days                  VARCHAR(20),
    actual_date                  DATE,
    irt_transaction_no           SMALLINT,
    irt_transaction_description  VARCHAR(200),
    medication_assignment        VARCHAR(200),
    quantity_assigned            SMALLINT,
    medication_id                VARCHAR(20),
    FOREIGN KEY (import_id) REFERENCES iwrs_import(import_id),
    INDEX idx_import (import_id),
    INDEX idx_study_subject (study, subject)
);
|
||||
@@ -0,0 +1,201 @@
|
||||
from playwright.sync_api import sync_playwright
|
||||
import os
|
||||
import glob
|
||||
import datetime
|
||||
import requests
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# ── CONFIG ──────────────────────────────────────────────────────────────────
BASE_URL = "https://janssen.4gclinical.com"

# Credentials come from the environment so that no secret is stored in the
# repository.  IWRS_EMAIL keeps the previous address as its default;
# IWRS_PASSWORD has no default on purpose.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "")
if not PASSWORD:
    # Only warn here (do not abort) so importing the module stays harmless.
    print("UPOZORNĚNÍ: proměnná prostředí IWRS_PASSWORD není nastavena — přihlášení selže.")

STUDIES = ["77242113UCO3001", "42847922MDD3003"]

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
DETAILS_DIR = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def get_subjects(study):
    """Return the subject identifiers listed in today's Subject Summary Report.

    The most recently modified report for *study* in INCOMING_DIR is used; its
    file name must start with today's date (YYYY-MM-DD).

    Args:
        study: Study identifier as it appears in the report file name.

    Returns:
        List of stripped strings taken from the "Subject" column.

    Raises:
        FileNotFoundError: No report exists, or the newest one is not from today.
        ValueError: No row of the sheet contains a "Subject" cell.
    """
    # The trailing wildcard also matches "... Report HHMM.xlsx", the name the
    # summary downloader uses when the plain file name is already taken.
    pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report*.xlsx")
    candidates = [
        p for p in glob.glob(pattern)
        # "~$" files are Excel lock files of currently opened workbooks.
        if not os.path.basename(p).startswith("~$")
    ]
    if not candidates:
        raise FileNotFoundError(f"Nenalezen Subject Summary Report pro {study}")

    path = max(candidates, key=os.path.getmtime)
    today = datetime.date.today().strftime("%Y-%m-%d")
    if not os.path.basename(path).startswith(today):
        raise FileNotFoundError(
            f"Dnešní Subject Summary Report pro {study} neexistuje — spusť nejdříve download_subject_summary.py"
        )
    print(f" Čtu subjekty z: {os.path.basename(path)}")

    # First pass without a header: locate the row that holds the column names.
    raw = pd.read_excel(path, header=None)
    header_row = None
    for i, row in raw.iterrows():
        if "Subject" in [str(v).strip() for v in row]:
            header_row = i
            break
    if header_row is None:
        raise ValueError("Hlavičkový řádek nenalezen")

    # Second pass: re-read with the detected header row.
    df = pd.read_excel(path, header=header_row)
    return df["Subject"].dropna().astype(str).str.strip().tolist()
|
||||
|
||||
|
||||
def get_jwt_and_api_base(page, study):
    """Return ``(jwt, api_base_url)`` for *study* using the given browser page.

    The token is read from the page's localStorage; the list of application
    instances is fetched from inside the page with that token, and the first
    instance whose "label" contains *study* supplies the API base URL.

    Raises:
        ValueError: The token is missing or no instance label matches.
    """
    token = page.evaluate("localStorage.getItem('JWT.access')")
    if not token:
        raise ValueError("JWT token nenalezen v localStorage")

    fetch_instances_js = """async (jwt) => {
        const res = await fetch('/_/api/dispatch/app_instances/', {
            headers: { 'Authorization': `Bearer ${jwt}` }
        });
        return res.json();
    }"""
    app_instances = page.evaluate(fetch_instances_js, token)

    match = None
    for candidate in app_instances:
        if study in candidate.get("label", ""):
            match = candidate
            break
    if not match:
        raise ValueError(f"app_instance pro studii {study} nenalezena")

    return token, match["api_base_url"]
|
||||
|
||||
|
||||
def get_notifications(jwt, api_base, study, subject, timeout=60):
    """Fetch the list of notifications of one subject via the report_data API.

    Args:
        jwt: Bearer token sent in the Authorization header.
        api_base: API base path of the study, appended to BASE_URL.
        study: Study identifier sent in the request body.
        subject: Subject identifier (report "id").
        timeout: Seconds to wait for the server before giving up; without a
            timeout a stalled connection would block the whole run.

    Returns:
        List of dicts with keys "pk", "title" and "event"; entries lacking a
        pk or a title are left out.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.Timeout: No answer arrived within *timeout* seconds.
    """
    url = f"{BASE_URL}{api_base}/api/v1/reports_api/report_data"
    params = {
        "path": "patient_detail_report",
        "id": subject,
        "key": "table_1",
        "unblinded": "false",
    }
    payload = {
        "path": "patient_detail_report",
        "study": study,
        "id": subject,
        "key": "table_1",
        "fields": {},
        "filters": [{"tableId": "table_1", "tableFilters": {}}],
        "pagination_details": {"order": "type", "reverseOrder": False, "page": 1, "limit": 500},
        # The timestamp makes the key different on every call.
        "cache_key": f"py_{subject}_{datetime.datetime.now().timestamp()}",
    }
    headers = {
        "Authorization": f"Bearer {jwt}",
        "Content-Type": "application/json",
        "lang": "en",
    }
    resp = requests.post(url, params=params, json=payload, headers=headers, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    notifications = []
    # "or" fallbacks: a key that is present but null must not break the loop.
    for row in data.get("data") or []:
        for notif in row.get("notification") or []:
            item = notif.get("item") or {}
            pk = item.get("pk")
            title = item.get("et_title")
            if pk and title:
                notifications.append({"pk": pk, "title": title, "event": row.get("event_event_id", "")})
    return notifications
|
||||
|
||||
|
||||
def download_pdf(jwt, api_base, pk, title, out_path, timeout=120):
    """Download one notification document and write it to *out_path*.

    Args:
        jwt: Bearer token sent in the Authorization header.
        api_base: API base path of the study, appended to BASE_URL.
        pk: Notification key passed as the "pk" query parameter.
        title: Notification title passed as the "title" query parameter.
        out_path: Destination file; it is only created after the response has
            been received successfully.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.Timeout: No answer arrived within *timeout* seconds.
    """
    url = f"{BASE_URL}{api_base}/api/v1/meta_api/pdfnotification"
    params = {"pk": pk, "title": title, "html": "true"}
    headers = {
        "Authorization": f"Bearer {jwt}",
        "lang": "en",
        "Accept": "*/*",
    }
    resp = requests.get(url, params=params, headers=headers, timeout=timeout)
    resp.raise_for_status()
    with open(out_path, "wb") as f:
        f.write(resp.content)
|
||||
|
||||
|
||||
def run(page, study):
    """Download the notification documents of every subject of *study*.

    Files are written to DETAILS_DIR/<study>/; a file that already exists is
    not downloaded again.  An error while processing one subject is printed
    and the loop continues with the next subject.

    Args:
        page: Logged-in browser page with the study already selected.
        study: Study identifier.
    """
    out_dir = os.path.join(DETAILS_DIR, study)
    os.makedirs(out_dir, exist_ok=True)

    subjects = get_subjects(study)
    print(f" Nalezeno {len(subjects)} subjektů")
    today = datetime.date.today().strftime("%Y-%m-%d")

    # Load the report page so the session context (localStorage) is valid.
    page.goto(f"{BASE_URL}/report/patient_detail_report")
    page.wait_for_load_state("networkidle", timeout=120000)

    jwt, api_base = get_jwt_and_api_base(page, study)
    print(f" API base: {api_base}")

    # The title comes from the server; path separators and characters that
    # are invalid in Windows file names are replaced before it is used.
    unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

    for subject in subjects:
        print(f" [{subject}] Stahuji notifikace...")
        try:
            notifications = get_notifications(jwt, api_base, study, subject)
            if not notifications:
                print(f" [{subject}] Žádné notifikace")
                continue

            for notif in notifications:
                pk = notif["pk"]
                title = notif["title"]
                safe_title = str(title).translate(unsafe_chars)
                filename = os.path.join(
                    out_dir,
                    f"{today} {study} {subject} Notification {safe_title} pk{pk}.pdf",
                )
                if os.path.exists(filename):
                    print(f" [{subject}] {title} (pk={pk}) — již existuje, přeskakuji")
                    continue
                # The API still receives the original, unmodified title.
                download_pdf(jwt, api_base, pk, title, filename)
                print(f" [{subject}] {title} (pk={pk}) OK")

        except Exception as e:
            print(f" [{subject}] CHYBA při notifikacích: {e}")

    print(f" [{study}] Notifikace hotovo.")
|
||||
|
||||
|
||||
def main():
    """Log in once per study and download that study's notifications.

    A fresh browser is started for every study and is always closed again,
    also when the login or the download raises.  Errors raised by ``run`` are
    printed and the next study is processed.
    """
    os.makedirs(DETAILS_DIR, exist_ok=True)

    with sync_playwright() as p:
        for study in STUDIES:
            print(f"\n[{study}] Přihlášení...")
            browser = p.chromium.launch(headless=False)
            try:
                context = browser.new_context(accept_downloads=True)
                page = context.new_page()

                page.goto(BASE_URL)
                page.wait_for_load_state("networkidle")
                page.get_by_label("Email *").fill(EMAIL)
                page.get_by_label("Password *").fill(PASSWORD)
                page.locator("#login__submit").click()
                page.wait_for_load_state("networkidle")

                page.get_by_label("Study *").click()
                page.get_by_role("option", name=study).click()
                page.get_by_role("button", name="SELECT").click()
                page.wait_for_load_state("networkidle")

                try:
                    run(page, study)
                except Exception as e:
                    print(f" [{study}] CHYBA: {e}")
            finally:
                # Without this a failed login would leave the browser running.
                browser.close()

    print("\nVše hotovo.")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,76 @@
|
||||
from playwright.sync_api import sync_playwright
|
||||
import os
|
||||
import datetime
|
||||
|
||||
# ── CONFIG ──────────────────────────────────────────────────────────────────
BASE_URL = "https://janssen.4gclinical.com"

# Credentials come from the environment so that no secret is stored in the
# repository.  IWRS_EMAIL keeps the previous address as its default;
# IWRS_PASSWORD has no default on purpose.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "")
if not PASSWORD:
    # Only warn here (do not abort) so importing the module stays harmless.
    print("UPOZORNENI: promenna prostredi IWRS_PASSWORD neni nastavena - prihlaseni selze.")

STUDIES = ["77242113UCO3001", "42847922MDD3003"]

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
CREATED_DIR = os.path.join(BASE_DIR, "CreatedReports")
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def unique_path(directory, stem):
    """Return a path for ``<stem>.xlsx`` in *directory* that does not exist yet.

    Preference order: ``<stem>.xlsx``, then ``<stem> HHMM.xlsx`` (current
    time), then ``<stem> HHMM (2).xlsx``, ``<stem> HHMM (3).xlsx`` and so on.

    Args:
        directory: Target directory.
        stem: File name without extension.

    Returns:
        Path of a file that did not exist when the function checked.
    """
    path = os.path.join(directory, f"{stem}.xlsx")
    if not os.path.exists(path):
        return path

    time_tag = datetime.datetime.now().strftime("%H%M")
    tagged_stem = f"{stem} {time_tag}"
    path = os.path.join(directory, f"{tagged_stem}.xlsx")
    # The time tag only has minute resolution, so it can be taken as well.
    counter = 2
    while os.path.exists(path):
        path = os.path.join(directory, f"{tagged_stem} ({counter}).xlsx")
        counter += 1
    return path
|
||||
|
||||
|
||||
def download_study(page, study, today):
    """Log in, select *study* and save its Subject Summary Report as XLSX.

    Args:
        page: Browser page used for the whole interaction.
        study: Study identifier chosen in the "Study *" selector.
        today: Date string used as the prefix of the saved file name.

    Returns:
        Path of the saved file inside INCOMING_DIR.
    """
    idle = "networkidle"

    # --- login ---
    print(f"\n[{study}] Prihlaseni...")
    page.goto(BASE_URL)
    page.wait_for_load_state(idle)
    email_box = page.get_by_label("Email *")
    email_box.fill(EMAIL)
    password_box = page.get_by_label("Password *")
    password_box.fill(PASSWORD)
    page.locator("#login__submit").click()
    page.wait_for_load_state(idle)

    # --- study selection ---
    print(f"[{study}] Vyber studie...")
    page.get_by_label("Study *").click()
    page.get_by_role("option", name=study).click()
    page.get_by_role("button", name="SELECT").click()
    page.wait_for_load_state(idle)

    # --- report download ---
    print(f"[{study}] Stahuji Subject Summary Report...")
    report_url = f"{BASE_URL}/report/patient_summary_report"
    page.goto(report_url)
    page.wait_for_load_state(idle, timeout=120000)

    target = unique_path(INCOMING_DIR, f"{today} {study} Subject Summary Report")
    with page.expect_download(timeout=120000) as pending_download:
        page.get_by_role("button", name="Download XLS").click()
    pending_download.value.save_as(target)

    print(f"[{study}] OK -> {target}")
    return target
|
||||
|
||||
|
||||
def main():
    """Download today's Subject Summary Report for every configured study.

    A fresh browser is started per study and is always closed again, also
    when the download raises.  As before, an error aborts the remaining
    studies.
    """
    today = datetime.date.today().strftime("%Y-%m-%d")
    os.makedirs(INCOMING_DIR, exist_ok=True)
    os.makedirs(CREATED_DIR, exist_ok=True)

    downloaded = []

    with sync_playwright() as p:
        for study in STUDIES:
            browser = p.chromium.launch(headless=False)
            try:
                context = browser.new_context(accept_downloads=True)
                page = context.new_page()

                filename = download_study(page, study, today)
                downloaded.append((study, filename))
            finally:
                # Without this a failed download would leave the browser running.
                browser.close()

    print("\nVse stazeno:")
    for study, path in downloaded:
        print(f" {study}: {path}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,453 @@
|
||||
"""
|
||||
Importuje data z IWRS Excel reportů do MySQL (databáze studie).
|
||||
|
||||
Pořadí spuštění:
|
||||
1. download_subject_summary.py
|
||||
2. download_subject_details.py
|
||||
3. tento skript
|
||||
|
||||
Každé spuštění vytvoří nový import_id v iwrs_import.
|
||||
Reportovací skripty pracují vždy s MAX(import_id) pro danou studii.
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
import datetime
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import mysql.connector
|
||||
|
||||
import db_config
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
|
||||
DETAILS_DIR = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
|
||||
|
||||
STUDIES = ["77242113UCO3001", "42847922MDD3003"]
|
||||
|
||||
|
||||
# ── helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def get_conn():
    """Open and return a new MySQL connection built from ``db_config``."""
    settings = {
        "host": db_config.DB_HOST,
        "port": db_config.DB_PORT,
        "user": db_config.DB_USER,
        "password": db_config.DB_PASSWORD,
        "database": db_config.DB_NAME,
    }
    return mysql.connector.connect(**settings)
|
||||
|
||||
|
||||
def _py(val):
    """Convert a numpy scalar to the matching native Python object.

    Any other value is returned unchanged.
    """
    if isinstance(val, np.generic):
        return val.item()
    return val


# Text formats accepted by to_date, tried in this order.
_DATE_FORMATS = ("%Y-%m-%d", "%d-%b-%Y", "%d-%m-%Y", "%Y-%m-%d %H:%M:%S")

# Lower-cased texts that stand for "no value".
_NULL_TEXTS = ("nan", "nat", "none", "")


def to_date(val):
    """Convert a Timestamp / datetime / date / string to ``datetime.date``.

    Returns None for None, NaN, NaT, null-like text and for text that matches
    none of the formats in _DATE_FORMATS.
    """
    val = _py(val)
    if val is None:
        return None
    try:
        if pd.isna(val):
            return None
    except (TypeError, ValueError):
        # pd.isna yields an array for list-like input; treat it as "not null".
        pass
    # pd.Timestamp is a datetime subclass, so this branch covers it as well.
    if isinstance(val, datetime.datetime):
        return val.date()
    if isinstance(val, datetime.date):
        return val
    text = str(val).strip()
    if text.lower() in _NULL_TEXTS:
        return None
    for fmt in _DATE_FORMATS:
        try:
            return datetime.datetime.strptime(text, fmt).date()
        except ValueError:
            continue
    return None


def to_int(val):
    """Convert *val* to int (truncating towards zero) or return None.

    None is returned for NaN, for infinite or otherwise unrepresentable
    numbers, and for anything ``float()`` cannot parse.
    """
    val = _py(val)
    try:
        number = float(val)
        # number != number is true only for NaN.
        return None if number != number else int(number)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(inf) or float() of an integer that is too large.
        return None


def to_float(val):
    """Convert *val* to float, or return None for NaN and unparsable input."""
    val = _py(val)
    try:
        number = float(val)
    except (TypeError, ValueError, OverflowError):
        return None
    return None if number != number else number


def to_str(val):
    """Convert *val* to stripped text, or None for null-like values.

    None, NaN and the texts "nan", "nat", "none" (any case) or "" give None.
    """
    val = _py(val)
    if val is None:
        return None
    text = str(val).strip()
    return None if text.lower() in _NULL_TEXTS else text
|
||||
|
||||
|
||||
def find_summary_file(study):
    """Return the most recently modified Subject Summary Report of *study*.

    A warning is printed when the file name does not start with today's date.

    Raises:
        FileNotFoundError: INCOMING_DIR holds no report for the study.
    """
    # The trailing wildcard also matches "... Report HHMM.xlsx", i.e. a report
    # that was saved with a time tag appended to its name.
    pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report*.xlsx")
    candidates = [
        p for p in glob.glob(pattern)
        # "~$" files are Excel lock files of currently opened workbooks.
        if not os.path.basename(p).startswith("~$")
    ]
    if not candidates:
        raise FileNotFoundError(f"Nenalezen Subject Summary Report pro {study}")

    newest = max(candidates, key=os.path.getmtime)
    today = datetime.date.today().strftime("%Y-%m-%d")
    if not os.path.basename(newest).startswith(today):
        print(f" UPOZORNĚNÍ: nejnovější Summary Report pro {study} není z dnešního dne ({os.path.basename(newest)[:10]})")
    return newest
|
||||
|
||||
|
||||
def read_summary_df(path):
    """Read a Summary workbook and return its table as a DataFrame.

    The first row containing a cell equal to "Subject" (after stripping) is
    used as the header row.

    Raises:
        ValueError: No such row exists in the sheet.
    """
    preview = pd.read_excel(path, header=None)
    header_idx = next(
        (
            idx
            for idx, cells in preview.iterrows()
            if any(str(cell).strip() == "Subject" for cell in cells)
        ),
        None,
    )
    if header_idx is None:
        raise ValueError(f"Hlavičkový řádek nenalezen v {path}")
    return pd.read_excel(path, header=header_idx)
|
||||
|
||||
|
||||
def find_detail_files(study):
    """Return the sorted Subject Detail workbooks that belong to *study*.

    Only files whose name starts with the same date prefix as the newest
    Summary Report are returned.
    """
    # The first ten characters of the summary file name are "YYYY-MM-DD".
    date_prefix = os.path.basename(find_summary_file(study))[:10]
    study_dir = os.path.join(DETAILS_DIR, study)
    mask = os.path.join(study_dir, f"{date_prefix} {study} * Subject Detail.xlsx")
    return sorted(
        candidate
        for candidate in glob.glob(mask)
        if not os.path.basename(candidate).startswith("~$")
    )
|
||||
|
||||
|
||||
def parse_detail_visits(path):
    """Return the visit rows of a Subject Detail workbook as a list of dicts.

    The sheet "patient_detail_report" is scanned for the first row containing
    a "Visit Type" cell; every later row whose first cell is "Past" or
    "Upcoming" becomes one dict.  An empty list is returned when the header
    row is missing.
    """
    sheet = pd.read_excel(path, sheet_name="patient_detail_report", header=None)

    header_idx = next(
        (
            idx
            for idx, cells in sheet.iterrows()
            if any(str(cell).strip() == "Visit Type" for cell in cells)
        ),
        None,
    )
    if header_idx is None:
        return []

    body = sheet.iloc[header_idx + 1:].copy()
    body.columns = range(body.shape[1])

    # (result key, column position, converter) for the columns after column 0.
    layout = (
        ("scheduled_date", 1, to_date),
        ("window_days", 2, to_str),
        ("actual_date", 3, to_date),
        ("irt_transaction_no", 4, to_int),
        ("irt_transaction_description", 5, to_str),
        ("medication_assignment", 6, to_str),
        ("quantity_assigned", 7, to_int),
        ("medication_id", 8, to_str),
    )

    visits = []
    for _, record in body.iterrows():
        kind = to_str(record.get(0))
        if kind not in ("Past", "Upcoming"):
            continue
        entry = {"visit_type": kind}
        for key, position, convert in layout:
            entry[key] = convert(record.get(position))
        visits.append(entry)
    return visits
|
||||
|
||||
|
||||
# ── insert helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
def insert_import(cursor, study, source_file):
    """Insert one row into iwrs_import and return the cursor's ``lastrowid``.

    Only the base name of *source_file* is stored; the import time is the
    current local time.
    """
    statement = "INSERT INTO iwrs_import (study, imported_at, source_file) VALUES (%s, %s, %s)"
    imported_at = datetime.datetime.now()
    file_name = os.path.basename(source_file)
    cursor.execute(statement, (study, imported_at, file_name))
    return cursor.lastrowid
|
||||
|
||||
|
||||
def _insert_summary_rows(cursor, table, import_id, df, columns):
    """Insert every row of *df* into *table* according to *columns*.

    Args:
        cursor: Open database cursor.
        table: Target table name (a constant supplied by the caller).
        import_id: Value stored in the import_id column of every row.
        df: Summary DataFrame.
        columns: Sequence of ``(db_column, header, converter, required)``.
            A required header is always read from the row, so a missing one
            raises KeyError; an optional header that is absent from *df*
            yields NULL.
    """
    names = ["import_id"] + [spec[0] for spec in columns]
    # The placeholder list is derived from the column list, so the two can
    # never get out of step.
    sql = "INSERT INTO {} ({}) VALUES ({})".format(
        table, ", ".join(names), ", ".join(["%s"] * len(names))
    )
    present = set(df.columns)
    for _, row in df.iterrows():
        values = [import_id]
        for _name, header, convert, required in columns:
            if required or header in present:
                values.append(convert(row[header]))
            else:
                values.append(None)
        cursor.execute(sql, tuple(values))


def insert_uco3001_summary(cursor, import_id, df):
    """Insert the UCO3001 summary rows of *df* for the given import.

    Raises:
        KeyError: A required report column is missing from *df*.
    """
    # (db column, report header, converter, required)
    columns = [
        ("subject", "Subject", to_str, True),
        ("prior_subject_identifier", "Prior Subject Identifier", to_str, False),
        ("site", "Site", to_str, True),
        ("investigator", "Investigator", to_str, True),
        ("location", "Location", to_str, True),
        ("cohort_per_irt", "Cohort per IRT", to_str, True),
        ("informed_consent_date", "Informed Consent Date", to_date, True),
        ("adolescent_assent_date", "Adolescent Assent Date", to_date, False),
        ("age", "Subject's age collection", to_int, True),
        ("weight", "Subject's weight collection", to_float, False),
        ("rescreened_subject", "Rescreened Subject", to_str, False),
        ("adt_ir", "ADT-IR", to_str, False),
        ("three_or_more_advanced_therapies", "3 or More Advanced Therapies", to_str, False),
        ("only_oral_5asa_compounds", "Only Oral 5-ASA Compounds", to_str, False),
        ("ustekinumab", "Ustekinumab", to_str, False),
        ("isolated_proctitis", "Isolated Proctitis", to_str, False),
        ("clinical_responder_status_i12_m0", "Clinical Responder Status at I-12 / M-0", to_str, False),
        ("irt_subject_status", "IRT Subject Status", to_str, True),
        ("i0_rand_date_local", "I0_RAND_TIMESTAMP_LOCAL [Local]", to_date, False),
        ("last_irt_transaction", "Last Recorded IRT Transaction", to_str, True),
        ("last_irt_transaction_date_local", "Last Recorded IRT Transaction Date [Local]", to_date, True),
        ("last_irt_transaction_date_utc", "Last Recorded IRT Transaction Date (UTC)", to_date, True),
        ("next_irt_transaction", "Next Expected IRT Transaction", to_str, True),
        ("next_irt_transaction_date_local", "Next Expected IRT Transaction Date [Local]", to_date, True),
        ("most_recent_med_assignment_date", "Most Recent Medication Assignment Transaction [Local]", to_date, False),
        ("days_since_last_med_assignment", "Days Since Last Medication Assignment Transaction", to_int, False),
        ("patient_forecast_status", "Patient Forecast Status", to_str, False),
        ("patient_forecast_status_changed_date", "Patient Forecast Status Changed Date (UTC)", to_date, False),
    ]
    _insert_summary_rows(cursor, "iwrs_uco3001_subject_summary", import_id, df, columns)


def insert_mdd3003_summary(cursor, import_id, df):
    """Insert the MDD3003 summary rows of *df* for the given import.

    Raises:
        KeyError: A required report column is missing from *df*.
    """
    # (db column, report header, converter, required)
    columns = [
        ("subject", "Subject", to_str, True),
        ("prior_subject_identifier", "Prior Subject Identifier", to_str, False),
        ("site", "Site", to_str, True),
        ("investigator", "Investigator", to_str, True),
        ("location", "Location", to_str, True),
        ("cohort_per_irt", "Cohort per IRT", to_str, True),
        ("madrs_criteria_integrated", "MADRS response criteria integrated or manually entered", to_str, False),
        ("informed_consent_date", "Informed Consent Date", to_date, True),
        ("age", "Subject's age collection", to_int, True),
        ("madrs_criteria_v15", "MADRS response criteria v1.5 from RAVE", to_str, False),
        ("madrs_criteria_v16", "MADRS response criteria v1.6 from RAVE", to_str, False),
        ("madrs_criteria_v17", "MADRS response criteria v1.7 from RAVE", to_str, False),
        ("stratification_country", "Stratification Country", to_str, False),
        ("age_group", "Age Group", to_str, False),
        ("stable_remitters", "Stable Remitters vs. Non Stable Remitters", to_str, False),
        ("irt_subject_status", "IRT Subject Status", to_str, True),
        ("last_irt_transaction", "Last Recorded IRT Transaction", to_str, True),
        ("last_irt_transaction_date_local", "Last Recorded IRT Transaction Date [Local]", to_date, True),
        ("last_irt_transaction_date_utc", "Last Recorded IRT Transaction Date (UTC)", to_date, True),
        ("next_irt_transaction", "Next Expected IRT Transaction", to_str, True),
        ("next_irt_transaction_date_local", "Next Expected IRT Transaction Date [Local]", to_date, True),
        ("date_screened", "Date Screened [Local]", to_date, False),
        ("date_screen_failed", "Date Screen Failed [Local]", to_date, False),
        ("date_randomized_part1", "Date Randomized Part 1 [Local]", to_date, False),
        ("date_early_withdraw_randomized_part1", "Date Early Withdraw Randomized Part 1 [Local]", to_date, False),
        ("date_open_label_induction", "Date Open Label Induction [Local]", to_date, False),
        ("date_early_withdraw_open_label_induction", "Date Early Withdraw Open Label Induction [Local]", to_date, False),
        ("date_randomized_part2", "Date Randomized Part 2 [Local]", to_date, False),
        ("date_early_withdraw_randomized_part2", "Date Early Withdraw Randomized Part 2 [Local]", to_date, False),
        ("date_completed", "Date Completed [Local]", to_date, False),
        ("date_unblinded", "Date Unblinded [Local]", to_date, False),
    ]
    _insert_summary_rows(cursor, "iwrs_mdd3003_subject_summary", import_id, df, columns)
|
||||
|
||||
|
||||
def insert_visits(cursor, import_id, study, subject, visits):
    """Insert one iwrs_subject_visits row per entry of *visits*.

    Nothing is executed when *visits* is empty.  Every entry must provide the
    keys listed in ``visit_keys``.
    """
    if not visits:
        return

    sql = """
    INSERT INTO iwrs_subject_visits (
    import_id, study, subject, visit_type, scheduled_date, window_days,
    actual_date, irt_transaction_no, irt_transaction_description,
    medication_assignment, quantity_assigned, medication_id
    ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
    """
    # Order of the visit fields; it matches the column list above.
    visit_keys = (
        "visit_type",
        "scheduled_date",
        "window_days",
        "actual_date",
        "irt_transaction_no",
        "irt_transaction_description",
        "medication_assignment",
        "quantity_assigned",
        "medication_id",
    )
    fixed_part = (import_id, study, subject)
    for visit in visits:
        cursor.execute(sql, fixed_part + tuple(visit[key] for key in visit_keys))
|
||||
|
||||
|
||||
# ── notifications ─────────────────────────────────────────────────────────────
|
||||
|
||||
def find_notification_json_files(study):
    """Return the sorted paths of all ``*.json`` files in the study's detail folder."""
    json_mask = os.path.join(DETAILS_DIR, study, "*.json")
    matches = glob.glob(json_mask)
    matches.sort()
    return matches
|
||||
|
||||
|
||||
def import_notifications(conn, study):
    """Import the notification JSON/PDF pairs of *study* into iwrs_notifications.

    Every ``*.json`` file in the study's detail folder is read together with
    the PDF of the same base name (if present) and upserted by ``pk``.  Each
    row is committed before its files are moved to the "Zpracováno"
    sub-folder, so a file is only marked as processed once its data is stored.
    A failing file is reported and left in place; because the statement is an
    upsert, it can simply be imported again on the next run.

    Args:
        conn: Open database connection.
        study: Study identifier (also the name of the detail sub-folder).

    Returns:
        Number of notifications stored.
    """
    import json as json_lib
    import shutil

    json_files = find_notification_json_files(study)
    if not json_files:
        print(f" Žádné notifikace k importu pro {study}")
        return 0

    sql = """
    INSERT INTO iwrs_notifications
    (study, subject, pk, title, label, event, actual_date, text, pdf, source_file)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
    label = VALUES(label),
    text = VALUES(text),
    pdf = VALUES(pdf),
    source_file = VALUES(source_file)
    """

    done_dir = os.path.join(DETAILS_DIR, study, "Zpracováno")
    os.makedirs(done_dir, exist_ok=True)

    cursor = conn.cursor()
    count = 0
    try:
        for json_path in json_files:
            try:
                with open(json_path, "r", encoding="utf-8") as f:
                    meta = json_lib.load(f)

                # splitext only touches the extension; str.replace would also
                # rewrite ".json" occurring earlier in the path.
                pdf_path = os.path.splitext(json_path)[0] + ".pdf"
                has_pdf = os.path.exists(pdf_path)
                pdf_data = None
                if has_pdf:
                    with open(pdf_path, "rb") as f:
                        pdf_data = f.read()

                cursor.execute(sql, (
                    meta.get("study", study),
                    meta.get("subject"),
                    meta.get("pk"),
                    meta.get("title"),
                    meta.get("label"),
                    meta.get("event"),
                    to_date(meta.get("actual_date")),
                    meta.get("text"),
                    pdf_data,
                    os.path.basename(json_path),
                ))
                # Commit first, move afterwards: the files must not disappear
                # from the inbox while their row is still uncommitted.
                conn.commit()
                count += 1

                # Move to "Zpracováno" (processed).
                shutil.move(json_path, os.path.join(done_dir, os.path.basename(json_path)))
                if has_pdf:
                    shutil.move(pdf_path, os.path.join(done_dir, os.path.basename(pdf_path)))

            except Exception as e:
                print(f" CHYBA při importu {os.path.basename(json_path)}: {e}")
    finally:
        cursor.close()

    print(f" Notifikací uloženo/přesunuto: {count}")
    return count
|
||||
|
||||
|
||||
# ── main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def import_study(conn, study):
    """Import the summary report and all subject detail reports of one study.

    Creates a new import-log row, stores the summary rows with the
    study-specific insert helper, then parses every detail workbook and stores
    its visits.  Everything is written in one transaction: it is committed at
    the end, or rolled back if any step fails.

    Args:
        conn: Open DB-API connection.
        study: Study identifier.

    Returns:
        The ``import_id`` of the newly created import-log row.

    Raises:
        Exception: whatever the helpers or the database raise; the transaction
            is rolled back before the exception is re-raised.
    """
    summary_path = find_summary_file(study)
    print(f" Summary: {os.path.basename(summary_path)}")

    df_summary = read_summary_df(summary_path)
    df_summary = df_summary.dropna(how="all")

    detail_files = find_detail_files(study)
    print(f" Detail souborů: {len(detail_files)}")

    cursor = conn.cursor()
    try:
        import_id = insert_import(cursor, study, summary_path)
        print(f" import_id = {import_id}")

        if study == "77242113UCO3001":
            insert_uco3001_summary(cursor, import_id, df_summary)
        else:
            insert_mdd3003_summary(cursor, import_id, df_summary)
        print(f" Summary řádků: {len(df_summary)}")

        visited = 0
        for path in detail_files:
            fname = os.path.basename(path)
            # File name: "2026-05-04 77242113UCO3001 CZ100012001 Subject Detail.xlsx"
            m = re.search(r"\d{4}-\d{2}-\d{2} \S+ (\S+) Subject Detail\.xlsx", fname)
            subject = m.group(1) if m else "UNKNOWN"
            visits = parse_detail_visits(path)
            insert_visits(cursor, import_id, study, subject, visits)
            visited += len(visits)

        conn.commit()
    except Exception:
        # Discard the partial import; otherwise a later commit() on the same
        # connection would persist a half-imported study.
        conn.rollback()
        raise
    finally:
        cursor.close()

    print(f" Transakce uloženo: {visited}")
    return import_id
|
||||
|
||||
|
||||
def main():
    """Import reports and notifications of every configured study into MySQL.

    Each study is processed independently: a failure while importing the
    reports or the notifications of one study is printed and the run continues
    with the next step.  The connection is always closed, even when the run is
    interrupted.
    """
    conn = get_conn()
    print("Připojeno k MySQL.\n")

    try:
        for study in STUDIES:
            print(f"[{study}]")
            try:
                import_id = import_study(conn, study)
                print(f" OK — import_id {import_id}")
            except Exception as e:
                print(f" CHYBA: {e}")
            try:
                import_notifications(conn, study)
            except Exception as e:
                print(f" CHYBA notifikace: {e}")
            print()
    finally:
        # Also reached on KeyboardInterrupt, which `except Exception` does not catch.
        conn.close()

    print("Hotovo.")
|
||||
|
||||
|
||||
main()
|
||||
@@ -0,0 +1,175 @@
|
||||
"""
|
||||
Kompletní pipeline:
|
||||
1. Stažení Subject Summary Reportů (obě studie)
|
||||
2. Stažení Subject Detail Reportů + notifikací (obě studie)
|
||||
3. Import do MongoDB (subject_summary + visits + notifications)
|
||||
|
||||
Spusť tento skript místo samostatných skriptů.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import glob
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
import download_subject_details as dsd
|
||||
import import_to_mongo
|
||||
import import_notifications_to_mongo
|
||||
|
||||
# ── CONFIG ───────────────────────────────────────────────────────────────────
|
||||
BASE_URL = "https://janssen.4gclinical.com"
# SECURITY: portal credentials must not live in version control.  They can be
# supplied through the IWRS_EMAIL / IWRS_PASSWORD environment variables; the
# literal fallbacks are kept only so existing setups keep working.
# TODO: rotate this password and remove the fallbacks.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")

# Studies processed by the pipeline, in this order.
STUDIES = ["77242113UCO3001", "42847922MDD3003"]

# All download folders live next to this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
DETAILS_DIR = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
|
||||
|
||||
|
||||
# ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def unique_path(directory, stem):
    """Return a path for ``<stem>.xlsx`` in ``directory`` that does not exist yet.

    Candidates are tried in this order:

    1. ``<stem>.xlsx``
    2. ``<stem> HHMM.xlsx`` (current local time)
    3. ``<stem> HHMM (2).xlsx``, ``<stem> HHMM (3).xlsx``, ...

    The numbered fallback keeps a second download within the same minute from
    overwriting the previous file.

    Args:
        directory: Target directory.
        stem: File name without the ``.xlsx`` extension.

    Returns:
        Path of a file that does not exist at the time of the call.
    """
    path = os.path.join(directory, f"{stem}.xlsx")
    if not os.path.exists(path):
        return path

    time_tag = datetime.datetime.now().strftime("%H%M")
    candidate = os.path.join(directory, f"{stem} {time_tag}.xlsx")
    counter = 2
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{stem} {time_tag} ({counter}).xlsx")
        counter += 1
    return candidate
|
||||
|
||||
|
||||
def login(page, study):
    """Sign in to the portal and switch the session to ``study``.

    Fills the e-mail/password form with the module-level credentials, submits
    it, then picks ``study`` in the study selector and confirms the choice.
    After every navigation step the page is awaited until the network is idle.
    """
    def settle():
        page.wait_for_load_state("networkidle")

    page.goto(BASE_URL)
    settle()

    # Credentials form.
    for field_label, value in (("Email *", EMAIL), ("Password *", PASSWORD)):
        page.get_by_label(field_label).fill(value)
    page.locator("#login__submit").click()
    settle()

    # Study selection.
    page.get_by_label("Study *").click()
    page.get_by_role("option", name=study).click()
    page.get_by_role("button", name="SELECT").click()
    settle()
|
||||
|
||||
|
||||
# ── KROK 1: Subject Summary ───────────────────────────────────────────────────
|
||||
|
||||
def download_summary(page, study, today):
    """Download the Subject Summary Report of the logged-in study.

    Opens the summary report page, triggers the XLS download and stores it in
    ``INCOMING_DIR`` under ``"<today> <study> Subject Summary Report"`` (made
    unique by ``unique_path``).

    Returns:
        Full path of the saved workbook.
    """
    print(f" [{study}] Stahuji Subject Summary Report...")

    report_url = f"{BASE_URL}/report/patient_summary_report"
    page.goto(report_url)
    page.wait_for_load_state("networkidle", timeout=120000)

    stem = f"{today} {study} Subject Summary Report"
    filename = unique_path(INCOMING_DIR, stem)

    # The click must happen inside the expect_download context so the
    # download event is captured.
    with page.expect_download(timeout=120000) as dl:
        page.get_by_role("button", name="Download XLS").click()
    download = dl.value
    download.save_as(filename)

    saved_name = os.path.basename(filename)
    print(f" [{study}] Summary OK -> {saved_name}")
    return filename
|
||||
|
||||
|
||||
# ── KROK 2: Subject Details ───────────────────────────────────────────────────
|
||||
|
||||
def get_subjects_from_summary(summary_path):
    """Return the subject identifiers listed in a Subject Summary workbook.

    The workbook is first read without a header to locate the first row that
    contains a cell equal to ``"Subject"`` (ignoring surrounding whitespace);
    it is then re-read with that row as the header.

    Args:
        summary_path: Path of the summary ``.xlsx`` file.

    Returns:
        List of stripped subject identifiers as strings; empty cells are
        dropped.

    Raises:
        ValueError: if no row contains a ``"Subject"`` cell.
    """
    import pandas as pd

    raw = pd.read_excel(summary_path, header=None)
    header_row = next(
        (
            i
            for i, row in raw.iterrows()
            if "Subject" in [str(v).strip() for v in row]
        ),
        None,
    )
    if header_row is None:
        raise ValueError("Hlavičkový řádek nenalezen")

    df = pd.read_excel(summary_path, header=header_row)

    # The header row was detected with whitespace ignored, so the column label
    # may be padded (e.g. "Subject "); look it up the same way instead of
    # failing with a KeyError on the exact name.
    if "Subject" in df.columns:
        subject_col = "Subject"
    else:
        subject_col = next(
            (c for c in df.columns if str(c).strip() == "Subject"),
            "Subject",
        )
    return df[subject_col].dropna().astype(str).str.strip().tolist()
|
||||
|
||||
|
||||
def download_details(page, study, summary_path, today):
    """Download one Subject Detail workbook per subject of the summary report.

    The subjects are read from ``summary_path``; for each of them the detail
    report page is filtered to that subject, the XLS is downloaded into
    ``DETAILS_DIR/<study>`` and the filter is cleared again.
    """
    long_timeout = 120000

    out_dir = os.path.join(DETAILS_DIR, study)
    os.makedirs(out_dir, exist_ok=True)

    subjects = get_subjects_from_summary(summary_path)
    print(f" [{study}] Subjektů k stažení: {len(subjects)}")

    page.goto(f"{BASE_URL}/report/patient_detail_report")
    page.wait_for_load_state("networkidle", timeout=long_timeout)

    for subject in subjects:
        target = os.path.join(out_dir, f"{today} {study} {subject} Subject Detail.xlsx")

        # Type the subject into the search box and pick the first suggestion.
        search_box = page.locator('input[placeholder="search"], input[type="text"]').first
        search_box.click()
        search_box.fill(subject)
        page.wait_for_timeout(500)
        page.locator("mat-option").first.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=long_timeout)

        with page.expect_download(timeout=long_timeout) as dl:
            page.get_by_role("button", name="Download XLS").click()
        download = dl.value
        download.save_as(target)
        print(f" [{study}] Detail {subject} OK")

        # Reset the filter before the next subject.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=long_timeout)
|
||||
|
||||
|
||||
# ── KROK 3: Import do MongoDB ────────────────────────────────────────────────
|
||||
|
||||
def main():
    """Run the whole pipeline: download the reports, then import them to MongoDB.

    Steps 1 + 2 run in a fresh browser per study (separate session): log in,
    download the Subject Summary Report, then let ``download_subject_details``
    fetch the detail reports and notifications.  Step 3 imports the summary and
    visits of every study whose download succeeded, followed by the
    notification PDF/JSON files of all studies.

    Failures are reported per study / per step and never abort the run.
    """
    today = datetime.date.today().strftime("%Y-%m-%d")
    os.makedirs(INCOMING_DIR, exist_ok=True)
    os.makedirs(DETAILS_DIR, exist_ok=True)

    summary_paths = {}

    # Steps 1 + 2: downloads (Playwright, one browser per study because of the session).
    with sync_playwright() as p:
        for study in STUDIES:
            print("\n" + "=" * 60)
            print(f"[{study}] KROK 1: Subject Summary Report")
            print("=" * 60)

            browser = None
            try:
                # Launch and context creation are inside the try so that a
                # failure here is reported for this study only and the browser
                # is still closed.
                browser = p.chromium.launch(headless=False)
                context = browser.new_context(accept_downloads=True)
                page = context.new_page()

                login(page, study)
                summary_path = download_summary(page, study, today)
                summary_paths[study] = summary_path

                print(f"\n[{study}] KROK 2: Subject Detail Reports + notifikace")
                dsd.run(page, study)

            except Exception as e:
                print(f" [{study}] CHYBA při stahování: {e}")
                # Marks the study as failed so step 3 skips it.
                summary_paths[study] = None
            finally:
                if browser is not None:
                    browser.close()

    # Step 3: import to MongoDB.
    print("\n" + "=" * 60)
    print("KROK 3: Import do MongoDB")
    print("=" * 60)

    for study in STUDIES:
        summary_path = summary_paths.get(study)
        if not summary_path:
            print(f" [{study}] PŘESKOČENO — stahování selhalo")
            continue

        try:
            import_to_mongo.run(study, summary_path, DETAILS_DIR, today)
        except Exception as e:
            print(f" [{study}] CHYBA při importu summary/visits: {e}")

    # Notifications: PDF/JSON from disk straight into Mongo iwrs_notifications.
    print("\n [notifikace] import PDF/JSON do Mongo...")
    try:
        import_notifications_to_mongo.main(STUDIES)
    except Exception as e:
        print(f" CHYBA při importu notifikací: {e}")

    print("\n" + "=" * 60)
    print("Vše hotovo.")
    print("=" * 60)
|
||||
|
||||
|
||||
main()
|
||||
@@ -0,0 +1,172 @@
|
||||
from playwright.sync_api import sync_playwright
|
||||
import re
|
||||
import os
|
||||
import datetime
|
||||
import mysql.connector
|
||||
import db_config
|
||||
|
||||
|
||||
def get_existing_pks(study):
    """Return the set of notification ``pk`` values already stored for a study.

    Opens its own MySQL connection using ``db_config``.  This is a best-effort
    lookup: on any error a warning is printed and an empty set is returned, so
    the caller downloads everything.

    Args:
        study: Study identifier used to filter ``iwrs_notifications``.

    Returns:
        Set of ``pk`` values (possibly empty).
    """
    try:
        conn = mysql.connector.connect(
            host=db_config.DB_HOST, port=db_config.DB_PORT,
            user=db_config.DB_USER, password=db_config.DB_PASSWORD,
            database=db_config.DB_NAME,
        )
        # try/finally so the cursor and connection are released even when the
        # query itself fails.
        try:
            cursor = conn.cursor()
            try:
                cursor.execute("SELECT pk FROM iwrs_notifications WHERE study = %s", (study,))
                return {row[0] for row in cursor.fetchall()}
            finally:
                cursor.close()
        finally:
            conn.close()
    except Exception as e:
        print(f" UPOZORNĚNÍ: nelze načíst existující pk z DB ({e}), stahuji vše")
        return set()
|
||||
|
||||
BASE_URL = "https://janssen.4gclinical.com"
# SECURITY: portal credentials must not live in version control.  They can be
# supplied through the IWRS_EMAIL / IWRS_PASSWORD environment variables; the
# literal fallbacks are kept only so existing setups keep working.
# TODO: rotate this password and remove the fallbacks.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")

# Study and subject whose notifications this script downloads.
STUDY = "77242113UCO3001"
SUBJECT = "CZ100222003"

# Downloads are stored next to this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DETAILS_DIR = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
|
||||
|
||||
|
||||
def strip_html(html):
    """Convert an HTML fragment to plain text.

    ``<br>`` tags (any case, optionally self-closing) become newlines, every
    other tag is removed, runs of three or more newlines are collapsed to two,
    and leading/trailing whitespace is stripped.
    """
    substitutions = (
        (r"<br\s*/?>", "\n", re.IGNORECASE),
        (r"<[^>]+>", "", 0),
        (r"\n{3,}", "\n\n", 0),
    )
    text = html
    for pattern, replacement, flags in substitutions:
        text = re.sub(pattern, replacement, text, flags=flags)
    return text.strip()
|
||||
|
||||
|
||||
def main():
    """Download the notification PDFs (plus JSON metadata) of one subject.

    Logs in to the portal, selects ``STUDY``, opens the subject detail report
    and selects ``SUBJECT`` while capturing the ``table_1`` report-data
    response.  Every notification found in that response is printed; those
    whose ``pk`` is not in the database yet are fetched as PDF and written to
    ``DETAILS_DIR/<STUDY>`` together with a ``.json`` metadata file of the same
    base name.

    Raises:
        ValueError: if no access token is found in the browser's local storage
            or no app instance matches ``STUDY``.
    """
    import json
    from urllib.parse import quote

    existing_pks = get_existing_pks(STUDY)
    print(f"V DB již existuje {len(existing_pks)} notifikací pro {STUDY}")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False, args=["--start-maximized"])
        # try/finally: the browser is closed even when a step below raises.
        try:
            context = browser.new_context(no_viewport=True)
            page = context.new_page()

            print("Přihlašuji se...")
            page.goto(BASE_URL)
            page.wait_for_load_state("networkidle")
            page.get_by_label("Email *").fill(EMAIL)
            page.get_by_label("Password *").fill(PASSWORD)
            page.locator("#login__submit").click()
            page.wait_for_load_state("networkidle")

            page.get_by_label("Study *").click()
            page.get_by_role("option", name=STUDY).click()
            page.get_by_role("button", name="SELECT").click()
            page.wait_for_load_state("networkidle")

            page.goto(f"{BASE_URL}/report/patient_detail_report")
            page.wait_for_load_state("networkidle", timeout=60000)

            # JWT + api_base
            jwt = page.evaluate("localStorage.getItem('JWT.access')")
            if not jwt:
                # Fail with a clear message instead of a TypeError on jwt[:30].
                raise ValueError("JWT.access nenalezen v localStorage")
            print(f"JWT: {jwt[:30]}...")
            instances = page.evaluate("""async (jwt) => {
                const res = await fetch('/_/api/dispatch/app_instances/', {
                    headers: { 'Authorization': `Bearer ${jwt}` }
                });
                return res.json();
            }""", jwt)
            instance = next(
                (i for i in instances if STUDY in (i.get("label") or "")),
                None,
            )
            if not instance:
                raise ValueError(f"Instance pro {STUDY} nenalezena")
            api_base = instance["api_base_url"]
            print(f"API base: {api_base}")

            # Select the subject and capture the table_1 response directly.
            print(f"Vybírám subjekt {SUBJECT}...")
            input_field = page.locator('input[placeholder="search"], input[type="text"]').first
            input_field.click()
            input_field.fill(SUBJECT)
            page.wait_for_timeout(1000)

            with page.expect_response(
                lambda r: "report_data" in r.url and "table_1" in r.url,
                timeout=60000
            ) as resp_info:
                page.locator("mat-option").first.dispatch_event("click")

            response = resp_info.value
            data = response.json()

            out_dir = os.path.join(DETAILS_DIR, STUDY)
            os.makedirs(out_dir, exist_ok=True)
            today = datetime.date.today().strftime("%Y-%m-%d")

            print(f"\n{'='*60}")
            print(f"Subjekt: {SUBJECT} | Studie: {STUDY}")
            print(f"{'='*60}")

            count = 0
            for row in data.get("data", []):
                for notif in (row.get("notification") or []):
                    # `or` fallbacks also cover keys that are present but null.
                    item = notif.get("item") or {}
                    pk = item.get("pk")
                    title = item.get("et_title")
                    label = (notif.get("label") or title or "").strip()
                    # Whole label, spaces -> underscores, characters that are
                    # not allowed in file names removed.
                    safe_label = re.sub(r'[\\/*?:"<>|]', "", label).replace(" ", "_")
                    body = item.get("body") or ""
                    text = strip_html(body)
                    count += 1
                    print(f"\n--- Notifikace #{count}: {safe_label} (pk={pk}) | event: {row.get('event_event_id')} ---")
                    print(text)

                    if pk in existing_pks:
                        print(f" → pk={pk} již v DB, přeskakuji")
                        continue

                    actual_date = row.get("actual_date_raw") or "0000-00-00"
                    pdf_filename = os.path.join(out_dir, f"{actual_date}_{safe_label}.pdf")
                    if os.path.exists(pdf_filename):
                        # Same date + label already on disk: disambiguate by pk.
                        pdf_filename = os.path.join(out_dir, f"{actual_date}_{safe_label}_pk{pk}.pdf")

                    # The title is free text; percent-encode it so spaces,
                    # '&' or '#' cannot corrupt the query string.
                    pdf_url = (
                        f"{BASE_URL}{api_base}/api/v1/meta_api/pdfnotification"
                        f"?pk={pk}&title={quote(str(title), safe='')}&html=true"
                    )
                    pdf_resp = page.request.get(pdf_url, headers={
                        "Authorization": f"Bearer {jwt}",
                        "lang": "en",
                        "prancer_study": STUDY,
                        "Accept": "application/json, text/plain, */*",
                    })
                    if pdf_resp.ok:
                        with open(pdf_filename, "wb") as f:
                            f.write(pdf_resp.body())
                        print(f" → PDF uloženo: {os.path.basename(pdf_filename)}")
                        # Swap only the extension; the label may itself contain ".pdf".
                        json_filename = os.path.splitext(pdf_filename)[0] + ".json"
                        with open(json_filename, "w", encoding="utf-8") as f:
                            json.dump({
                                "pk": pk,
                                "title": title,
                                "label": label,
                                "event": row.get("event_event_id"),
                                "actual_date": actual_date,
                                "subject": SUBJECT,
                                "study": STUDY,
                                "text": text,
                            }, f, ensure_ascii=False, indent=2)
                        print(f" → JSON uloženo: {os.path.basename(json_filename)}")
                    else:
                        print(f" → PDF chyba: {pdf_resp.status}")
                    # Short pause between PDF requests.
                    page.wait_for_timeout(300)

            if count == 0:
                print("Žádné notifikace nalezeny.")
            else:
                print(f"\n{'='*60}")
                print(f"Celkem notifikací: {count}")
        finally:
            browser.close()
|
||||
|
||||
|
||||
main()
|
||||
Reference in New Issue
Block a user