This commit is contained in:
2026-06-10 09:25:49 +02:00
parent bc9b874f3b
commit 30f045e350
26 changed files with 1 additions and 1 deletions
+90
View File
@@ -0,0 +1,90 @@
"""
Stažení reportů z IWRS portálu — vše do jednoho adresáře `Incoming/`.
1. Subject Summary Report (per studie)
2. Subject Detail Reports + notifikace (per subjekt)
Import se spouští samostatně skriptem `import_all.py`.
"""
import os
import datetime
from playwright.sync_api import sync_playwright
import download_subject_details as dsd
# ── CONFIG ───────────────────────────────────────────────────────────────────
# Root URL of the IWRS portal; report URLs are built relative to it.
BASE_URL = "https://janssen.4gclinical.com"
# SECURITY(review): real credentials are committed in this file. They can be
# supplied through the IWRS_EMAIL / IWRS_PASSWORD environment variables; the
# literals below remain only as a fallback so existing runs keep working.
# Rotate the password and remove the fallbacks once the environment is set up.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")
# Study identifiers to download, processed in this order.
STUDIES = ["77242113UCO3001", "42847922MDD3003"]
# Directory containing this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Single target directory for every downloaded file.
INCOMING_DIR = os.path.join(BASE_DIR, "Incoming")
def unique_path(directory, stem, ext=".xlsx"):
    """Return a path inside *directory* that does not exist yet.

    Candidates are tried in this order:

    1. ``<stem><ext>``
    2. ``<stem> <HHMM><ext>`` (current local time)
    3. ``<stem> <HHMM> (2)<ext>``, ``<stem> <HHMM> (3)<ext>``, ...

    The first two steps match the previous behaviour. The numbered step
    prevents a silent overwrite when the time-tagged name is already taken,
    for example on a second re-run within the same minute.

    Args:
        directory: Directory the file will be written to.
        stem: File name without extension.
        ext: File extension including the leading dot.

    Returns:
        A path string that did not exist at the time of the check.
    """
    path = os.path.join(directory, f"{stem}{ext}")
    if not os.path.exists(path):
        return path

    time_tag = datetime.datetime.now().strftime("%H%M")
    tagged_stem = f"{stem} {time_tag}"
    candidate = os.path.join(directory, f"{tagged_stem}{ext}")
    counter = 2
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{tagged_stem} ({counter}){ext}")
        counter += 1
    return candidate
def login(page, study):
    """Sign in to the portal and select *study*.

    Opens BASE_URL, fills the e-mail and password fields with EMAIL and
    PASSWORD, submits the form, then picks *study* in the "Study *" selector
    and confirms with the SELECT button. After each of the three steps the
    function waits for the page to reach the "networkidle" load state.

    Args:
        page: Playwright page used for the whole session.
        study: Study identifier, matched against the option name.
    """
    def settle():
        page.wait_for_load_state("networkidle")

    # Step 1: open the portal.
    page.goto(BASE_URL)
    settle()

    # Step 2: submit the credentials.
    for label, value in (("Email *", EMAIL), ("Password *", PASSWORD)):
        page.get_by_label(label).fill(value)
    page.locator("#login__submit").click()
    settle()

    # Step 3: choose the study and confirm.
    page.get_by_label("Study *").click()
    study_option = page.get_by_role("option", name=study)
    study_option.click()
    confirm_button = page.get_by_role("button", name="SELECT")
    confirm_button.click()
    settle()
def download_summary(page, study, today):
    """Download the Subject Summary Report of the selected study.

    Navigates to the summary report page, clicks "Download XLS" and stores the
    download in INCOMING_DIR as ``<today> <study> Subject Summary Report``
    (made unique by ``unique_path``).

    Args:
        page: Playwright page on which the study is already selected.
        study: Study identifier, used in the file name and log output.
        today: Date string placed at the start of the file name.

    Returns:
        Full path of the saved file.
    """
    wait_ms = 120000  # same timeout for page load and for the download

    print(f" [{study}] Stahuji Subject Summary Report...")
    report_url = f"{BASE_URL}/report/patient_summary_report"
    page.goto(report_url)
    page.wait_for_load_state("networkidle", timeout=wait_ms)

    # Target name is chosen before the download is triggered.
    target = unique_path(INCOMING_DIR, f"{today} {study} Subject Summary Report")
    with page.expect_download(timeout=wait_ms) as download_info:
        page.get_by_role("button", name="Download XLS").click()
    download_info.value.save_as(target)

    saved_name = os.path.basename(target)
    print(f" [{study}] Summary OK -> {saved_name}")
    return target
def main():
    """Download all reports for every study in STUDIES into INCOMING_DIR.

    A fresh Chromium browser is launched for each study. Within it the script
    logs in, downloads the summary report and then hands over to ``dsd.run``
    for the subject detail files and notifications.

    An error in one study is printed and the loop continues with the next
    study; the browser of the failed study is always closed.
    """
    today = datetime.date.today().strftime("%Y-%m-%d")
    os.makedirs(INCOMING_DIR, exist_ok=True)

    with sync_playwright() as p:
        for study in STUDIES:
            print("\n" + "=" * 60)
            print(f"[{study}] Stažení reportů")
            print("=" * 60)
            browser = p.chromium.launch(headless=False)
            try:
                # Context and page creation are inside the try so that a
                # failure here is reported per study, the browser is still
                # closed, and the remaining studies are still processed.
                context = browser.new_context(accept_downloads=True)
                page = context.new_page()
                login(page, study)
                download_summary(page, study, today)
                # detail XLSX files + notifications go straight into Incoming/
                dsd.run(page, study, out_dir=INCOMING_DIR, subjects_source_dir=INCOMING_DIR)
            except Exception as exc:
                # Deliberate best-effort: one failing study must not stop the run.
                print(f" [{study}] CHYBA: {exc}")
            finally:
                browser.close()

    print("\n" + "=" * 60)
    print(f"Stahování hotovo. Soubory v: {INCOMING_DIR}")
    print("Pro import spusť: python import_all.py")
    print("=" * 60)


# Run the download only when executed as a script, not on import.
if __name__ == "__main__":
    main()
+107
View File
@@ -0,0 +1,107 @@
"""
Import všech čekajících reportů z `Incoming/` do MongoDB.
Pořadí zpracování per typ + studie: nejstarší soubor podle mtime první
(důležité pro chronologickou správnost snapshotů).
Po úspěšném importu se soubor přesune do `Incoming/Zpracováno/`.
Při chybě zůstane soubor v `Incoming/`.
"""
import os
import sys
import glob
import shutil
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from common.mongo_writer import ensure_indexes
import import_to_mongo
import import_notifications_to_mongo
# Directory containing this script; the data directories are resolved from it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Inbox that is scanned for report files waiting to be imported.
INCOMING_DIR = os.path.join(BASE_DIR, "Incoming")
# Archive subdirectory that successfully imported files are moved into.
DONE_DIR = os.path.join(INCOMING_DIR, "Zpracováno")
# Study identifiers processed by main(), in this order.
STUDIES = ["77242113UCO3001", "42847922MDD3003"]
def _move_done(path):
    """Archive an imported file by moving it into DONE_DIR.

    DONE_DIR is created if it does not exist. A file of the same name that is
    already archived is deleted first, so the new file replaces it.

    Args:
        path: Path of the file to archive.
    """
    file_name = os.path.basename(path)
    os.makedirs(DONE_DIR, exist_ok=True)
    archived = os.path.join(DONE_DIR, file_name)

    # Name collision → overwrite. Per the original note, Mongo already holds
    # the current data and the file is kept only as an archive copy.
    already_archived = os.path.exists(archived)
    if already_archived:
        os.remove(archived)

    shutil.move(path, archived)
def _sorted_by_mtime(paths):
    """Return *paths* ordered by modification time, oldest first.

    Entries whose file name starts with ``~$`` are dropped before sorting.
    """
    kept = []
    for candidate in paths:
        if os.path.basename(candidate).startswith("~$"):
            continue
        kept.append(candidate)
    kept.sort(key=os.path.getmtime)
    return kept
def import_summaries(study):
    """Import every pending Subject Summary Report of *study*.

    Files in INCOMING_DIR matching ``* <study> Subject Summary Report*.xlsx``
    are processed oldest first. Each file is imported and then archived with
    ``_move_done``. If either step raises, the error is printed and processing
    continues with the next file.

    Args:
        study: Study identifier used in the file pattern and the import call.
    """
    summary_glob = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report*.xlsx")
    pending = _sorted_by_mtime(glob.glob(summary_glob))

    if len(pending) == 0:
        print(f" [{study}] summary: nic ke zpracování")
        return

    print(f" [{study}] summary: {len(pending)} soubor(ů) (oldest first)")
    for report_path in pending:
        try:
            import_to_mongo.import_subject_summary(study, report_path)
            # Archiving stays inside the try: a failed move is reported the
            # same way as a failed import.
            _move_done(report_path)
        except Exception as err:
            report_name = os.path.basename(report_path)
            print(f" [{study}] CHYBA summary {report_name}: {err}")
def import_details(study):
    """Import every pending Subject Detail file of *study*.

    Files in INCOMING_DIR matching ``* <study> * Subject Detail.xlsx`` are
    processed oldest first. A file is skipped with a message when its name
    cannot be parsed, and skipped silently when the parsed study differs from
    *study*. Otherwise it is imported and archived with ``_move_done``. If
    either step raises, the error is printed and the next file is processed.

    Args:
        study: Study identifier used in the file pattern and the import call.
    """
    detail_glob = os.path.join(INCOMING_DIR, f"* {study} * Subject Detail.xlsx")
    pending = _sorted_by_mtime(glob.glob(detail_glob))

    if len(pending) == 0:
        print(f" [{study}] detail: nic ke zpracování")
        return

    print(f" [{study}] detail: {len(pending)} soubor(ů) (oldest first)")
    for detail_path in pending:
        name_parts = import_to_mongo.parse_detail_filename(detail_path)
        if not name_parts:
            skipped_name = os.path.basename(detail_path)
            print(f" [{study}] PŘESKAKUJI (nelze parsovat název): {skipped_name}")
            continue

        _unused, file_study, subject = name_parts
        if file_study == study:
            try:
                import_to_mongo.import_visits_single_file(study, subject, detail_path)
                _move_done(detail_path)
            except Exception as err:
                failed_name = os.path.basename(detail_path)
                print(f" [{study}] CHYBA detail {failed_name}: {err}")
        # Otherwise the file belongs to a different study and is left alone.
def main():
    """Import everything waiting in INCOMING_DIR.

    Returns early with a message when INCOMING_DIR does not exist. Otherwise
    it calls ``ensure_indexes``, imports summaries and then details for each
    study in STUDIES, and finally delegates notification import to
    ``import_notifications_to_mongo.import_from_dir``.
    """
    rule = "=" * 60

    incoming_present = os.path.isdir(INCOMING_DIR)
    if not incoming_present:
        print(f"Adresář neexistuje: {INCOMING_DIR}")
        return

    ensure_indexes()

    # Phase 1: summaries and visit details, study by study.
    print(rule)
    print("Import Subject Summary + Visits")
    print(rule)
    for study in STUDIES:
        import_summaries(study)
        import_details(study)

    # Phase 2: notifications for all studies at once.
    print("\n" + rule)
    print("Import notifikací")
    print(rule)
    import_notifications_to_mongo.import_from_dir(INCOMING_DIR, DONE_DIR, STUDIES)

    print("\n" + rule)
    print(f"Hotovo. Zpracované soubory: {DONE_DIR}")
    print(rule)


# Run the import only when executed as a script, not on import.
if __name__ == "__main__":
    main()