Add Outlook/Soubory/Clario/Feasibility scripts and reports; ignore Incoming, Outlook downloads & profile
This commit is contained in:
@@ -0,0 +1,175 @@
|
||||
"""
|
||||
Kompletní pipeline:
|
||||
1. Stažení Subject Summary Reportů (obě studie)
|
||||
2. Stažení Subject Detail Reportů + notifikací (obě studie)
|
||||
3. Import do MongoDB (subject_summary + visits + notifications)
|
||||
|
||||
Spusť tento skript místo samostatných skriptů.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import glob
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
import download_subject_details as dsd
|
||||
import import_to_mongo
|
||||
import import_notifications_to_mongo
|
||||
|
||||
# ── CONFIG ───────────────────────────────────────────────────────────────────
# Portal root; report pages are addressed relative to it.
BASE_URL = "https://janssen.4gclinical.com"

# SECURITY NOTE(review): the login credentials were committed to the
# repository in plain text. They can now be supplied through the IWRS_EMAIL /
# IWRS_PASSWORD environment variables; the literals below remain only as a
# backward-compatible fallback and should be rotated and removed from source.
EMAIL = os.environ.get("IWRS_EMAIL", "vbuzalka@its.jnj.com")
PASSWORD = os.environ.get("IWRS_PASSWORD", "Vlado123++-+")

# Study identifiers processed by the pipeline, in this order.
STUDIES = ["77242113UCO3001", "42847922MDD3003"]

# All output folders live next to this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INCOMING_DIR = os.path.join(BASE_DIR, "IncomingSourceReports")
DETAILS_DIR = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
|
||||
|
||||
|
||||
# ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def unique_path(directory, stem):
    """Return a path for ``<stem>.xlsx`` in *directory* that does not exist yet.

    The plain ``<stem>.xlsx`` is preferred. If it is already taken, the
    current local time is appended as ``<stem> HHMM.xlsx``. If that name is
    taken too (e.g. a second re-run within the same minute), a numeric
    counter is added -- ``<stem> HHMM (2).xlsx``, ``(3)``, ... -- so an
    earlier download is never silently overwritten.

    Args:
        directory: Folder the file will be saved into.
        stem: File name without the ``.xlsx`` extension.

    Returns:
        A path inside *directory* that did not exist at the time of the call.
    """
    path = os.path.join(directory, f"{stem}.xlsx")
    if not os.path.exists(path):
        return path

    time_tag = datetime.datetime.now().strftime("%H%M")
    path = os.path.join(directory, f"{stem} {time_tag}.xlsx")

    # The time tag only has minute resolution, so it can collide as well.
    counter = 2
    while os.path.exists(path):
        path = os.path.join(directory, f"{stem} {time_tag} ({counter}).xlsx")
        counter += 1
    return path
|
||||
|
||||
|
||||
def login(page, study):
    """Log in at BASE_URL with the configured credentials and select *study*.

    Fills the "Email *" and "Password *" fields with EMAIL and PASSWORD,
    submits the form, then opens the "Study *" selector, clicks the option
    named *study* and confirms with the "SELECT" button. After each of the
    three navigation steps the page is given time to reach the
    "networkidle" load state.

    Args:
        page: Playwright page used for the whole session.
        study: Name of the study option to choose.
    """
    def wait_idle():
        page.wait_for_load_state("networkidle")

    # Credentials form.
    page.goto(BASE_URL)
    wait_idle()
    for label, value in (("Email *", EMAIL), ("Password *", PASSWORD)):
        page.get_by_label(label).fill(value)
    page.locator("#login__submit").click()
    wait_idle()

    # Study picker, interacted with right after the login form is submitted.
    page.get_by_label("Study *").click()
    page.get_by_role("option", name=study).click()
    page.get_by_role("button", name="SELECT").click()
    wait_idle()
|
||||
|
||||
|
||||
# ── KROK 1: Subject Summary ───────────────────────────────────────────────────
|
||||
|
||||
def download_summary(page, study, today):
    """Download the Subject Summary Report through *page* and save it.

    Opens the patient summary report page, clicks "Download XLS" and stores
    the file in INCOMING_DIR under a name built from *today* and *study*
    (``<today> <study> Subject Summary Report``), made non-clashing by
    unique_path(). *study* is used only for the file name and log lines;
    which study's data is served depends on the session behind *page*.

    Args:
        page: Playwright page with an active session.
        study: Study identifier used in the file name and messages.
        today: Date string used as the file name prefix.

    Returns:
        Path the report was saved to.
    """
    timeout_ms = 120000

    print(f" [{study}] Stahuji Subject Summary Report...")
    page.goto(f"{BASE_URL}/report/patient_summary_report")
    page.wait_for_load_state("networkidle", timeout=timeout_ms)

    stem = f"{today} {study} Subject Summary Report"
    target = unique_path(INCOMING_DIR, stem)

    # The click triggers the download; its handle is read after the block exits.
    with page.expect_download(timeout=timeout_ms) as download_info:
        page.get_by_role("button", name="Download XLS").click()
    download = download_info.value
    download.save_as(target)

    print(f" [{study}] Summary OK -> {os.path.basename(target)}")
    return target
|
||||
|
||||
|
||||
# ── KROK 2: Subject Details ───────────────────────────────────────────────────
|
||||
|
||||
def get_subjects_from_summary(summary_path):
    """Return the subject identifiers listed in a Subject Summary workbook.

    The sheet is first read without a header to find the first row that
    contains a cell equal to "Subject" (ignoring surrounding whitespace).
    The sheet is then re-read with that row as the header, and the column
    found during detection is selected *by position*. Selecting by position
    keeps detection and lookup consistent: a header such as ``"Subject "``
    is accepted by the whitespace-tolerant detection and would otherwise
    fail an exact ``df["Subject"]`` lookup with ``KeyError``.

    Args:
        summary_path: Path to the downloaded summary ``.xlsx`` file.

    Returns:
        List of subject values as stripped strings; missing and blank
        cells are omitted.

    Raises:
        ValueError: If no row contains a "Subject" cell.
    """
    # Imported lazily, as in the original, so pandas is only needed here.
    import pandas as pd

    raw = pd.read_excel(summary_path, header=None)

    header_row = None
    subject_col = None
    for i, row in raw.iterrows():
        labels = [str(v).strip() for v in row]
        if "Subject" in labels:
            header_row = i
            subject_col = labels.index("Subject")
            break
    if header_row is None:
        raise ValueError("Hlavičkový řádek nenalezen")

    # Second read lets pandas infer dtypes from the data rows only.
    df = pd.read_excel(summary_path, header=header_row)
    subjects = df.iloc[:, subject_col].dropna().astype(str).str.strip()

    # Whitespace-only cells survive dropna(); an empty search term is useless.
    return [s for s in subjects.tolist() if s]
|
||||
|
||||
|
||||
def download_details(page, study, summary_path, today):
    """Download one Subject Detail workbook per subject of the summary.

    The subjects come from get_subjects_from_summary(summary_path). For each
    one the function types the subject into the search input of the patient
    detail report page, waits 500 ms, dispatches a click on the first
    ``mat-option``, downloads the XLS and presses "Clear" before moving on.
    Files are written to ``DETAILS_DIR/<study>/`` as
    ``<today> <study> <subject> Subject Detail.xlsx``; an existing file of
    the same name is not given a different name.

    Args:
        page: Playwright page with an active session.
        study: Study identifier (sub-folder name, file name part, messages).
        summary_path: Path to the summary workbook listing the subjects.
        today: Date string used as the file name prefix.
    """
    timeout_ms = 120000

    target_dir = os.path.join(DETAILS_DIR, study)
    os.makedirs(target_dir, exist_ok=True)

    subject_ids = get_subjects_from_summary(summary_path)
    print(f" [{study}] Subjektů k stažení: {len(subject_ids)}")

    page.goto(f"{BASE_URL}/report/patient_detail_report")
    page.wait_for_load_state("networkidle", timeout=timeout_ms)

    for subject in subject_ids:
        target = os.path.join(
            target_dir, f"{today} {study} {subject} Subject Detail.xlsx"
        )

        # Search for the subject and pick the first suggestion offered.
        search_box = page.locator(
            'input[placeholder="search"], input[type="text"]'
        ).first
        search_box.click()
        search_box.fill(subject)
        page.wait_for_timeout(500)
        first_option = page.locator("mat-option").first
        first_option.dispatch_event("click")
        page.wait_for_load_state("networkidle", timeout=timeout_ms)

        with page.expect_download(timeout=timeout_ms) as download_info:
            page.get_by_role("button", name="Download XLS").click()
        download_info.value.save_as(target)
        print(f" [{study}] Detail {subject} OK")

        # Reset the form before the next subject.
        page.get_by_role("button", name="Clear").click()
        page.wait_for_load_state("networkidle", timeout=timeout_ms)
|
||||
|
||||
|
||||
# ── Pipeline entry point: steps 1–2 (download) and step 3 (MongoDB import) ───
|
||||
|
||||
def _print_banner(title):
    """Print a blank line, then *title* framed by two 60-character ``=`` rules."""
    rule = "=" * 60
    print("\n" + rule)
    print(title)
    print(rule)


def main():
    """Run the whole pipeline: download reports, then import them to MongoDB.

    Steps 1 + 2 run per study in a separate Chromium instance: log in,
    download the Subject Summary Report, then hand the page to ``dsd.run``
    for the detail reports and notifications. Any exception in that
    sequence is printed and the study's summary path is recorded as
    ``None`` -- including when the summary itself was downloaded but
    ``dsd.run`` failed afterwards.

    Step 3 calls ``import_to_mongo.run`` for every study that has a summary
    path and finally ``import_notifications_to_mongo.main`` with the full
    study list. Import errors are printed and do not stop the run.
    """
    today = datetime.date.today().isoformat()
    for directory in (INCOMING_DIR, DETAILS_DIR):
        os.makedirs(directory, exist_ok=True)

    summary_paths = {}

    # Steps 1 + 2: downloads (Playwright, each study separately because of
    # the session).
    with sync_playwright() as p:
        for study in STUDIES:
            _print_banner(f"[{study}] KROK 1: Subject Summary Report")
            browser = p.chromium.launch(headless=False)
            context = browser.new_context(accept_downloads=True)
            page = context.new_page()

            try:
                login(page, study)
                summary_paths[study] = download_summary(page, study, today)

                print(f"\n[{study}] KROK 2: Subject Detail Reports + notifikace")
                dsd.run(page, study)
            except Exception as e:
                print(f" [{study}] CHYBA při stahování: {e}")
                summary_paths[study] = None
            finally:
                browser.close()

    # Step 3: import into MongoDB.
    _print_banner("KROK 3: Import do MongoDB")

    for study in STUDIES:
        summary_path = summary_paths.get(study)
        if summary_path:
            try:
                import_to_mongo.run(study, summary_path, DETAILS_DIR, today)
            except Exception as e:
                print(f" [{study}] CHYBA při importu summary/visits: {e}")
        else:
            print(f" [{study}] PŘESKOČENO — stahování selhalo")

    # Notifications: PDF/JSON from disk straight into Mongo iwrs_notifications.
    print("\n [notifikace] import PDF/JSON do Mongo...")
    try:
        import_notifications_to_mongo.main(STUDIES)
    except Exception as e:
        print(f" CHYBA při importu notifikací: {e}")

    _print_banner("Vše hotovo.")
|
||||
|
||||
|
||||
# Script entry: the pipeline starts as soon as this module is executed.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module would also launch the browser and the import — confirm nothing
# imports it before adding a guard.
main()
|
||||
Reference in New Issue
Block a user