# ============================================================
# studytraining_reports_export_v1.0.py
# Verze: 1.0
# Datum: 2026-06-13
# Popis: Export reportů z J&J Veeva "Study Training" vaultu
#        (its-jnj-studytraining.veevavault.com) do CSV.
#        Jeden běh udělá:
#          1) login do vaultu (persistentní session + ruční 2FA),
#          2) pro každou (report × studie) kombinaci otevře PŘÍMÝ
#             link s předvyplněnými filtry (Study + Country=Czech
#             Republic, Site skip) — prompt "Select Report Values"
#             se tím přeskočí,
#          3) počká na dopočítání reportu (jsou pomalé),
#          4) ⋯ (Actions) -> Export to CSV [-> Data Only] -> Export,
#             zachytí download a uloží do StudyTraining/exports/.
#
#        Linky se NESkládají natvrdo — generují se z tabulek
#        STUDIES + REPORTS, takže přidat studii/report = jeden
#        řádek. Interní ID (VC8 = studie, VC9 = Czech Republic)
#        a formáty klíčů jsou ověřené (viz
#        StudyTraining/training_report_links_CZ.md).
#
#        Tento skript NEUKLÁDÁ do Mongo a NESTAHUJE dokumenty —
#        jen exportuje reporty. (Parsování/Mongo doplníme později
#        podle toho, jak bude vypadat dashboard.)
#
#        Vychází z logiky vtmf_pipeline_v1.3.py (login, dialogy,
#        export přes ⋯ menu).
#
# Heslo se NIKDY nedává natvrdo — čte se z .env v rootu projektu
# Janssen (VAULT_USER / VAULT_PASS).
# ============================================================

import csv
import os
import re
import sys
from datetime import datetime
from pathlib import Path

from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout

# --- Configuration -----------------------------------------------------

# Host name of the Study Training vault; every URL below is derived from it.
HOST = "its-jnj-studytraining.veevavault.com"
# Prefix of a report-viewer link; build_report_url() appends the report id
# and the filter query string to it.
BASE_VIEWER = f"https://{HOST}/ui/#reporting/viewer/"
# Landing page opened first by login_if_needed().
VAULT_HOME = f"https://{HOST}/"
# URL glob handed to page.wait_for_url() to detect that the vault UI loaded.
VAULT_UI_PATTERN = f"https://{HOST}/ui**"

# We are inside the vault only when the URL STARTS WITH host/ui. Careful: the
# login page login.veevavault.com carries host/ui in its retURL parameter, so
# a substring test is not enough — it has to be a prefix test.
def in_vault(page):
    """Return True when the page's current URL begins with the vault UI root.

    Args:
        page: object exposing the current address as a ``url`` string
            attribute (a Playwright page in this script).
    """
    current_url = page.url
    ui_root = f"https://{HOST}/ui"
    return current_url.startswith(ui_root)


# Directory containing this script; all other paths are relative to it.
SCRIPT_DIR = Path(__file__).resolve().parent
PROFILE_DIR = SCRIPT_DIR / "studytraining_profile"  # persistent browser session
ENV_FILE = SCRIPT_DIR.parent / ".env"  # root of the Janssen project
DEBUG_DIR = SCRIPT_DIR / "debug"  # diagnostics written on failure
OUTPUT_DIR = SCRIPT_DIR / "exports"  # exported CSV files land here

# Study -> internal IDs (VC8 = Study, VC9 = Study Country "Czech Republic").
# Verified 2026-06-13 from the URL after running a report (the IDs are global
# across reports).
STUDIES = {
    "77242113UCO3001": {"study": "VC8000000008007", "cz": "VC900000000B076"},
    "77242113CRD3001": {"study": "VC8000000008010", "cz": "VC900000000A093"},
    "42847922MDD3003": {"study": "VC800000000B067", "cz": "VC900000000D751"},
}

# Which studies to export in this run (leave the rest in STUDIES for later).
STUDIES_TO_RUN = ["77242113UCO3001"]

# Multipass key prefix (shared by all Multipass reports, verified).
MP = "report_view_person_with_learner_rolep__c.learner_role_person__v___person__v."
# V004 (a different report type) has its own reportTypeRef prefix.
OSF = "reportTypeRef1586959950918."

# Report definitions: id, short code used in the file name, key type, and
# whether the report has a Site filter.
#   keytype "multipass" -> keys MP+study__v / study_country__v / study_site__v
#   keytype "osf"       -> keys OSF+OSF00000005A604 / A605 / A606
REPORTS = [
    {"id": "0RP00000000V004", "code": "Status", "keytype": "osf", "has_site": True},
    {"id": "0RP00000000V006", "code": "OpenAssignments", "keytype": "multipass", "has_site": False},
    {"id": "0RP00000000V007", "code": "ByTrainingRole", "keytype": "multipass", "has_site": True},
    {"id": "0RP00000000V008", "code": "OverdueFiltered", "keytype": "multipass", "has_site": True},
    {"id": "0RP00000000V003", "code": "ComplianceRisks", "keytype": "multipass", "has_site": True},
    {"id": "0RP00000000V005", "code": "AllOverdue", "keytype": "multipass", "has_site": True},
]

# URL encoding: comma -> %2C, semicolon -> %3B
EQ = "%2C%2C%2CEQ="  # ...,,,EQ=<value>
SKIP = "%2C%2C%2CEQ%3Bskip="  # ...,,,EQ;skip=

# Waiting / robustness
REPORT_TIMEOUT_MS = 150000  # the report is slow — give it time to finish computing
DL_TIMEOUT_MS = 120000  # timeout for the download itself
MAX_ATTEMPTS = 2  # attempts per (report, study) combination
RETRY_PAUSE_MS = 5000  # pause before retrying a failed export (see main())
BETWEEN_MS = 800  # pause after each task before the next one (see main())


def log(msg):
    """Write *msg* to standard output and flush right away.

    Flushing on every call keeps progress lines visible immediately, even
    when stdout is buffered.
    """
    print(msg, file=sys.stdout, flush=True)


# --- .env / credentials -------------------------------------------------

def load_env_file(path):
    """Load ``KEY=VALUE`` pairs from a .env file into ``os.environ``.

    Variables that are already set in the environment are never overwritten,
    and entries with an empty key or an empty value are ignored. Blank lines,
    ``#`` comment lines and lines without ``=`` are skipped. A leading
    ``export `` on the key is accepted, and one pair of matching surrounding
    quotes (single or double) is removed from the value.

    Args:
        path: ``pathlib.Path`` of the .env file. A missing file is reported
            through ``log`` and otherwise ignored.
    """
    if not path.exists():
        log(f"[!] .env nenalezen: {path}")
        return

    # "utf-8-sig" drops a leading BOM (common for files saved by Windows
    # editors); with plain "utf-8" the BOM would become part of the first key.
    for raw_line in path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue

        key, _, value = line.partition("=")
        key = key.strip()
        if key.startswith("export "):
            key = key[len("export "):].strip()

        value = value.strip()
        # Strip only a *matching* pair of quotes, so a value that merely ends
        # (or starts) with a quote character is left intact.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]

        if key and value and key not in os.environ:
            os.environ[key] = value


def ensure_credentials():
    """Make sure VAULT_USER and VAULT_PASS are available, or stop the script.

    The .env file at ENV_FILE is loaded first. If either variable is still
    missing or empty afterwards, a banner explaining where to add them is
    printed and the process exits with status 1.
    """
    load_env_file(ENV_FILE)

    required = ("VAULT_USER", "VAULT_PASS")
    missing = [name for name in required if not os.environ.get(name)]
    if not missing:
        return

    rule = "=" * 60
    print("\n" + rule)
    print(" CHYBÍ PŘIHLAŠOVACÍ ÚDAJE.")
    print(f" Doplň VAULT_USER a VAULT_PASS do: {ENV_FILE}")
    print(rule)
    sys.exit(1)


# --- Link construction --------------------------------------------------

def build_report_url(report, study_number):
    """Build the direct report link for one study, filtered to Czech Republic.

    Args:
        report: one entry of REPORTS (uses "id", "keytype" and "has_site").
        study_number: a key of STUDIES.

    Returns:
        The viewer URL with the Study and Study Country filters pre-filled
        and, when the report has a Site filter, that filter marked as skipped.

    Raises:
        KeyError: if ``study_number`` is not present in STUDIES.
    """
    study_ids = STUDIES[study_number]

    # Only the "osf" report type uses the OSF field ids; every other keytype
    # falls back to the Multipass field names.
    if report["keytype"] == "osf":
        prefix = OSF
        field_names = ("OSF00000005A604", "OSF00000005A605", "OSF00000005A606")
    else:
        prefix = MP
        field_names = ("study__v", "study_country__v", "study_site__v")
    study_key, country_key, site_key = (prefix + name for name in field_names)

    filters = [
        f"{study_key}{EQ}{study_ids['study']}",
        f"{country_key}{EQ}{study_ids['cz']}",
    ]
    if report["has_site"]:
        filters.append(f"{site_key}{SKIP}")

    query = "&".join(filters)
    return BASE_VIEWER + report["id"] + "?" + query


# --- Login (via Veeva "Click to log in with Johnson&Johnson") -----------

# CSS selector list for the OK control of the dialog that
# dismiss_maintenance_popup() closes.
DIALOG_OK_SELECTOR = (".ui-dialog a.ok.vv_button, "
                      ".vv_login_msg_dialog .vv_button.ok")


def dismiss_maintenance_popup(page, timeout=6000):
    """Close the Veeva login/maintenance dialog (its visible OK is an <a>).

    Args:
        page: Playwright page to act on.
        timeout: how long, in milliseconds, to wait for the OK control to
            become visible.

    Returns:
        False when no OK control became visible within ``timeout``; True
        otherwise, after clicking OK up to five times while it stays visible.
    """
    ok_button = page.locator(DIALOG_OK_SELECTOR)
    try:
        ok_button.first.wait_for(state="visible", timeout=timeout)
    except Exception:
        return False

    clicks_left = 5
    while clicks_left > 0:
        clicks_left -= 1
        try:
            still_open = ok_button.count() and ok_button.first.is_visible()
            if not still_open:
                break
            ok_button.first.click()
            page.wait_for_timeout(300)
            log("[i] Maintenance/login dialog zavřen (OK).")
        except Exception:
            # Any failure while probing or clicking ends the attempts.
            break
    return True


def submit_login_form(page, password_box):
    """Submit the J&J login form (Sign On / Login / submit / Enter).

    The candidate submit controls are tried in order and the first one that
    exists and is visible is clicked. If none can be clicked, Enter is
    pressed in ``password_box`` instead.

    Args:
        page: Playwright page showing the login form.
        password_box: locator of the password input, used for the Enter
            fallback.
    """
    submit_controls = (
        page.get_by_role("button", name=re.compile("sign\\s*on", re.I)),
        page.get_by_role("button", name=re.compile("log\\s*in|sign\\s*in", re.I)),
        page.locator("input[type='submit']"),
        page.locator("button[type='submit']"),
    )
    for control in submit_controls:
        try:
            clickable = control.count() and control.first.is_visible()
            if clickable:
                control.first.click()
                return
        except Exception:
            # This candidate is unusable; move on to the next one.
            pass

    password_box.press("Enter")


def login_if_needed(page):
    """Log in to the Study Training vault unless the session is still valid.

    A persistent session skips the login entirely. Otherwise the flow is:
    Veeva 'Click to log in with Johnson&Johnson' -> J&J SSO form (filled from
    VAULT_USER / VAULT_PASS) -> manual 2FA confirmation -> vault /ui.

    Raises:
        playwright TimeoutError: if the vault UI is not reached within
            120 s after the user confirms the 2FA prompt.
    """
    log("[i] Otevírám vault...")
    page.goto(VAULT_HOME, wait_until="domcontentloaded")
    page.wait_for_timeout(2000)
    if in_vault(page):
        log("[i] Už přihlášen (perzistentní session).")
        return

    # Veeva welcome page: "Click to log in with Johnson&Johnson"
    sso_button = page.get_by_role(
        "button", name=re.compile("log in with|Johnson", re.I))
    try:
        if sso_button.count() and sso_button.first.is_visible():
            log("[i] Klikám 'Click to log in with Johnson&Johnson'...")
            sso_button.first.click()
            page.wait_for_timeout(2500)
    except Exception:
        pass
    if in_vault(page):
        log("[i] Přihlášen přes SSO (bez formuláře).")
        return

    # J&J login form (shown when the J&J session is no longer alive)
    username_field = page.locator(
        "input[type='text'], input[type='email']").first
    try:
        username_field.wait_for(timeout=8000)
        log("[i] Vyplňuji přihlašovací údaje...")
        username_field.fill(os.environ["VAULT_USER"])
        password_field = page.locator("input[type='password']").first
        password_field.fill(os.environ["VAULT_PASS"])
        submit_login_form(page, password_field)
    except PWTimeout:
        if in_vault(page):
            return
        log("[!] Nenašel jsem login formulář — možná čeká SSO/2FA.")

    # Outcome / 2FA: first give the vault a short chance to load on its own.
    try:
        page.wait_for_url(VAULT_UI_PATTERN, timeout=15000)
    except PWTimeout:
        pass
    else:
        log("[ok] Přihlášen (bez 2FA).")
        return

    rule = "=" * 60
    print("\n" + rule)
    print(" VYŽADOVÁNO OVĚŘENÍ NA TELEFONU (2FA).")
    print(" Potvrď přihlášení v mobilní aplikaci.")
    print(rule)
    input(" Až to potvrdíš, stiskni ENTER pro pokračování... ")
    page.wait_for_url(VAULT_UI_PATTERN, timeout=120000)
    log("[ok] Přihlášení dokončeno.")


# --- Diagnostics ---------------------------------------------------------

def save_page_debug(page, tag):
    """Save a screenshot and the HTML of *page* into a fresh debug folder.

    The folder is ``DEBUG_DIR/<timestamp>_<tag>``. Both artefacts are
    best-effort: a failure to capture either one is logged and does not
    raise, so the caller's own error handling is not disturbed.

    Args:
        page: Playwright page to capture.
        tag: short label appended to the folder name.

    Returns:
        Path of the folder that was created.
    """
    # Format the timestamp first and append the tag afterwards, so the tag is
    # never interpreted as part of the strftime format (e.g. a "%" in it).
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = DEBUG_DIR / f"{stamp}_{tag}"
    out.mkdir(parents=True, exist_ok=True)

    try:
        page.screenshot(path=str(out / "screenshot.png"), full_page=False)
    except Exception as exc:
        log(f"[!] Screenshot se nepodařilo uložit: {exc}")

    try:
        (out / "page.html").write_text(page.content(), encoding="utf-8")
    except Exception as exc:
        log(f"[!] HTML stránky se nepodařilo uložit: {exc}")

    log(f"[!] Diagnostika uložena do: {out}")
    return out


# --- Report export to CSV ------------------------------------------------

def open_actions_menu(page):
    """Open the report's ⋯ (Actions) menu.

    Selectors are tried from the most specific to the most generic; the first
    one that matches a visible element is clicked.

    Returns:
        True if a menu trigger was clicked, False if none could be.
    """
    trigger_selectors = (
        ".actionMenuContainer .dropDown.vv_dropdown_toggle button.vv-icon-button",
        ".actionMenuContainer button.vv-icon-button",
        ".actionMenuContainer button",
        "button[title='Actions'], [aria-label='Actions']",
    )
    for css in trigger_selectors:
        trigger = page.locator(css)
        try:
            usable = trigger.count() and trigger.first.is_visible()
            if not usable:
                continue
            trigger.first.click()
        except Exception:
            # Probing or clicking failed; fall through to the next selector.
            continue
        return True
    return False


def wait_report_ready(page):
    """Wait until the report has FINISHED computing.

    The reliable signal is the 'Running report …' toast, which shows up while
    the report is being computed and disappears once it is done. Waiting only
    for the 'Returned' text is not enough: in the SPA a leftover from the
    previous report may still be present, and the export would then pick up
    an empty or half-computed grid.

    Raises:
        RuntimeError: if the toast appeared but did not disappear within
            REPORT_TIMEOUT_MS (a debug snapshot is saved first).
    """
    running_toast = page.locator("text=/Running report/i")

    # 1) toast shows up = the new report started (if it never does, the
    #    report was instantaneous)
    try:
        running_toast.first.wait_for(state="visible", timeout=12000)
    except PWTimeout:
        toast_seen = False
    else:
        toast_seen = True

    # 2) toast goes away = the report finished computing
    if toast_seen:
        try:
            running_toast.first.wait_for(state="hidden",
                                         timeout=REPORT_TIMEOUT_MS)
        except PWTimeout:
            save_page_debug(page, "report_running_timeout")
            raise RuntimeError("Report se nedopočítal (toast 'Running report' "
                               "nezmizel). Diagnostika v debug/.")

    # 3) safety net: wait for "Returned N records" (even 0), then let the
    #    grid settle
    try:
        page.wait_for_selector("text=/Returned\\s+\\d/i", timeout=20000)
    except PWTimeout:
        pass
    page.wait_for_timeout(3000)


def _count_csv_data_rows(path):
    """Return the number of non-empty CSV records in *path*, minus the header."""
    # newline="" lets the csv module handle line endings itself, so quoted
    # fields containing line breaks count as one record. The with-block
    # guarantees the file handle is closed.
    with path.open(encoding="utf-8", errors="ignore", newline="") as fh:
        records = sum(1 for row in csv.reader(fh) if row)
    return max(0, records - 1)


def export_report_csv(page, url, dest):
    """Open a report via its direct link, wait for it, and export it to CSV.

    The export uses the ⋯ (Actions) menu -> "Export to CSV" and, when the
    "Export Options" dialog appears, selects Data Only and confirms.

    Args:
        page: Playwright page inside the vault.
        url: direct report link (see build_report_url()).
        dest: ``pathlib.Path`` where the downloaded CSV is saved; parent
            directories are created as needed.

    Returns:
        ``dest``.

    Raises:
        RuntimeError: when the report viewer, the Actions menu or the
            "Export to CSV" item cannot be found (a debug snapshot is saved
            first), or when the report does not finish computing.
        playwright TimeoutError: when no download starts within
            DL_TIMEOUT_MS.
    """
    log(f"[i] Otevírám report: {url[:90]}...")
    page.goto(url, wait_until="domcontentloaded")
    dismiss_maintenance_popup(page, timeout=4000)

    # Check that a report viewer exists at all (the ⋯ menu in the header is
    # rendered quickly, before the data is computed); otherwise waiting for
    # the toast is pointless.
    try:
        page.wait_for_selector(".actionMenuContainer", timeout=20000)
    except PWTimeout:
        save_page_debug(page, "report_load")
        raise RuntimeError("Report viewer se nenačetl. Diagnostika v debug/.")

    wait_report_ready(page)
    log("[i] Report dopočítán, otevírám ⋯ a exportuji do CSV...")

    if not open_actions_menu(page):
        save_page_debug(page, "menu")
        raise RuntimeError("Nenašel jsem ⋯ (Actions) menu. Diagnostika v debug/.")

    # The "Export to CSV" item (the menu is populated asynchronously).
    item = page.locator("a.ReportAction[data-action-name='CsvExport']")
    try:
        item.first.wait_for(state="visible", timeout=15000)
    except PWTimeout:
        # Fall back to locating the item by its visible text.
        item = page.get_by_text("Export to CSV", exact=True)
        try:
            item.first.wait_for(state="visible", timeout=5000)
        except PWTimeout:
            save_page_debug(page, "csv_item")
            raise RuntimeError("Nenašel jsem 'Export to CSV'. Diagnostika v debug/.")

    # The download may start right after clicking the item, or only after
    # confirming the dialog (Data Only -> Export). Both cases are covered.
    with page.expect_download(timeout=DL_TIMEOUT_MS) as dl_info:
        item.first.click()
        # Optional "Export Options" dialog: Data Only + the Export button.
        try:
            radio = page.locator(
                "input[name='requiredRadioField'][value='STANDARD']")
            radio.first.wait_for(state="visible", timeout=6000)
            if not radio.first.is_checked():
                radio.first.check()
            export_btn = page.get_by_role("button", name="Export", exact=True)
            export_btn.first.wait_for(state="visible", timeout=6000)
            export_btn.first.click()
        except PWTimeout:
            pass  # no dialog -> the download is already running from the click
    download = dl_info.value

    dest.parent.mkdir(parents=True, exist_ok=True)
    download.save_as(str(dest))

    # The row count is informational only; never fail the export over it.
    try:
        n_rows = _count_csv_data_rows(dest)
    except Exception:
        log(f"[ok] Uloženo: {dest.name}")
    else:
        log(f"[ok] Uloženo: {dest.name} ({n_rows} datových řádků)")
    return dest


# --- Main ----------------------------------------------------------------

def main():
    """Export every configured report for every study in STUDIES_TO_RUN.

    Checks credentials, opens a persistent Chromium profile, logs in, then
    exports each (report, study) combination to OUTPUT_DIR, trying each one
    up to MAX_ATTEMPTS times. The browser context is always closed.

    Exit status: 0 when every export succeeded, 1 when at least one failed,
    130 when the run was interrupted with Ctrl+C (so an aborted run is never
    reported as a success).
    """
    ensure_credentials()
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Build the task list (report × study), skipping unknown studies.
    known_studies = []
    for study in STUDIES_TO_RUN:
        if study in STUDIES:
            known_studies.append(study)
        else:
            log(f"[!] Studie {study} není v STUDIES — přeskakuji.")
    tasks = [(rep, study) for study in known_studies for rep in REPORTS]
    # Report the number of studies actually processed, not the number
    # requested, so the product shown matches len(tasks).
    log(f"[i] Ke zpracování: {len(tasks)} reportů "
        f"({len(known_studies)} studie × {len(REPORTS)} reportů).")

    ok_count = fail_count = 0
    interrupted = False
    with sync_playwright() as p:
        ctx = p.chromium.launch_persistent_context(
            user_data_dir=str(PROFILE_DIR),
            headless=False,
            accept_downloads=True,
            no_viewport=True,
            args=["--start-maximized"],
        )
        page = ctx.pages[0] if ctx.pages else ctx.new_page()
        try:
            login_if_needed(page)
            dismiss_maintenance_popup(page, timeout=4000)

            # One timestamp for the whole run so all files share a prefix.
            ts = datetime.now().strftime("%Y-%m-%d_%H-%M")
            for n, (rep, study) in enumerate(tasks, 1):
                # The full study number goes into the file name.
                dest = OUTPUT_DIR / f"{ts}_{rep['code']}_{study}_CZ.csv"
                url = build_report_url(rep, study)
                log(f"\n--- [{n}/{len(tasks)}] {rep['code']} | {study}")

                for attempt in range(1, MAX_ATTEMPTS + 1):
                    try:
                        export_report_csv(page, url, dest)
                    except Exception as e:
                        log(f"[!] Pokus {attempt}/{MAX_ATTEMPTS} selhal: {e}")
                        if attempt < MAX_ATTEMPTS:
                            page.wait_for_timeout(RETRY_PAUSE_MS)
                    else:
                        ok_count += 1
                        break
                else:
                    # No attempt succeeded.
                    fail_count += 1
                page.wait_for_timeout(BETWEEN_MS)
        except KeyboardInterrupt:
            interrupted = True
            log("\n[!] Přerušeno uživatelem.")
        finally:
            log(f"\n[i] Hotovo: {ok_count} OK, {fail_count} chyb. "
                f"Výstup: {OUTPUT_DIR}")
            log("[i] Zavírám prohlížeč.")
            ctx.close()

    if interrupted:
        sys.exit(130)
    sys.exit(1 if fail_count else 0)


# Run the export only when this file is executed as a script.
if __name__ == "__main__":
    main()