notebook
This commit is contained in:
@@ -0,0 +1,448 @@
|
||||
# ============================================================
|
||||
# studytraining_reports_export_v1.0.py
|
||||
# Verze: 1.0
|
||||
# Datum: 2026-06-13
|
||||
# Popis: Export reportů z J&J Veeva "Study Training" vaultu
|
||||
# (its-jnj-studytraining.veevavault.com) do CSV.
|
||||
# Jeden běh udělá:
|
||||
# 1) login do vaultu (persistentní session + ruční 2FA),
|
||||
# 2) pro každou (report × studie) kombinaci otevře PŘÍMÝ
|
||||
# link s předvyplněnými filtry (Study + Country=Czech
|
||||
# Republic, Site skip) — prompt "Select Report Values"
|
||||
# se tím přeskočí,
|
||||
# 3) počká na dopočítání reportu (jsou pomalé),
|
||||
# 4) ⋯ (Actions) -> Export to CSV [-> Data Only] -> Export,
|
||||
# zachytí download a uloží do StudyTraining/exports/.
|
||||
#
|
||||
# Linky se NESkládají natvrdo — generují se z tabulek
|
||||
# STUDIES + REPORTS, takže přidat studii/report = jeden
|
||||
# řádek. Interní ID (VC8 = studie, VC9 = Czech Republic)
|
||||
# a formáty klíčů jsou ověřené (viz
|
||||
# StudyTraining/training_report_links_CZ.md).
|
||||
#
|
||||
# Tento skript NEUKLÁDÁ do Mongo a NESTAHUJE dokumenty —
|
||||
# jen exportuje reporty. (Parsování/Mongo doplníme později
|
||||
# podle toho, jak bude vypadat dashboard.)
|
||||
#
|
||||
# Vychází z logiky vtmf_pipeline_v1.3.py (login, dialogy,
|
||||
# export přes ⋯ menu).
|
||||
#
|
||||
# Heslo se NIKDY nedává natvrdo — čte se z .env v rootu projektu
|
||||
# Janssen (VAULT_USER / VAULT_PASS).
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
|
||||
|
||||
# --- Konfigurace -------------------------------------------------------
|
||||
|
||||
# Host name of the Study Training vault; every URL below derives from it.
HOST = "its-jnj-studytraining.veevavault.com"
_VAULT_ORIGIN = "https://" + HOST
# Prefix of a direct report-viewer link; the report id is appended to it.
BASE_VIEWER = _VAULT_ORIGIN + "/ui/#reporting/viewer/"
# Entry page opened first, to find out whether a login is required.
VAULT_HOME = _VAULT_ORIGIN + "/"
# URL glob passed to page.wait_for_url() to detect arrival in the vault UI.
VAULT_UI_PATTERN = _VAULT_ORIGIN + "/ui**"
|
||||
# Jsme uvnitř vaultu jen když URL ZAČÍNÁ na host/ui. Pozor: přihlašovací
|
||||
# stránka login.veevavault.com má host/ui v parametru retURL — proto
|
||||
# nestačí substring, musí to být prefix.
|
||||
def in_vault(page):
    """Return True when the page's current URL is inside the vault UI.

    The check is a prefix match, not a substring match: the login page
    carries the vault UI address inside its ``retURL`` parameter, so a
    substring test would report "logged in" while still on the login page.
    """
    vault_ui_prefix = f"https://{HOST}/ui"
    return page.url.startswith(vault_ui_prefix)
|
||||
|
||||
# All working directories are anchored at the folder containing this script.
_THIS_FILE = Path(__file__).resolve()
SCRIPT_DIR = _THIS_FILE.parent
PROFILE_DIR = SCRIPT_DIR.joinpath("studytraining_profile")  # persistent browser session
ENV_FILE = SCRIPT_DIR.parent.joinpath(".env")  # one level up: the project root
DEBUG_DIR = SCRIPT_DIR.joinpath("debug")  # diagnostics written on failure
OUTPUT_DIR = SCRIPT_DIR.joinpath("exports")  # exported CSV files land here
|
||||
|
||||
# Studie -> interní ID (VC8 = Study, VC9 = Study Country "Czech Republic").
|
||||
# Ověřeno 2026-06-13 z URL po spuštění (globální napříč reporty).
|
||||
# Study number -> internal vault ids used as filter values in report links:
#   "study" = id of the study, "cz" = id of that study's Czech Republic
#   study-country record.
STUDIES = {
    study_number: {"study": study_id, "cz": country_id}
    for study_number, study_id, country_id in (
        ("77242113UCO3001", "VC8000000008007", "VC900000000B076"),
        ("77242113CRD3001", "VC8000000008010", "VC900000000A093"),
        ("42847922MDD3003", "VC800000000B067", "VC900000000D751"),
    )
}

# Studies exported in this run; the others stay in STUDIES for later use.
STUDIES_TO_RUN = [
    "77242113UCO3001",
]
|
||||
|
||||
# Multipass klíčový prefix (sdílený všemi Multipass reporty, ověřeno).
|
||||
# Filter-key prefix shared by all "multipass" reports.
MP = "report_view_person_with_learner_rolep__c.learner_role_person__v___person__v."
# Report V004 is a different report type and uses its own reportTypeRef prefix.
OSF = "reportTypeRef1586959950918."

# Report definitions, one tuple per report:
#   (report id, short code used in the output file name, key type, has Site filter)
# keytype "multipass" -> keys MP + study__v / study_country__v / study_site__v
# keytype "osf"       -> keys OSF + OSF00000005A604 / A605 / A606
REPORTS = [
    {"id": report_id, "code": code, "keytype": keytype, "has_site": has_site}
    for report_id, code, keytype, has_site in (
        ("0RP00000000V004", "Status", "osf", True),
        ("0RP00000000V006", "OpenAssignments", "multipass", False),
        ("0RP00000000V007", "ByTrainingRole", "multipass", True),
        ("0RP00000000V008", "OverdueFiltered", "multipass", True),
        ("0RP00000000V003", "ComplianceRisks", "multipass", True),
        ("0RP00000000V005", "AllOverdue", "multipass", True),
    )
]
|
||||
|
||||
# URL-kódování: čárka -> %2C, středník -> %3B
|
||||
# Pre-encoded filter fragments (comma -> %2C, semicolon -> %3B).
_THREE_COMMAS = "%2C" * 3
EQ = _THREE_COMMAS + "EQ="  # decoded: ,,,EQ=  followed by the filter value
SKIP = _THREE_COMMAS + "EQ%3Bskip="  # decoded: ,,,EQ;skip=

# Waiting / robustness settings (all durations in milliseconds).
REPORT_TIMEOUT_MS = 150_000  # reports are slow; allow time to finish computing
DL_TIMEOUT_MS = 120_000  # timeout for the download itself
MAX_ATTEMPTS = 2  # attempts per (report, study) combination
RETRY_PAUSE_MS = 5_000  # pause before a retry
BETWEEN_MS = 800  # pause between two combinations
|
||||
|
||||
|
||||
def log(msg):
    """Print *msg* on its own line and flush stdout immediately."""
    print(msg, end="\n", flush=True)
|
||||
|
||||
|
||||
# --- .env / přihlašovací údaje -----------------------------------------
|
||||
|
||||
def load_env_file(path):
    """Load ``KEY=VALUE`` pairs from a .env file into ``os.environ``.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored.
    Variables that already exist in the environment are never overwritten,
    and entries with an empty key or empty value are skipped.

    Args:
        path: ``pathlib.Path`` of the .env file. A missing file is only
            reported via ``log``; it is not an error.
    """
    if not path.exists():
        log(f"[!] .env nenalezen: {path}")
        return
    # "utf-8-sig" drops a leading byte-order mark (written by some Windows
    # editors). With plain "utf-8" the BOM would become part of the first
    # key, so e.g. VAULT_USER on line 1 would never be found.
    for raw_line in path.read_text(encoding="utf-8-sig").splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        key, _, value = entry.partition("=")
        key, value = key.strip(), value.strip()
        # Remove exactly one pair of matching surrounding quotes. Stripping
        # quote characters unconditionally would corrupt an unquoted value
        # (such as a password) that legitimately starts or ends with one.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        if key and value and key not in os.environ:
            os.environ[key] = value
|
||||
|
||||
|
||||
def ensure_credentials():
    """Make sure VAULT_USER and VAULT_PASS are available, or exit.

    Loads the .env file first. If either variable is still missing or
    empty, prints instructions naming the .env path and terminates the
    process with exit status 1.
    """
    load_env_file(ENV_FILE)
    required = ("VAULT_USER", "VAULT_PASS")
    missing = [name for name in required if not os.environ.get(name)]
    if not missing:
        return
    rule = "=" * 60
    print("\n" + rule)
    print(" CHYBÍ PŘIHLAŠOVACÍ ÚDAJE.")
    print(f" Doplň VAULT_USER a VAULT_PASS do: {ENV_FILE}")
    print(rule)
    sys.exit(1)
|
||||
|
||||
|
||||
# --- Skládání linků ----------------------------------------------------
|
||||
|
||||
def build_report_url(report, study_number):
    """Build the direct report link for one study, filtered to Czech Republic.

    Args:
        report: One entry of REPORTS (reads "id", "keytype", "has_site").
        study_number: Key into STUDIES; an unknown key raises KeyError.

    Returns:
        The viewer URL with the Study and Country filters pre-filled and,
        for reports that have a Site filter, that filter marked as skipped.
    """
    ids = STUDIES[study_number]
    if report["keytype"] == "osf":
        prefix = OSF
        suffixes = ("OSF00000005A604", "OSF00000005A605", "OSF00000005A606")
    else:
        prefix = MP
        suffixes = ("study__v", "study_country__v", "study_site__v")
    study_key, country_key, site_key = (prefix + suffix for suffix in suffixes)

    query = [
        "{}{}{}".format(study_key, EQ, ids["study"]),
        "{}{}{}".format(country_key, EQ, ids["cz"]),
    ]
    if report["has_site"]:
        query.append("{}{}".format(site_key, SKIP))
    return BASE_VIEWER + report["id"] + "?" + "&".join(query)
|
||||
|
||||
|
||||
# --- Přihlášení (přes Veeva "Click to log in with Johnson&Johnson") ----
|
||||
|
||||
# CSS selector list matching the OK control of the login/maintenance dialog.
DIALOG_OK_SELECTOR = ", ".join((
    ".ui-dialog a.ok.vv_button",
    ".vv_login_msg_dialog .vv_button.ok",
))
|
||||
|
||||
|
||||
def dismiss_maintenance_popup(page, timeout=6000):
    """Close the Veeva login/maintenance dialog by clicking its OK control.

    Args:
        page: Playwright page to act on.
        timeout: Milliseconds to wait for the OK control to become visible.

    Returns:
        False when no OK control became visible within *timeout*; True
        otherwise, including when a later click attempt failed.
    """
    ok_control = page.locator(DIALOG_OK_SELECTOR)
    try:
        ok_control.first.wait_for(state="visible", timeout=timeout)
    except Exception:
        return False

    # Up to five rounds, in case another dialog shows up after the first one
    # is closed. Any error, or no visible OK control, ends the loop.
    rounds_left = 5
    while rounds_left:
        rounds_left -= 1
        try:
            if not (ok_control.count() and ok_control.first.is_visible()):
                break
            ok_control.first.click()
            page.wait_for_timeout(300)
            log("[i] Maintenance/login dialog zavřen (OK).")
        except Exception:
            break
    return True
|
||||
|
||||
|
||||
def submit_login_form(page, password_box):
    """Submit the J&J login form.

    Clicks the first visible submit control, trying in order: a "Sign On"
    button, a "Log in"/"Sign in" button, ``input[type='submit']`` and
    ``button[type='submit']``. If none can be clicked, presses Enter in
    *password_box* instead.
    """
    role_button = page.get_by_role
    submit_controls = (
        role_button("button", name=re.compile("sign\\s*on", re.I)),
        role_button("button", name=re.compile("log\\s*in|sign\\s*in", re.I)),
        page.locator("input[type='submit']"),
        page.locator("button[type='submit']"),
    )
    for control in submit_controls:
        try:
            if not (control.count() and control.first.is_visible()):
                continue
            control.first.click()
        except Exception:
            # This candidate could not be used; move on to the next one.
            continue
        return
    password_box.press("Enter")
|
||||
|
||||
|
||||
def _click_sso_button(page):
    """Click the welcome-page 'log in with Johnson&Johnson' button if shown (best effort)."""
    button = page.get_by_role("button",
                              name=re.compile("log in with|Johnson", re.I))
    try:
        if button.count() and button.first.is_visible():
            log("[i] Klikám 'Click to log in with Johnson&Johnson'...")
            button.first.click()
            page.wait_for_timeout(2500)
    except Exception:
        pass


def _fill_login_form(page):
    """Fill and submit the J&J login form from VAULT_USER / VAULT_PASS.

    Returns True only when a form step timed out and the page turned out to
    be inside the vault already, so the caller can stop.
    """
    username = page.locator("input[type='text'], input[type='email']").first
    try:
        username.wait_for(timeout=8000)
        log("[i] Vyplňuji přihlašovací údaje...")
        username.fill(os.environ["VAULT_USER"])
        password = page.locator("input[type='password']").first
        password.fill(os.environ["VAULT_PASS"])
        submit_login_form(page, password)
    except PWTimeout:
        if in_vault(page):
            return True
        log("[!] Nenašel jsem login formulář — možná čeká SSO/2FA.")
    return False


def login_if_needed(page):
    """Log in to the Study Training vault unless the session is still valid.

    Flow: open the vault home page; if the persistent session already lands
    in the vault UI, return. Otherwise click the Veeva 'log in with
    Johnson&Johnson' button, fill the J&J form with credentials from the
    environment, and wait for the vault UI. If that does not appear within
    15 s, ask the operator to confirm 2FA on the phone, wait for ENTER, and
    then wait up to 120 s for the vault UI (a timeout there propagates).
    """
    log("[i] Otevírám vault...")
    page.goto(VAULT_HOME, wait_until="domcontentloaded")
    page.wait_for_timeout(2000)
    if in_vault(page):
        log("[i] Už přihlášen (perzistentní session).")
        return

    _click_sso_button(page)
    if in_vault(page):
        log("[i] Přihlášen přes SSO (bez formuláře).")
        return

    # J&J login form (only shown when the J&J session is not alive).
    if _fill_login_form(page):
        return

    # Outcome: either we reach the vault UI directly, or 2FA is pending.
    try:
        page.wait_for_url(VAULT_UI_PATTERN, timeout=15000)
    except PWTimeout:
        pass
    else:
        log("[ok] Přihlášen (bez 2FA).")
        return

    rule = "=" * 60
    print("\n" + rule)
    print(" VYŽADOVÁNO OVĚŘENÍ NA TELEFONU (2FA).")
    print(" Potvrď přihlášení v mobilní aplikaci.")
    print(rule)
    input(" Až to potvrdíš, stiskni ENTER pro pokračování... ")
    page.wait_for_url(VAULT_UI_PATTERN, timeout=120000)
    log("[ok] Přihlášení dokončeno.")
|
||||
|
||||
|
||||
# --- Diagnostika -------------------------------------------------------
|
||||
|
||||
def save_page_debug(page, tag):
    """Save a screenshot and the page HTML into a fresh debug directory.

    The directory is ``DEBUG_DIR/<timestamp>_<tag>``. Both artefacts are
    best effort: a failure to capture one is logged and does not stop the
    other, nor does it raise.

    Args:
        page: Playwright page to capture.
        tag: Short label appended to the directory name.

    Returns:
        Path of the directory the diagnostics were written to.
    """
    # Only the timestamp goes through strftime. The tag is appended
    # afterwards so a '%' in it can never be read as a format directive.
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = DEBUG_DIR / f"{stamp}_{tag}"
    out.mkdir(parents=True, exist_ok=True)
    try:
        page.screenshot(path=str(out / "screenshot.png"), full_page=False)
    except Exception as exc:
        log(f"[!] Screenshot se nepodařilo uložit: {exc}")
    try:
        (out / "page.html").write_text(page.content(), encoding="utf-8")
    except Exception as exc:
        log(f"[!] HTML stránky se nepodařilo uložit: {exc}")
    log(f"[!] Diagnostika uložena do: {out}")
    return out
|
||||
|
||||
|
||||
# --- Export reportu do CSV ---------------------------------------------
|
||||
|
||||
def open_actions_menu(page):
    """Open the report's ⋯ (Actions) menu.

    Tries a list of selectors from most to least specific and clicks the
    first match that is visible.

    Returns:
        True once a click succeeded, False when no selector could be used.
    """
    trigger_selectors = (
        ".actionMenuContainer .dropDown.vv_dropdown_toggle button.vv-icon-button",
        ".actionMenuContainer button.vv-icon-button",
        ".actionMenuContainer button",
        "button[title='Actions'], [aria-label='Actions']",
    )
    for css in trigger_selectors:
        trigger = page.locator(css)
        try:
            if not (trigger.count() and trigger.first.is_visible()):
                continue
            trigger.first.click()
        except Exception:
            # Selector unusable right now; fall through to the next one.
            continue
        return True
    return False
|
||||
|
||||
|
||||
def wait_report_ready(page):
    """Wait until the report has finished computing.

    The signal used is the 'Running report' toast: it is awaited for up to
    12 s and, if it showed up, awaited again until it disappears (up to
    REPORT_TIMEOUT_MS). A toast that never shows up is treated as a report
    that finished immediately. Waiting only for the 'Returned' text would
    not be enough: in this single-page app it may be left over from the
    previous report.

    Raises:
        RuntimeError: The toast appeared but did not disappear in time
            (diagnostics are saved first).
    """
    running_toast = page.locator("text=/Running report/i")
    try:
        # 1) Toast appears: the new report has started computing.
        running_toast.first.wait_for(state="visible", timeout=12000)
    except PWTimeout:
        pass
    else:
        # 2) Toast disappears: the report is done.
        try:
            running_toast.first.wait_for(state="hidden",
                                         timeout=REPORT_TIMEOUT_MS)
        except PWTimeout:
            save_page_debug(page, "report_running_timeout")
            raise RuntimeError("Report se nedopočítal (toast 'Running report' "
                               "nezmizel). Diagnostika v debug/.")

    # 3) Safety net: wait for "Returned N records" (N may be 0), then give
    #    the grid a moment to settle. Not seeing the text is not fatal.
    try:
        page.wait_for_selector("text=/Returned\\s+\\d/i", timeout=20000)
    except PWTimeout:
        pass
    page.wait_for_timeout(3000)
|
||||
|
||||
|
||||
def _count_data_rows(csv_path):
    """Return the number of CSV records after the header row, or None if unreadable."""
    import csv  # local import: only this helper needs it

    try:
        # csv.reader counts records, so a quoted field containing line
        # breaks is one row. The context manager closes the file handle.
        with csv_path.open(encoding="utf-8", errors="ignore", newline="") as fh:
            return max(0, sum(1 for _ in csv.reader(fh)) - 1)
    except (OSError, csv.Error):
        return None


def _find_csv_export_item(page):
    """Return a locator for the 'Export to CSV' menu entry; raise RuntimeError if absent."""
    # The menu content loads asynchronously, hence the generous first wait.
    item = page.locator("a.ReportAction[data-action-name='CsvExport']")
    try:
        item.first.wait_for(state="visible", timeout=15000)
        return item
    except PWTimeout:
        pass
    # Fallback: find the entry by its visible label.
    item = page.get_by_text("Export to CSV", exact=True)
    try:
        item.first.wait_for(state="visible", timeout=5000)
    except PWTimeout as err:
        save_page_debug(page, "csv_item")
        raise RuntimeError(
            "Nenašel jsem 'Export to CSV'. Diagnostika v debug/.") from err
    return item


def _confirm_export_options(page):
    """Confirm the optional 'Export Options' dialog; do nothing if it never shows."""
    try:
        # NOTE(review): the original author's comment identifies this radio
        # (value 'STANDARD') as the "Data Only" option.
        radio = page.locator(
            "input[name='requiredRadioField'][value='STANDARD']")
        radio.first.wait_for(state="visible", timeout=6000)
        if not radio.first.is_checked():
            radio.first.check()
        export_btn = page.get_by_role("button", name="Export", exact=True)
        export_btn.first.wait_for(state="visible", timeout=6000)
        export_btn.first.click()
    except PWTimeout:
        pass  # no dialog: the download was started by the menu click itself


def export_report_csv(page, url, dest):
    """Open a report via its direct link and export it to CSV.

    Steps: navigate to *url*, close a possible maintenance dialog, wait for
    the report viewer and for the report to finish computing, open the ⋯
    menu, choose 'Export to CSV', confirm the optional options dialog, and
    save the download to *dest*.

    Args:
        page: Playwright page of a logged-in session.
        url: Direct report link.
        dest: ``pathlib.Path`` of the CSV file to write; parent directories
            are created as needed.

    Returns:
        *dest*.

    Raises:
        RuntimeError: The viewer, the menu, the menu entry or the download
            did not show up in time. Diagnostics are saved to debug/ first.
    """
    log(f"[i] Otevírám report: {url[:90]}...")
    page.goto(url, wait_until="domcontentloaded")
    dismiss_maintenance_popup(page, timeout=4000)

    # The ⋯ menu container in the header renders quickly, before the data is
    # computed. If it is missing there is no report viewer to wait for.
    try:
        page.wait_for_selector(".actionMenuContainer", timeout=20000)
    except PWTimeout as err:
        save_page_debug(page, "report_load")
        raise RuntimeError(
            "Report viewer se nenačetl. Diagnostika v debug/.") from err

    wait_report_ready(page)
    log("[i] Report dopočítán, otevírám ⋯ a exportuji do CSV...")

    if not open_actions_menu(page):
        save_page_debug(page, "menu")
        raise RuntimeError("Nenašel jsem ⋯ (Actions) menu. Diagnostika v debug/.")

    item = _find_csv_export_item(page)

    # The download may start right after the menu click, or only after the
    # options dialog is confirmed; both happen inside expect_download.
    # Leaving the `with` block waits for the download, so its timeout is
    # caught here and reported with diagnostics like every other failure.
    try:
        with page.expect_download(timeout=DL_TIMEOUT_MS) as dl_info:
            item.first.click()
            _confirm_export_options(page)
        download = dl_info.value
    except PWTimeout as err:
        save_page_debug(page, "download")
        raise RuntimeError(
            "Download CSV nedorazil (timeout). Diagnostika v debug/.") from err

    dest.parent.mkdir(parents=True, exist_ok=True)
    download.save_as(str(dest))

    n_rows = _count_data_rows(dest)
    if n_rows is None:
        log(f"[ok] Uloženo: {dest.name}")
    else:
        log(f"[ok] Uloženo: {dest.name} ({n_rows} datových řádků)")
    return dest
|
||||
|
||||
|
||||
# --- Main --------------------------------------------------------------
|
||||
|
||||
def _export_with_retries(page, url, dest):
    """Run one export up to MAX_ATTEMPTS times; return True on success, False otherwise."""
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            export_report_csv(page, url, dest)
            return True
        except Exception as e:
            log(f"[!] Pokus {attempt}/{MAX_ATTEMPTS} selhal: {e}")
            if attempt < MAX_ATTEMPTS:
                page.wait_for_timeout(RETRY_PAUSE_MS)
    return False


def main():
    """Export every configured report for every study selected for this run.

    Checks credentials, builds the (report, study) task list, opens a
    persistent Chromium context, logs in, and exports each task to
    OUTPUT_DIR with retries. The browser is always closed at the end.

    Exits with status 1 when any export failed or the run was interrupted
    by the user, otherwise 0.
    """
    ensure_credentials()
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Keep only studies with known ids, so the summary below reports what
    # will actually run rather than everything listed in STUDIES_TO_RUN.
    runnable_studies = []
    for study in STUDIES_TO_RUN:
        if study not in STUDIES:
            log(f"[!] Studie {study} není v STUDIES — přeskakuji.")
            continue
        runnable_studies.append(study)
    tasks = [(rep, study) for study in runnable_studies for rep in REPORTS]
    log(f"[i] Ke zpracování: {len(tasks)} reportů "
        f"({len(runnable_studies)} studie × {len(REPORTS)} reportů).")

    ok_count = fail_count = 0
    interrupted = False
    with sync_playwright() as p:
        ctx = p.chromium.launch_persistent_context(
            user_data_dir=str(PROFILE_DIR),
            headless=False,
            accept_downloads=True,
            no_viewport=True,
            args=["--start-maximized"],
        )
        page = ctx.pages[0] if ctx.pages else ctx.new_page()
        try:
            login_if_needed(page)
            dismiss_maintenance_popup(page, timeout=4000)

            # One timestamp for the whole run, so all files share a prefix.
            ts = datetime.now().strftime("%Y-%m-%d_%H-%M")
            for n, (rep, study) in enumerate(tasks, 1):
                dest = OUTPUT_DIR / f"{ts}_{rep['code']}_{study}_CZ.csv"
                url = build_report_url(rep, study)
                log(f"\n--- [{n}/{len(tasks)}] {rep['code']} | {study}")
                if _export_with_retries(page, url, dest):
                    ok_count += 1
                else:
                    fail_count += 1
                page.wait_for_timeout(BETWEEN_MS)
        except KeyboardInterrupt:
            interrupted = True
            log("\n[!] Přerušeno uživatelem.")
        finally:
            log(f"\n[i] Hotovo: {ok_count} OK, {fail_count} chyb. "
                f"Výstup: {OUTPUT_DIR}")
            log("[i] Zavírám prohlížeč.")
            ctx.close()
    # An interrupted run is incomplete, so it must not report success.
    sys.exit(1 if (fail_count or interrupted) else 0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user