Initial commit — clean history (removed large test files, browser profiles, Medidata/Clario downloads)
@@ -0,0 +1,5 @@
|
||||
IMEDIDATA_USERNAME=vladimir.buzalka
|
||||
# Secret removed from version control: set the password locally and rotate the one that was committed.
IMEDIDATA_PASSWORD=
|
||||
DOWNLOAD_DIR=./downloads
|
||||
|
||||
|
||||
@@ -0,0 +1,489 @@
|
||||
"""
|
||||
download_report.py
|
||||
NAHRAZENO skriptem download_edc_datalistings.py
|
||||
|
||||
Původně: stahování Data Listing reportů pro studii MDD3003 (CZE).
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
|
||||
import tkinter as tk
|
||||
from tkinter import simpledialog
|
||||
|
||||
# Directory holding this script; the .env file, the download folder and the
# saved session file are all resolved relative to it.
_SCRIPT_DIR = Path(__file__).parent

load_dotenv(_SCRIPT_DIR / ".env")

# Credentials are read from the environment (populated by load_dotenv above).
USERNAME = os.getenv("IMEDIDATA_USERNAME", "vladimir.buzalka")
PASSWORD = os.getenv("IMEDIDATA_PASSWORD", "")
# DOWNLOAD_DIR can be overridden through the environment. A relative value is
# resolved against the script directory; an unset or empty value falls back to
# the "downloads" folder that used to be hard-coded here.
DOWNLOAD_DIR = _SCRIPT_DIR / (os.getenv("DOWNLOAD_DIR") or "downloads")
AUTH_FILE = _SCRIPT_DIR / "auth.json"
AUTH_MAX_AGE_DAYS = 7

LOGIN_URL = "https://login.imedidata.com/login"
SELECT_ROLE_URL = (
    "https://jnjja.mdsol.com/MedidataRave/SelectRole.aspx"
    "?client_division_uuid=e5de55d5-a414-4bd1-9abe-18e96fd5475d"
    "&study_group_uuid=b0793ca6-33ec-44e8-883b-6fc1a4b671c4"
    "&studygroup_id=107981"
)

STUDY_NAME = "42847922MDD3003"
SITE_GROUP = "CZE"
FORM_NAMES = [
    "Date of Visit",
    "Vital Signs",
    "Interim Investigator Signature",
]
REPORT_ID = 92  # _EDC Std Rpt - Data Listing (Data Stream)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def auth_valid():
    """Return True when the saved session file exists and is recent enough.

    "Recent enough" means the file's modification time is less than
    AUTH_MAX_AGE_DAYS days in the past.
    """
    if AUTH_FILE.exists():
        saved_at = datetime.fromtimestamp(AUTH_FILE.stat().st_mtime)
        return datetime.now() - saved_at < timedelta(days=AUTH_MAX_AGE_DAYS)
    return False
|
||||
|
||||
|
||||
def wait_load(page, extra_ms=1000):
    """Wait for the page's 'load' event, then pause for extra_ms milliseconds.

    The 'load' state is used because, per the original author's note, Rave
    never reaches 'networkidle'. A timeout while waiting is ignored.
    """
    load_timeout_ms = 20_000
    try:
        page.wait_for_load_state("load", timeout=load_timeout_ms)
    except PWTimeout:
        # Best effort: carry on and rely on the fixed pause below.
        pass
    page.wait_for_timeout(extra_ms)
|
||||
|
||||
|
||||
def dbg(page, label):
    """Print the page's current URL, prefixed with a bracketed label."""
    current_url = page.url
    print("[{}] URL: {}".format(label, current_url))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Login
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _ask_otp_popup():
    """Show a GUI dialog asking for the OKTA OTP code.

    Returns:
        The entered code with surrounding whitespace removed, or an empty
        string when the dialog was cancelled or left empty.
    """
    # A hidden root window is needed only as the dialog's parent.
    root = tk.Tk()
    try:
        root.withdraw()
        root.lift()
        root.attributes("-topmost", True)
        otp = simpledialog.askstring(
            "OKTA MFA",
            "Zadej OTP kód z OKTA (6 číslic):",
            parent=root,
        )
    finally:
        # Destroy the root even if the dialog raised, so no Tk window leaks.
        root.destroy()
    return (otp or "").strip()
|
||||
|
||||
|
||||
def do_login(page, context):
    """Log in to iMedidata and persist the browser session to AUTH_FILE.

    Fills the username/password form, handles an OKTA OTP prompt when one is
    detected, and waits for the home page. Exits the process with status 1
    when no OTP is entered or the home page is not reached.
    """
    username_css = 'input[name="session[username]"]'
    password_css = 'input[name="session[password]"]'
    home_host = "home.imedidata.com"

    print("Přihlašuji se do iMedidata...")
    page.goto(LOGIN_URL)
    wait_load(page, 500)
    dbg(page, "login-page")

    # The form fields are named session[username] / session[password].
    page.wait_for_selector(username_css, timeout=10_000)
    page.fill(username_css, USERNAME)
    page.fill(password_css, PASSWORD)
    page.click('button[type="submit"]')

    # The redirect may go through OKTA or straight to the home page.
    wait_load(page, 2000)
    dbg(page, "after-signin")

    if _okta_mfa_present(page):
        print("\n*** OKTA MFA vyžadována! ***")
        code = _ask_otp_popup()
        if not code:
            print("CHYBA: OTP nebylo zadáno.")
            sys.exit(1)
        _fill_otp(page, code)
        # Give OKTA time to process the code and redirect back.
        # NOTE(review): original indentation was lost; these two calls are
        # assumed to belong to the MFA branch - confirm.
        wait_load(page, 3000)
        dbg(page, "after-otp")

    # Wait until we land on the home page; a timeout is only logged here
    # because the URL is checked explicitly below.
    try:
        page.wait_for_url(f"**/{home_host}**", timeout=30_000)
    except PWTimeout:
        dbg(page, "wait-home-timeout")

    dbg(page, "final-login")

    if home_host not in page.url:
        print("CHYBA: Přihlášení se nezdařilo! Zkontroluj heslo nebo OKTA kód.")
        input("Zmáčkni Enter pro ukončení...")
        sys.exit(1)

    context.storage_state(path=str(AUTH_FILE))
    print("Session uložena do auth.json")
|
||||
|
||||
|
||||
def _okta_mfa_present(page):
    """Return True when the page looks like an OKTA MFA prompt.

    The page qualifies when its URL contains "okta" (case-insensitive) or
    when any of the known OTP input selectors matches an element.
    """
    if "okta" in page.url.lower():
        return True
    otp_input_selectors = (
        'input[name="answer"]',
        'input[name*="otp"]',
        'input[name*="code"]',
        'input[placeholder*="code" i]',
    )
    return any(page.query_selector(css) for css in otp_input_selectors)
|
||||
|
||||
|
||||
def _fill_otp(page, otp):
    """Enter the OTP code into the first matching input and press Enter.

    The candidate selectors are tried in order. When none matches, the code
    is typed through the page keyboard instead (into whatever has focus).
    """
    candidate_selectors = [
        'input[name="answer"]',
        'input[name*="otp"]',
        'input[name*="code"]',
        'input[type="tel"]',
        'input[placeholder*="code" i]',
    ]
    # Lazy lookup: stop querying as soon as one selector yields an element.
    matches = (page.query_selector(css) for css in candidate_selectors)
    field = next((handle for handle in matches if handle), None)

    if field is None:
        # Fallback: no known input found, type via the keyboard.
        page.keyboard.type(otp)
    else:
        field.fill(otp)
    page.keyboard.press("Enter")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Navigace po přihlášení
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def go_to_select_role(page):
    """Navigate to the SelectRole page and report whether we stayed logged in.

    Returns:
        True when the resulting URL contains neither "login" nor "okta"
        (case-insensitive), False otherwise.
    """
    print("Navigace na SelectRole...")
    try:
        page.goto(SELECT_ROLE_URL)
    except Exception:
        # Per the original note, Rave performs a server-side redirect that
        # surfaces as ERR_ABORTED; the URL is inspected after loading instead.
        pass
    wait_load(page, 1500)
    dbg(page, "select-role")

    landed_on = page.url.lower()
    return not ("login" in landed_on or "okta" in landed_on)
|
||||
|
||||
|
||||
def select_role(page):
    """Choose the "Site Manager" role and click the continue button.

    Returns early (after logging the URL) when no <select> appears within
    10 seconds. When no option containing "site manager" is found, falls
    back to clicking the first element with that text.
    """
    print("Vybírám roli Site Manager...")

    # Wait for a dropdown to be present.
    try:
        page.wait_for_selector("select", timeout=10_000)
    except PWTimeout:
        dbg(page, "no-select-found")
        return

    # Find the first dropdown offering a "Site Manager" option and select it.
    matched = False
    for dropdown in page.query_selector_all("select"):
        for option in dropdown.query_selector_all("option"):
            label = (option.inner_text() or "").strip()
            if "site manager" not in label.lower():
                continue
            dropdown.select_option(label=label)
            matched = True
            print(f" Vybráno: '{label}'")
            break
        if matched:
            break

    if not matched:
        print(" VAROVÁNÍ: Option 'Site Manager' nenalezena, zkouším kliknout na text...")
        try:
            page.get_by_text("Site Manager", exact=False).first.click()
        except Exception as e:
            print(f" {e}")

    # Click the first continue/submit control that exists; a failure on one
    # selector just moves on to the next.
    continue_selectors = (
        'input[value="Continue"]',
        'input[type="submit"]',
        'button:has-text("Continue")',
        'button[type="submit"]',
    )
    for css in continue_selectors:
        try:
            button = page.query_selector(css)
            if button:
                button.click()
                break
        except Exception:
            continue

    wait_load(page, 2000)
    dbg(page, "after-role")
|
||||
|
||||
|
||||
def navigate_to_reporter(page):
    """Click the "Reporter" link and wait for the page to load.

    Raises:
        PWTimeout: re-raised (after logging the URL) when the link or the
            click times out.
    """
    reporter_link = 'a:has-text("Reporter")'
    print("Klikám na Reporter...")
    try:
        page.wait_for_selector(reporter_link, timeout=15_000)
        page.click(reporter_link)
        wait_load(page, 1500)
        dbg(page, "reporter")
    except PWTimeout:
        dbg(page, "reporter-not-found")
        raise
|
||||
|
||||
|
||||
def open_report(page):
    """Open the prompts page of the report identified by REPORT_ID.

    Raises:
        PWTimeout: re-raised (after logging the URL) when the report link
            does not appear or the click times out.
    """
    print(f"Klikám na report ID={REPORT_ID} (Data Listing - Data Stream)...")
    report_link = 'a[href="PromptsPage.aspx?ReportID={}"]'.format(REPORT_ID)
    try:
        page.wait_for_selector(report_link, timeout=15_000)
        page.click(report_link)
        wait_load(page, 2000)
        dbg(page, "report-opened")
    except PWTimeout:
        dbg(page, "report-not-found")
        raise
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parametry reportu
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def set_study_param(page):
    """Expand the Study panel and tick the first study checkbox.

    The checkbox at index 0 is used; the original author noted it was
    verified to correspond to STUDY_NAME.
    """
    first_study_css = '#PromptsBox_st_FrontEndCBList_0'
    print(f" Parametr Study: {STUDY_NAME}")

    page.click('#PromptsBox_st_ShowHideBtn')
    page.wait_for_timeout(1500)

    page.wait_for_selector(first_study_css, timeout=10_000)
    study_box = page.locator(first_study_css)
    if not study_box.is_checked():
        study_box.check()
    # NOTE(review): original indentation was lost; the wait is assumed to run
    # whether or not the checkbox had to be ticked - confirm.
    wait_load(page, 3000)
    dbg(page, "after-study")
|
||||
|
||||
|
||||
def set_site_group_param(page):
    """Expand the Site Group panel, pick SITE_GROUP and include sub-groups."""
    toggle_css = '#PromptsBox_sg_ShowHideBtn'
    list_css = '#PromptsBox_sg_List'
    include_css = '#PromptsBox_sg_CheckBox'
    # Script that fires a bubbling 'change' event on the given element.
    fire_change = "document.querySelector('%s').dispatchEvent(new Event('change', {bubbles:true}))"

    print(f" Parametr Site Group: {SITE_GROUP}")

    # Expand the Site Group panel.
    page.click(toggle_css)
    page.wait_for_timeout(1500)

    # Select the group and fire 'change' explicitly; per the original note
    # the postback does not happen otherwise.
    page.wait_for_selector(list_css, timeout=10_000)
    page.select_option(list_css, label=SITE_GROUP)
    page.evaluate(fire_change % list_css)
    wait_load(page, 2000)

    # Include Sub Site Groups.
    print(" Include Sub Site Groups: zapnuto")
    include_sub = page.locator(include_css)
    if not include_sub.is_checked():
        include_sub.check()
        # NOTE(review): original indentation was lost; the change event is
        # assumed to be dispatched only when the box was just ticked - confirm.
        page.evaluate(fire_change % include_css)
    wait_load(page, 2000)

    # Collapsing the panel confirms the choice and triggers the postback
    # that populates the Form parameter.
    page.click(toggle_css)
    wait_load(page, 3000)
    dbg(page, "after-site-group")
|
||||
|
||||
|
||||
def set_form_param(page, form_name):
    """Select a form in the Form panel by searching for its name.

    The panel is expanded if collapsed, reset if it is in its "locked" mode,
    the name is searched for, and the first resulting checkbox is ticked.
    Per the original note the panel is single-selection, so ticking a new
    form unticks the previous one.

    Args:
        page: Playwright page showing the report prompts.
        form_name: Name typed into the panel's search box.

    Returns:
        True when a result checkbox appeared (and is now ticked), False when
        none appeared within the timeout. Earlier versions returned None in
        both cases, so callers that ignore the result are unaffected.
    """
    print(f" Parametr Form: {form_name}")

    # Expand the panel only when it is collapsed (checked via style.display).
    panel_display = page.locator('#PromptsBox_fm2_div').evaluate('el => el.style.display')
    if panel_display == 'none':
        page.click('#PromptsBox_fm2_ShowHideBtn')
        page.wait_for_timeout(2000)

    # After a previous download the panel is in "locked" mode:
    #   1st click (pencil) clears the selection and turns the button into an eye,
    #   2nd click (eye) reloads the list of all forms.
    if page.locator('#PromptsBox_fm2_PageModeBtn').is_visible():
        page.click('#PromptsBox_fm2_PageModeBtn')
        page.wait_for_timeout(1000)
        page.click('#PromptsBox_fm2_PageModeBtn')
        page.wait_for_timeout(2000)

    # Search: the click gives focus, Enter triggers the panel's search handler.
    search = page.locator('#PromptsBox_fm2_SearchTxt')
    search.wait_for(state='visible', timeout=10_000)
    search.click()
    search.fill(form_name)
    search.press('Enter')

    # Wait until the result list has been rendered.
    cb_locator = page.locator('input[id^="PromptsBox_fm2_FrontEndCBList_"]').first
    try:
        cb_locator.wait_for(state='visible', timeout=8_000)
    except PWTimeout:
        print(f" VAROVÁNÍ: '{form_name}' nenalezen nebo timeout!")
        # Report the failure so the caller can avoid submitting a report
        # with a stale or empty form selection.
        return False

    # A locator is re-evaluated on each use, so there is no stale handle.
    if not cb_locator.is_checked():
        cb_locator.click()
    # NOTE(review): original indentation was lost; the message and wait are
    # assumed to run even when the box was already ticked - confirm.
    print(f" '{form_name}' zaškrtnuto")
    wait_load(page, 500)
    return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Submit a download
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def submit_and_download(page, context, form_name):
    """Submit the report, wait for it to be generated and save it as CSV.

    Args:
        page: Playwright page showing the report prompts.
        context: Browser context; used to catch the pop-up window.
        form_name: Form the report was requested for; used in the filename.

    Returns:
        Path of the saved CSV file inside DOWNLOAD_DIR.
    """
    download_btn_css = 'input[value="Download File"], button:has-text("Download File")'

    print("Odesílám report (čekám na nové okno)...")
    with context.expect_page() as new_page_info:
        page.locator('input[value="Submit Report"], button:has-text("Submit Report")').first.click()

    new_page = new_page_info.value
    new_page.wait_for_url(lambda url: url != 'about:blank', timeout=30_000)
    # The window first shows "Loading"; wait for the download button.
    print(" Čekám na vygenerování reportu...")
    new_page.wait_for_selector(download_btn_css, timeout=300_000)  # up to 5 minutes
    new_page.wait_for_timeout(500)
    dbg(new_page, "download-window")

    print(" Nastavuji parametry stahování...")

    # Separator: comma (the default).
    sep = new_page.query_selector('input[name*="Separator"], input[name*="separator"]')
    if sep:
        sep.fill(',')

    # The download form may live in an iframe; use the first frame that
    # contains a <select> or the download button.
    target_frame = new_page.main_frame
    for frame in new_page.frames:
        if frame.query_selector('select') or frame.query_selector('input[value="Download File"]'):
            target_frame = frame
            print(f" Frame nalezen: {frame.url}")
            break

    # File type: .csv (application/vnd.ms-excel).
    for dropdown in target_frame.query_selector_all('select'):
        for option in dropdown.query_selector_all('option'):
            value = option.get_attribute('value') or ''
            text = option.inner_text() or ''
            if 'vnd.ms-excel' in value or 'vnd.ms-excel' in text:
                dropdown.select_option(value=value)
                print(" File type: .csv (application/vnd.ms-excel)")
                break

    # Export type: attachment. The option is matched case-insensitively, so
    # select it by its actual value rather than the literal 'attachment'.
    for dropdown in target_frame.query_selector_all('select'):
        for option in dropdown.query_selector_all('option'):
            value = option.get_attribute('value') or ''
            if 'attachment' in value.lower():
                dropdown.select_option(value=value)
                break

    # "Save as Unicode" is left at its default (unticked).

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
    # Strip characters that are awkward or invalid in filenames.
    form_slug = form_name.replace(" ", "").replace("/", "-").replace("(", "").replace(")", "")
    filename = f"{timestamp}_EDC_MDD3003_{form_slug}_DataListing.csv"
    output_path = DOWNLOAD_DIR / filename

    print("Stahuji CSV...")
    with new_page.expect_download(timeout=60_000) as dl_info:
        btn = target_frame.query_selector(download_btn_css)
        if btn:
            btn.click()
        else:
            new_page.locator(download_btn_css).first.click()

    download = dl_info.value
    download.save_as(str(output_path))
    print(f"\nHotovo! Soubor uložen: {output_path}")

    # Closing the pop-up is best effort; the file is already saved.
    try:
        new_page.close()
        print("Stahovací okno zavřeno.")
    except Exception:
        pass

    return output_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hlavní flow
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run():
    """Download a Data Listing CSV for every form in FORM_NAMES.

    Reuses a saved session when it is still valid, otherwise logs in, then
    walks through role selection, the Reporter module and the report
    prompts. Exits with status 1 when no password is configured.
    """
    if not PASSWORD:
        print("Chyba: nastav IMEDIDATA_PASSWORD v souboru .env")
        sys.exit(1)

    # parents=True so a nested download directory can be created in one go.
    DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False, slow_mo=200)
        try:
            ctx_kwargs = {"accept_downloads": True}

            use_saved = auth_valid()
            if use_saved:
                print("Načítám uloženou session (auth.json)...")
                ctx_kwargs["storage_state"] = str(AUTH_FILE)

            context = browser.new_context(**ctx_kwargs)
            page = context.new_page()

            # Go to SelectRole; a redirect to login/okta means we must log in.
            logged_in = go_to_select_role(page)

            if not logged_in:
                if use_saved:
                    print("Session expirovala, mažu auth.json a přihlašuji znovu...")
                    AUTH_FILE.unlink(missing_ok=True)
                do_login(page, context)
                go_to_select_role(page)

            # Step 4: role selection.
            select_role(page)

            # Step 5: Reporter.
            navigate_to_reporter(page)

            # Step 6: open the report.
            open_report(page)

            # Step 7: Study and Site Group are set once, Form per iteration.
            print("Nastavuji parametry reportu...")
            set_study_param(page)
            set_site_group_param(page)

            # Step 8: loop over the forms.
            for form_name in FORM_NAMES:
                print(f"\n=== Stahuji formulář: {form_name} ===")
                # An explicit False means the form could not be selected;
                # skip it instead of submitting with a stale selection.
                # A None result (no status reported) is treated as success.
                if set_form_param(page, form_name) is False:
                    print(f" VAROVÁNÍ: formulář '{form_name}' přeskočen.")
                    continue
                submit_and_download(page, context, form_name)
        finally:
            # Close the browser even when a step above raised.
            browser.close()
            print("Prohlížeč zavřen.")


if __name__ == "__main__":
    run()
|
||||
@@ -0,0 +1,440 @@
|
||||
"""
|
||||
download_uco3001.py
|
||||
NAHRAZENO skriptem download_edc_datalistings.py
|
||||
|
||||
Původně: stahování Data Listing reportů (ReportID=92) pro studii UCO3001.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
|
||||
import tkinter as tk
|
||||
from tkinter import simpledialog
|
||||
|
||||
# Directory holding this script; the .env file, the download folder and the
# saved session file are all resolved relative to it.
_SCRIPT_DIR = Path(__file__).parent

load_dotenv(_SCRIPT_DIR / ".env")

# Credentials are read from the environment (populated by load_dotenv above).
USERNAME = os.getenv("IMEDIDATA_USERNAME", "vladimir.buzalka")
PASSWORD = os.getenv("IMEDIDATA_PASSWORD", "")
# DOWNLOAD_DIR can be overridden through the environment. A relative value is
# resolved against the script directory; an unset or empty value falls back to
# the "downloads" folder that used to be hard-coded here.
DOWNLOAD_DIR = _SCRIPT_DIR / (os.getenv("DOWNLOAD_DIR") or "downloads")
AUTH_FILE = _SCRIPT_DIR / "auth.json"
AUTH_MAX_AGE_DAYS = 7

LOGIN_URL = "https://login.imedidata.com/login"
SELECT_ROLE_URL = (
    "https://jnjja.mdsol.com/MedidataRave/SelectRole.aspx"
    "?client_division_uuid=e5de55d5-a414-4bd1-9abe-18e96fd5475d"
    "&study_group_uuid=b0793ca6-33ec-44e8-883b-6fc1a4b671c4"
    "&studygroup_id=107981"
)

STUDY_SEARCH = "77242113UCO3001"  # matched as a substring of the study name
REPORT_ID = 92  # _EDC Std Rpt - Data Listing (Data Stream)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def auth_valid():
    """Return True when the saved session file exists and is recent enough.

    "Recent enough" means the file's modification time is less than
    AUTH_MAX_AGE_DAYS days in the past.
    """
    if AUTH_FILE.exists():
        modified = datetime.fromtimestamp(AUTH_FILE.stat().st_mtime)
        max_age = timedelta(days=AUTH_MAX_AGE_DAYS)
        return datetime.now() - modified < max_age
    return False
|
||||
|
||||
|
||||
def wait_load(page, extra_ms=1000):
    """Wait for the page's 'load' event, then pause for extra_ms milliseconds.

    A timeout while waiting for the load state is ignored.
    """
    load_timeout_ms = 20_000
    try:
        page.wait_for_load_state("load", timeout=load_timeout_ms)
    except PWTimeout:
        # Best effort: carry on and rely on the fixed pause below.
        pass
    page.wait_for_timeout(extra_ms)
|
||||
|
||||
|
||||
def dbg(page, label):
    """Print the page's current URL, prefixed with a bracketed label."""
    print("[" + str(label) + "] URL: " + str(page.url))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Login
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _ask_otp_popup():
    """Show a GUI dialog asking for the OKTA OTP code.

    Returns:
        The entered code with surrounding whitespace removed, or an empty
        string when the dialog was cancelled or left empty.
    """
    # A hidden root window is needed only as the dialog's parent.
    root = tk.Tk()
    try:
        root.withdraw()
        root.lift()
        root.attributes("-topmost", True)
        otp = simpledialog.askstring("OKTA MFA", "Zadej OTP kód z OKTA (6 číslic):", parent=root)
    finally:
        # Destroy the root even if the dialog raised, so no Tk window leaks.
        root.destroy()
    return (otp or "").strip()
|
||||
|
||||
|
||||
def do_login(page, context):
    """Log in to iMedidata and persist the browser session to AUTH_FILE.

    Handles an OKTA OTP prompt when one is detected. Exits the process with
    status 1 when no OTP is entered or the home page is not reached.
    """
    username_css = 'input[name="session[username]"]'
    password_css = 'input[name="session[password]"]'
    home_host = "home.imedidata.com"

    print("Přihlašuji se do iMedidata...")
    page.goto(LOGIN_URL)
    wait_load(page, 500)
    page.wait_for_selector(username_css, timeout=10_000)
    page.fill(username_css, USERNAME)
    page.fill(password_css, PASSWORD)
    page.click('button[type="submit"]')
    wait_load(page, 2000)
    dbg(page, "after-signin")

    if _okta_mfa_present(page):
        print("\n*** OKTA MFA vyžadována! ***")
        code = _ask_otp_popup()
        if not code:
            print("CHYBA: OTP nebylo zadáno.")
            sys.exit(1)
        _fill_otp(page, code)
        # NOTE(review): original indentation was lost; this wait is assumed
        # to belong to the MFA branch - confirm.
        wait_load(page, 3000)

    # A timeout is only logged here; the URL is checked explicitly below.
    try:
        page.wait_for_url(f"**/{home_host}**", timeout=30_000)
    except PWTimeout:
        dbg(page, "wait-home-timeout")

    if home_host not in page.url:
        print("CHYBA: Přihlášení se nezdařilo!")
        sys.exit(1)

    context.storage_state(path=str(AUTH_FILE))
    print("Session uložena do auth.json")
|
||||
|
||||
|
||||
def _okta_mfa_present(page):
    """Return True when the page looks like an OKTA MFA prompt.

    The page qualifies when its URL contains "okta" (case-insensitive) or
    when any of the known OTP input selectors matches an element.
    """
    on_okta_url = "okta" in page.url.lower()
    if on_okta_url:
        return True
    otp_input_selectors = (
        'input[name="answer"]',
        'input[name*="otp"]',
        'input[name*="code"]',
        'input[placeholder*="code" i]',
    )
    return any(page.query_selector(css) for css in otp_input_selectors)
|
||||
|
||||
|
||||
def _fill_otp(page, otp):
    """Enter the OTP code into the first matching input and press Enter.

    The candidate selectors are tried in order. When none matches, the code
    is typed through the page keyboard instead (into whatever has focus).
    """
    candidate_selectors = (
        'input[name="answer"]',
        'input[name*="otp"]',
        'input[name*="code"]',
        'input[type="tel"]',
        'input[placeholder*="code" i]',
    )
    # Lazy lookup: stop querying as soon as one selector yields an element.
    field = next(
        (hit for hit in map(page.query_selector, candidate_selectors) if hit),
        None,
    )
    if field is None:
        page.keyboard.type(otp)
    else:
        field.fill(otp)
    page.keyboard.press("Enter")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Navigace
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def go_to_select_role(page):
    """Navigate to the SelectRole page and report whether we stayed logged in.

    Returns:
        True when the resulting URL contains neither "login" nor "okta"
        (case-insensitive), False otherwise.
    """
    print("Navigace na SelectRole...")
    try:
        page.goto(SELECT_ROLE_URL)
    except Exception:
        # Navigation errors are ignored; the URL is inspected after loading.
        pass
    wait_load(page, 1500)
    dbg(page, "select-role")

    landed_on = page.url.lower()
    return not ("login" in landed_on or "okta" in landed_on)
|
||||
|
||||
|
||||
def select_role(page):
    """Choose the "Site Manager" role and click the continue button.

    Returns early when no <select> appears within 10 seconds. Only the first
    dropdown offering a "Site Manager" option is changed; a warning is
    printed when no dropdown offers it.
    """
    print("Vybírám roli Site Manager...")
    try:
        page.wait_for_selector("select", timeout=10_000)
    except PWTimeout:
        return

    role_selected = False
    for sel_el in page.query_selector_all("select"):
        for opt in sel_el.query_selector_all("option"):
            txt = (opt.inner_text() or "").strip()
            if "site manager" in txt.lower():
                sel_el.select_option(label=txt)
                print(f" Vybráno: '{txt}'")
                role_selected = True
                break
        if role_selected:
            # Stop scanning once the role is set: the inner break alone would
            # let the loop carry on into the remaining dropdowns.
            break

    if not role_selected:
        print(" VAROVÁNÍ: Option 'Site Manager' nenalezena.")

    # Click the first continue/submit control that exists.
    for btn_sel in ['input[value="Continue"]', 'input[type="submit"]',
                    'button:has-text("Continue")', 'button[type="submit"]']:
        btn = page.query_selector(btn_sel)
        if btn:
            btn.click()
            break

    wait_load(page, 2000)
    dbg(page, "after-role")
|
||||
|
||||
|
||||
def navigate_to_reporter(page):
    """Click the "Reporter" link and wait for the page to load."""
    reporter_link = 'a:has-text("Reporter")'
    print("Klikám na Reporter...")
    page.wait_for_selector(reporter_link, timeout=15_000)
    page.click(reporter_link)
    wait_load(page, 1500)
    dbg(page, "reporter")
|
||||
|
||||
|
||||
def open_report(page):
    """Open the prompts page of the report identified by REPORT_ID."""
    print(f"Otevírám report ID={REPORT_ID} (Data Listing - Data Stream)...")
    report_link = 'a[href="PromptsPage.aspx?ReportID={}"]'.format(REPORT_ID)
    page.wait_for_selector(report_link, timeout=15_000)
    page.click(report_link)
    wait_load(page, 2000)
    dbg(page, "report-opened")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parametry reportu
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def set_study_param(page):
    """Expand the Study panel and tick the study whose label contains STUDY_SEARCH.

    The match is case-insensitive. When no label matches, a warning is
    printed and the checkbox at index 0 is ticked instead.
    """
    checkbox_css = 'input[id^="PromptsBox_st_FrontEndCBList_"]'
    # Reads the text of the checkbox's table row (or cell / parent element).
    read_label_js = """id => {
        const el = document.getElementById(id);
        if (!el) return '';
        const row = el.closest('tr') || el.closest('td') || el.parentElement;
        return row ? row.innerText : '';
    }"""

    print(f" Parametr Study: hledám '{STUDY_SEARCH}'...")

    page.click('#PromptsBox_st_ShowHideBtn')
    page.wait_for_timeout(1500)

    # Walk the checkboxes looking for a label that contains STUDY_SEARCH.
    page.wait_for_selector(checkbox_css, timeout=10_000)
    for handle in page.query_selector_all(checkbox_css):
        cb_id = handle.get_attribute("id")
        label_text = page.evaluate(read_label_js, cb_id)
        print(f" [{cb_id}] label: {label_text.strip()[:80]}")
        if STUDY_SEARCH.upper() not in label_text.upper():
            continue
        box = page.locator(f"#{cb_id}")
        if not box.is_checked():
            box.check()
        print(f" Nalezeno a zaškrtnuto: '{label_text.strip()}'")
        break
    else:
        # Fallback: nothing matched, tick index 0 and warn.
        print(f" VAROVÁNÍ: Studie '{STUDY_SEARCH}' nenalezena! Zkouším index 0...")
        first_box = page.locator('#PromptsBox_st_FrontEndCBList_0')
        if not first_box.is_checked():
            first_box.check()

    wait_load(page, 3000)
    dbg(page, "after-study")
|
||||
|
||||
|
||||
def set_site_group_param(page, country: str):
    """Expand the Site Group panel, pick the given group and include sub-groups."""
    toggle_css = '#PromptsBox_sg_ShowHideBtn'
    list_css = '#PromptsBox_sg_List'
    include_css = '#PromptsBox_sg_CheckBox'
    # Script that fires a bubbling 'change' event on the given element.
    fire_change = "document.querySelector('%s').dispatchEvent(new Event('change', {bubbles:true}))"

    print(f" Parametr Site Group: {country}")

    page.click(toggle_css)
    page.wait_for_timeout(1500)

    page.wait_for_selector(list_css, timeout=10_000)
    page.select_option(list_css, label=country)
    page.evaluate(fire_change % list_css)
    wait_load(page, 2000)

    include_sub = page.locator(include_css)
    if not include_sub.is_checked():
        include_sub.check()
        # NOTE(review): original indentation was lost; the change event is
        # assumed to be dispatched only when the box was just ticked - confirm.
        page.evaluate(fire_change % include_css)
    wait_load(page, 2000)

    # Collapsing the panel confirms the choice and triggers the postback
    # that populates the Form parameter.
    page.click(toggle_css)
    wait_load(page, 3000)
    dbg(page, "after-site-group")
|
||||
|
||||
|
||||
def set_form_param(page, form_name: str):
    """Select a form in the Form panel by searching for its name.

    Expands the panel if collapsed, double-clicks the page-mode button when
    it is visible, searches for form_name and ticks the first resulting
    checkbox. Prints a warning and returns when no result appears in time.
    """
    mode_btn_css = '#PromptsBox_fm2_PageModeBtn'
    print(f" Parametr Form: {form_name}")

    panel_display = page.locator('#PromptsBox_fm2_div').evaluate('el => el.style.display')
    if panel_display == 'none':
        page.click('#PromptsBox_fm2_ShowHideBtn')
        page.wait_for_timeout(2000)

    if page.locator(mode_btn_css).is_visible():
        page.click(mode_btn_css)
        page.wait_for_timeout(1000)
        page.click(mode_btn_css)
        page.wait_for_timeout(2000)

    search_box = page.locator('#PromptsBox_fm2_SearchTxt')
    search_box.wait_for(state='visible', timeout=10_000)
    search_box.click()
    search_box.fill(form_name)
    page.wait_for_timeout(2000)
    search_box.press('Enter')
    page.wait_for_timeout(2000)

    first_result = page.locator('input[id^="PromptsBox_fm2_FrontEndCBList_"]').first
    try:
        first_result.wait_for(state='visible', timeout=8_000)
    except PWTimeout:
        print(f" VAROVÁNÍ: '{form_name}' nenalezen!")
        return

    if not first_result.is_checked():
        first_result.click()
    # NOTE(review): original indentation was lost; the message and wait are
    # assumed to run even when the box was already ticked - confirm.
    print(f" '{form_name}' zaškrtnuto")
    page.wait_for_timeout(2000)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Submit a download
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def submit_and_download(page, context, form_name: str, country: str | None):
    """Submit the report, wait for it to be generated and save it as CSV.

    Args:
        page: Playwright page showing the report prompts.
        context: Browser context; used to catch the pop-up window.
        form_name: Form the report was requested for; used in the filename.
        country: Site group code used in the filename; "ALL" is used when
            it is None or empty.

    Returns:
        Path of the saved CSV file inside DOWNLOAD_DIR.
    """
    download_btn_css = 'input[value="Download File"], button:has-text("Download File")'

    print("Odesílám report...")
    with context.expect_page() as new_page_info:
        page.locator('input[value="Submit Report"], button:has-text("Submit Report")').first.click()

    new_page = new_page_info.value
    new_page.wait_for_url(lambda url: url != 'about:blank', timeout=30_000)

    print(" Čekám na vygenerování reportu (max 5 min)...")
    new_page.wait_for_selector(download_btn_css, timeout=300_000)
    new_page.wait_for_timeout(500)
    dbg(new_page, "download-window")

    # Use the first frame that contains a <select> or the download button.
    target_frame = new_page.main_frame
    for frame in new_page.frames:
        if frame.query_selector('select') or frame.query_selector('input[value="Download File"]'):
            target_frame = frame
            break

    # File type: .csv (application/vnd.ms-excel).
    for dropdown in target_frame.query_selector_all('select'):
        for option in dropdown.query_selector_all('option'):
            value = option.get_attribute('value') or ''
            if 'vnd.ms-excel' in value:
                dropdown.select_option(value=value)
                print(" File type: .csv (application/vnd.ms-excel)")
                break

    # Export type: attachment. The option is matched case-insensitively, so
    # select it by its actual value rather than the literal 'attachment'.
    for dropdown in target_frame.query_selector_all('select'):
        for option in dropdown.query_selector_all('option'):
            value = option.get_attribute('value') or ''
            if 'attachment' in value.lower():
                dropdown.select_option(value=value)
                break

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
    country_slug = country if country else "ALL"
    # Strip characters that are awkward or invalid in filenames.
    form_slug = form_name.replace(" ", "").replace("/", "-").replace("(", "").replace(")", "")
    filename = f"{timestamp}_EDC_UCO3001_{country_slug}_{form_slug}_DataListing.csv"
    output_path = DOWNLOAD_DIR / filename

    print("Stahuji CSV...")
    with new_page.expect_download(timeout=60_000) as dl_info:
        btn = target_frame.query_selector(download_btn_css)
        if btn:
            btn.click()
        else:
            new_page.locator(download_btn_css).first.click()

    dl_info.value.save_as(str(output_path))
    print(f"\nHotovo! Soubor uložen: {output_path}")

    # Closing the pop-up is best effort; the file is already saved.
    try:
        new_page.close()
    except Exception:
        pass

    return output_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hlavní funkce
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def download_datalisting_reports_3001(form_name: str, country: str | None = None):
    """Download one Data Listing report for the study selected by STUDY_SEARCH.

    Args:
        form_name: Form name, e.g. "Trial Disposition (Completion / Discontinuation)".
        country: Site group code, e.g. "CZE". When None (or empty) the Site
            Group filter is not set, i.e. all countries are included.

    Returns:
        Path of the saved CSV file. Exits with status 1 when no password is
        configured.
    """
    if not PASSWORD:
        print("Chyba: nastav IMEDIDATA_PASSWORD v souboru .env")
        sys.exit(1)

    DOWNLOAD_DIR.mkdir(exist_ok=True)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False, slow_mo=200)

        context_options = {"accept_downloads": True}
        reuse_session = auth_valid()
        if reuse_session:
            print("Načítám uloženou session (auth.json)...")
            context_options["storage_state"] = str(AUTH_FILE)

        context = browser.new_context(**context_options)
        page = context.new_page()

        # A redirect to login/okta means the session is missing or expired.
        if not go_to_select_role(page):
            if reuse_session:
                print("Session expirovala, přihlašuji znovu...")
                AUTH_FILE.unlink(missing_ok=True)
            do_login(page, context)
            go_to_select_role(page)

        select_role(page)
        navigate_to_reporter(page)
        open_report(page)

        print("\nNastavuji parametry reportu...")
        set_study_param(page)

        if not country:
            print(" Parametr Site Group: přeskočen (všechny země)")
        else:
            set_site_group_param(page, country)

        print(f"\n=== Stahuji formulář: {form_name} ===")
        set_form_param(page, form_name)
        saved_path = submit_and_download(page, context, form_name, country)

        browser.close()
        print("Prohlížeč zavřen.")

    return saved_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
if __name__ == "__main__":
    # Usage examples:
    #   python download_uco3001.py
    #   python download_uco3001.py CZE
    # The optional first argument is the site group (country) code.
    cli_args = sys.argv[1:]
    download_datalisting_reports_3001(
        form_name="Trial Disposition (Completion / Discontinuation)",
        country=cli_args[0] if cli_args else None,
    )
|
||||
@@ -0,0 +1,451 @@
|
||||
"""
|
||||
Import EDC CSV reportů do MongoDB.
|
||||
|
||||
Použití:
|
||||
python edc_import.py report.csv
|
||||
python edc_import.py reports/*.csv
|
||||
python edc_import.py report.csv --host mongodb://192.168.1.100:27017 --db klinicka_studie
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import glob
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from pymongo import MongoClient, ASCENDING
|
||||
from pymongo.errors import PyMongoError
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Logging
|
||||
# ---------------------------------------------------------------------------
|
||||
# Root logger: INFO and above goes to a UTF-8 log file and to stdout.
# stdout is re-opened through its file descriptor so that the stream is
# UTF-8 encoded; closefd=False keeps the underlying descriptor open when
# the wrapper object is closed.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
    handlers=[
        logging.FileHandler("edc_import.log", encoding="utf-8"),
        logging.StreamHandler(
            open(sys.stdout.fileno(), mode="w", encoding="utf-8", closefd=False)
        ),
    ],
)
log = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mapování pevných CSV sloupců na MongoDB cesty
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixed CSV column name -> dotted path inside the MongoDB document.
FIXED_FIELDS = {
    # study / site
    "StudyName": "study",
    "SiteGroupName": "site.group",
    "SiteID": "site.id",
    "SiteNumber": "site.number",
    "Site": "site.name",
    # subject
    "SubjectID": "subject.id",
    "Subject": "subject.label",
    # form instance
    "CRFVersionID": "form.crfVersionId",
    "InstanceID": "form.instanceId",
    "InstanceName": "form.instanceName",
    "FolderSeq": "form.folderSeq",
    "Page": "form.page",
    "RecordID": "form.recordId",
    "RecordPosition": "form.recordPosition",
    # timestamps
    "LastModifiedDate": "lastModified",
    "PrintDateTime": "importedAt",
}

# Administrative columns that are stored under "_meta".
META_FIELDS = {"RunUser", "VersionNumber", "FilterField"}

# Field labels whose values are converted to int.
INT_FIELDS = {"Elapsed days"}

# Date formats tried, in this order, when parsing a value as a date.
DATE_FORMATS = [
    "%d %b %Y %H:%M:%S",      # 20 MAY 2026 12:06:18
    "%d %b %Y %H:%M:%S:%f",   # 10 Aug 2025 18:13:22:080  (EDC query dates)
    "%Y%m%d %H:%M:%S.%f",     # 20250810 18:13:22.080     (sortable query dates)
    "%Y-%m-%d %H:%M:%S",      # 2026-05-20 12:06:28
    "%Y-%m-%dT%H:%M:%S",
    "%Y-%m-%dT%H:%M:%S.%fZ",
    "%d/%m/%Y %H:%M:%S",
    "%m/%d/%Y %H:%M:%S",
    "%m/%d/%Y %I:%M:%S %p",   # 5/20/2026 1:23:27 PM
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# QueryDetails — detekce a mapování
|
||||
# ---------------------------------------------------------------------------
|
||||
# A CSV whose header contains this column is treated as a QueryDetails report.
QUERY_DETAIL_MARKER = "QueryID(ReQry)"

# QueryDetails columns that are copied into the document's "_meta" sub-document.
QUERY_META_FIELDS = {
    # report parameters
    "StudyParameter",
    "SiteGroupParameter",
    "SiteNumberParameter",
    "SiteParameter",
    "SubjectParameter",
    "SubjectStatusParameter",
    "FolderParameter",
    "FormParameter",
    "FieldParameter",
    "MarkingGroupParameter",
    "QueryStatusParameter",
    "IncludeInactivePagesParameter",
    "PageSDVParameter",
    "PageFrozenParameter",
    "PageLockedParameter",
    "StartDateParameter",
    "EndDateParameter",
    "MilestoneParameter",
    "ReportTypeParameter",
    "VersionNumber",
    "TimeZone",
    "RunUser",
    "ErrorString",
    # sortable dates: redundant, the dates are parsed from the main columns
    "OpenedDateSrtble",
    "AnsweredDateSrtble",
    "ClosedDateSrtble",
    # aggregate counts: kept in meta
    "VisitSiteLevel",
    "VisitCountryLevel",
    "VisitStudyLevel",
    "PageSubjectLevel",
    "PageSiteLevel",
    "PageCountryLevel",
    "PageStudyLevel",
    "Queries (Op/Ans/SDV)",
}
|
||||
|
||||
|
||||
def is_query_details(fieldnames: list[str]) -> bool:
    """Return True when a CSV header belongs to a QueryDetails report.

    The report is recognised by the presence of the ``QUERY_DETAIL_MARKER``
    column among *fieldnames*.
    """
    has_marker = QUERY_DETAIL_MARKER in fieldnames
    return has_marker
|
||||
|
||||
|
||||
def map_query_row(row: dict, source_file: str) -> dict:
    """Map one row of a QueryDetails report to a MongoDB document.

    Args:
        row: One ``csv.DictReader`` row (column name -> raw string). Missing
            or ``None`` values are treated as empty strings.
        source_file: File name stored in the document's ``sourceFile`` field.

    Returns:
        The document. Top-level keys whose value is ``None`` are dropped,
        except ``queryId``, which is always kept because it is the upsert key.
        Nested sub-documents (``site``, ``subject``, ``_meta``) are kept as-is.
    """

    def val(col: str) -> str:
        # DictReader pads short rows with None, so guard before stripping.
        return (row.get(col) or "").strip()

    def int_or_none(col: str):
        # Empty -> None, numeric -> int, anything else -> the original text.
        text = val(col)
        if text == "":
            return None
        try:
            return int(text)
        except ValueError:
            return text

    def date_or_str(col: str):
        # Empty -> None, parseable -> ISO string, anything else -> original text.
        text = val(col)
        if not text:
            return None
        parsed = parse_date(text)
        return parsed if parsed else text

    # Only non-empty administrative columns go to _meta. Going through val()
    # keeps this safe for None values, which previously raised
    # AttributeError on rows shorter than the header.
    meta = {k: v for k in QUERY_META_FIELDS if (v := val(k))}

    doc = {
        "study": val("StudyParameter"),
        "site": {
            "group": val("Country/Region"),
            "number": val("Site Number"),
            "name": val("Sites"),
        },
        "subject": {
            "label": val("Subjects"),
            "status": val("Subject Status"),
        },
        "visit": val("Visits"),
        "page": val("Pages"),
        "recordPosition": int_or_none("RecordPosition"),
        "field": val("Field"),
        "queryGroup": val("Query Group"),
        "queryId": val(QUERY_DETAIL_MARKER),
        "queryStatus": val("QueryStatus"),
        "openedBy": val("Opened By"),
        "openedDate": date_or_str("Opened Date"),
        "answeredBy": val("Answered By") or None,
        "answeredDate": date_or_str("Answered Date"),
        "closedBy": val("Closed By") or None,
        "closedDate": date_or_str("Closed Date"),
        "daysNotYetClosed": int_or_none("DaysNotYetClosed"),
        "daysToAnswer": int_or_none("Days to Answer"),
        "daysToClose": int_or_none("Days to Close"),
        "queryText": val("QueryText"),
        "answerText": val("Answer Text (if any)") or None,
        "importedAt": date_or_str("PrintDateTime"),
        "sourceFile": source_file,
        "_meta": meta,
    }

    # Drop None values from the top level only (nested dicts are untouched).
    return {k: v for k, v in doc.items() if v is not None or k == "queryId"}
|
||||
|
||||
|
||||
def ensure_query_indexes(collection) -> None:
    """Create the indexes used by the queries collection.

    ``queryId`` gets a unique, sparse index; the remaining fields get plain
    single-field ascending indexes.
    """
    collection.create_index([("queryId", ASCENDING)], unique=True, sparse=True)
    for field in ("subject.label", "site.number", "queryStatus", "openedDate"):
        collection.create_index([(field, ASCENDING)])
|
||||
|
||||
|
||||
def ensure_snapshot_indexes(collection) -> None:
    """Create the indexes for the query snapshots collection.

    The pair (queryId, snapshotDate) is indexed as unique; the other fields
    get plain single-field ascending indexes.
    """
    snapshot_key = [("queryId", ASCENDING), ("snapshotDate", ASCENDING)]
    collection.create_index(snapshot_key, unique=True)
    for field in ("snapshotDate", "queryStatus", "site.number", "subject.label"):
        collection.create_index([(field, ASCENDING)])
|
||||
|
||||
|
||||
def extract_snapshot_date(filename: str) -> str:
    """Return the ``YYYY-MM-DD`` date that prefixes the file's base name.

    Example:
        '2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv' -> '2026-05-20'

    When the base name does not start with such a date, today's date (UTC)
    is returned instead.
    """
    basename = Path(filename).name
    leading_date = re.match(r"(\d{4}-\d{2}-\d{2})", basename)
    if leading_date is None:
        return datetime.now(timezone.utc).strftime("%Y-%m-%d")
    return leading_date.group(1)
|
||||
|
||||
|
||||
def parse_date(value: str) -> str | None:
    """Try to convert *value* to an ISO 8601 string.

    Each pattern in ``DATE_FORMATS`` is tried in order; the first that
    matches wins. The parsed timestamp is tagged as UTC without any
    conversion. Returns None when no pattern matches.
    """
    text = value.strip()
    for pattern in DATE_FORMATS:
        try:
            moment = datetime.strptime(text, pattern)
        except ValueError:
            continue
        return moment.replace(tzinfo=timezone.utc).isoformat()
    return None
|
||||
|
||||
|
||||
def set_nested(doc: dict, path: str, value: str) -> None:
    """Store *value* in a nested dict under a dotted *path* such as 'site.id'.

    Intermediate dictionaries are created on demand; *doc* is modified in
    place.
    """
    *parents, leaf = path.split(".")
    node = doc
    for key in parents:
        node = node.setdefault(key, {})
    node[leaf] = value
|
||||
|
||||
|
||||
def collection_name_from_filename(filename: str) -> str:
    """Derive the collection name from a report file name.

    Examples:
        '2026-05-20_15-09_EDC_MDD3003_InterimInvestigatorSignature_DataListing.csv'
            -> 'MDD3003_InterimInvestigatorSignature'
        '2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv'
            -> 'MDD3003_QueryDetails'

    If the stem contains no 'EDC_' part, the whole stem is returned.
    """
    stem = Path(filename).stem
    candidates = (
        r"EDC_(.+?)_DataListing",  # name carries the _DataListing suffix
        r"EDC_(.+)$",              # no suffix (e.g. QueryDetails)
    )
    for pattern in candidates:
        found = re.search(pattern, stem, re.IGNORECASE)
        if found:
            return found.group(1)
    return stem
|
||||
|
||||
|
||||
def map_row(row: dict, source_file: str) -> dict:
    """Map one Data Listing CSV row to a MongoDB document.

    * Columns listed in ``FIXED_FIELDS`` are written to their dotted path
      (folder sequence / record position as int when possible, the two
      timestamps as ISO strings when parseable).
    * ``META_FIELDS`` columns and any other column not starting with
      ``Field<N>`` are stored under ``_meta`` when non-empty.
    * ``Field<N>Label`` / ``Field<N>Value`` pairs become entries of
      ``fields`` (label -> value), starting at N=1 and stopping at the first
      N for which neither column exists.
    """
    doc: dict = {}
    meta: dict = {}
    fields: dict = {}
    columns = set(row.keys())

    for col, raw in row.items():
        text = raw.strip() if raw else ""

        target = FIXED_FIELDS.get(col)
        if target is not None:
            if target in ("form.folderSeq", "form.recordPosition"):
                try:
                    text = int(text)
                except (ValueError, TypeError):
                    pass  # keep the original text when it is not numeric
            elif target in ("lastModified", "importedAt"):
                text = parse_date(text) or text
            set_nested(doc, target, text)
            continue

        # Field<N>... columns are never meta; the Label/Value pairs among
        # them are handled by the loop below, the rest is ignored.
        if col in META_FIELDS or not re.match(r"^Field\d+", col):
            if text:
                meta[col] = text

    # Field1Value/Field1Label ... Field300Value/Field300Label pairs.
    index = 1
    while f"Field{index}Value" in columns or f"Field{index}Label" in columns:
        label = (row.get(f"Field{index}Label") or "").strip()
        content = (row.get(f"Field{index}Value") or "").strip()
        if label and content:
            if label in INT_FIELDS:
                try:
                    fields[label] = int(content)
                except ValueError:
                    fields[label] = content
            else:
                # Store an ISO string when the value parses as a date.
                fields[label] = parse_date(content) or content
        index += 1

    doc["fields"] = fields
    doc["sourceFile"] = source_file
    if meta:
        doc["_meta"] = meta
    return doc
|
||||
|
||||
|
||||
def ensure_indexes(collection) -> None:
    """Create the indexes used by a Data Listing collection.

    ``form.recordId`` gets a unique, sparse index; the remaining fields get
    plain single-field ascending indexes.
    """
    collection.create_index([("form.recordId", ASCENDING)], unique=True, sparse=True)
    for field in ("subject.id", "site.id", "study", "lastModified"):
        collection.create_index([(field, ASCENDING)])
|
||||
|
||||
|
||||
def import_file(
    csv_path: str,
    collection,
    snapshot_col=None,
    snapshot_date: str | None = None,
) -> tuple[int, int, int]:
    """Import one CSV file into *collection*.

    The header decides the mode: QueryDetails reports are upserted by
    ``queryId``; other reports are upserted by ``form.recordId`` (or plainly
    inserted when the row has no record id).

    Args:
        csv_path: Path of the CSV file to read.
        collection: Target MongoDB collection.
        snapshot_col: Optional collection for daily snapshots; only used for
            QueryDetails files and only when *snapshot_date* is given.
        snapshot_date: Snapshot date stored with every snapshot document.

    Returns:
        ``(inserted, updated, errors)``. ``updated`` counts upserts that did
        not create a new document; ``errors`` counts rows that raised.
        A failing row is logged and skipped, it does not abort the import.
    """
    inserted = updated = errors = 0
    source_file = Path(csv_path).name

    # "utf-8-sig" reads plain UTF-8 unchanged but strips a leading BOM.
    # With plain "utf-8" a BOM would become part of the first header name,
    # so that column would no longer match its expected name.
    with open(csv_path, encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f, delimiter=",", quotechar='"')
        query_mode = is_query_details(reader.fieldnames or [])

        # Row numbering starts at 2 because line 1 is the header.
        for line_no, row in enumerate(reader, start=2):
            try:
                if query_mode:
                    doc = map_query_row(row, source_file)
                    upsert_key = {"queryId": doc["queryId"]}

                    # Snapshot: upsert on (queryId, snapshotDate).
                    if snapshot_col is not None and snapshot_date:
                        snap_doc = {**doc, "snapshotDate": snapshot_date}
                        snapshot_col.update_one(
                            {"queryId": doc["queryId"], "snapshotDate": snapshot_date},
                            {"$set": snap_doc},
                            upsert=True,
                        )
                else:
                    doc = map_row(row, source_file)
                    record_id = doc.get("form", {}).get("recordId")
                    upsert_key = {"form.recordId": record_id} if record_id else None

                if upsert_key:
                    result = collection.update_one(
                        upsert_key,
                        {"$set": doc},
                        upsert=True,
                    )
                    if result.upserted_id:
                        inserted += 1
                    else:
                        updated += 1
                else:
                    collection.insert_one(doc)
                    inserted += 1

            except PyMongoError as e:
                errors += 1
                log.error("Řádek %d v %s: MongoDB chyba: %s", line_no, csv_path, e)
            except Exception as e:
                # Mapping problems of a single row must not stop the file.
                errors += 1
                log.error("Řádek %d v %s: %s", line_no, csv_path, e)

    return inserted, updated, errors
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: import the given CSV reports into MongoDB.

    Positional arguments are CSV paths or glob patterns; ``--host`` and
    ``--db`` select the MongoDB URI and database. QueryDetails files go to
    the ``queries`` collection (plus ``queries_snapshots``); every other file
    goes to a collection named after the file. Exits with status 1 when no
    file is given or the server cannot be reached.
    """
    parser = argparse.ArgumentParser(description="Import EDC CSV reportů do MongoDB")
    parser.add_argument("files", nargs="+", help="CSV soubory nebo glob vzor")
    parser.add_argument("--host", default="mongodb://192.168.1.76:27017", help="MongoDB URI")
    parser.add_argument("--db", default="edc", help="Název databáze")
    args = parser.parse_args()

    # Expand glob patterns ourselves (needed on Windows, where the shell
    # does not). A pattern without matches is kept as a literal path so the
    # "file does not exist" warning below names it.
    paths: list[str] = []
    for pattern in args.files:
        expanded = glob.glob(pattern)
        paths.extend(expanded if expanded else [pattern])

    if not paths:
        log.error("Žádné soubory nenalezeny.")
        sys.exit(1)

    client = MongoClient(args.host, serverSelectionTimeoutMS=5000)
    # try/finally so the client is closed on every path, including
    # sys.exit() and unexpected exceptions.
    try:
        try:
            client.admin.command("ping")
        except Exception as e:
            log.error("Nelze se připojit k MongoDB (%s): %s", args.host, e)
            sys.exit(1)

        db = client[args.db]
        total_inserted = total_updated = total_errors = 0

        for csv_path in paths:
            if not os.path.isfile(csv_path):
                log.warning("Soubor neexistuje, přeskakuji: %s", csv_path)
                continue

            # Detect the report type from the header and pick the
            # collection and indexes. "utf-8-sig" strips a BOM, if any, so
            # the first column name is compared without the BOM character.
            with open(csv_path, encoding="utf-8-sig", newline="") as f:
                fieldnames = csv.DictReader(f).fieldnames or []

            if is_query_details(fieldnames):
                col_name = "queries"
                collection = db[col_name]
                ensure_query_indexes(collection)
                snapshot_col = db["queries_snapshots"]
                ensure_snapshot_indexes(snapshot_col)
                snapshot_date = extract_snapshot_date(csv_path)
                log.info("Importuji: %s → %s.%s + queries_snapshots [%s]",
                         csv_path, args.db, col_name, snapshot_date)
            else:
                col_name = collection_name_from_filename(csv_path)
                collection = db[col_name]
                ensure_indexes(collection)
                snapshot_col = None
                snapshot_date = None
                log.info("Importuji: %s → %s.%s", csv_path, args.db, col_name)

            inserted, updated, errors = import_file(
                csv_path, collection, snapshot_col, snapshot_date
            )
            total_inserted += inserted
            total_updated += updated
            total_errors += errors
            log.info(" nové: %d aktualizované: %d chyby: %d", inserted, updated, errors)

        log.info("=" * 60)
        log.info("Celkem — nové: %d aktualizované: %d chyby: %d",
                 total_inserted, total_updated, total_errors)
    finally:
        client.close()


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,43 @@
|
||||
# create_report_v1.0.md
|
||||
|
||||
**Skript:** `create_report_v1.0.py`
|
||||
**Verze:** 1.0
|
||||
**Datum:** 2026-06-01
|
||||
|
||||
## Popis
|
||||
|
||||
Generuje Excel EDC DataListing report pro studii **77242113UCO3001** z MongoDB (db: `edc`).
|
||||
|
||||
## Výstup
|
||||
|
||||
`Medidata/reports/YYYY-MM-DD 77242113UCO3001 EDC DataListing v1.0.xlsx`
|
||||
|
||||
Stará verze se automaticky přesune do `reports/TRASH/`.
|
||||
|
||||
## Listy
|
||||
|
||||
| List | Kolekce MongoDB | Záznamy (CZE) |
|
||||
|------|----------------|---------------|
|
||||
| DateofVisit | UCO3001.DateofVisit | 55 |
|
||||
| ConcomitantTherapy | UCO3001.ConcomitantTherapy | 91 |
|
||||
| TrialDisposition | UCO3001.TrialDispositionCompletion-Discontinuation | 3 |
|
||||
|
||||
## Sloupce (každý list)
|
||||
|
||||
**Pevné:** SiteNumber · SiteName · Subject · Visit · FolderSeq · RecordPos · LastModified
|
||||
|
||||
**Dynamické:** všechny klíče z `fields{}` v pořadí výskytu v MongoDB
|
||||
|
||||
## Formátování
|
||||
|
||||
- Záhlaví: tmavomodrý fill, bílý tučný text, Calibri 10
|
||||
- Data: Calibri 10, tenké ohraničení
|
||||
- Zmrazení řádku 1, autofilter, šířky sloupců auto (max 55)
|
||||
- Datumy: DD-MMM-YYYY (čas jen pokud != 00:00)
|
||||
|
||||
## Spuštění
|
||||
|
||||
```
|
||||
cd Medidata
|
||||
python create_report_v1.0.py
|
||||
```
|
||||
@@ -0,0 +1,210 @@
|
||||
"""
|
||||
create_report_v1.0.py
|
||||
Verze: 1.0
|
||||
Datum: 2026-06-01
|
||||
Popis: Excel EDC DataListing report pro studii UCO3001 z MongoDB (db: edc).
|
||||
Jeden list per kolekce (DateofVisit / ConcomitantTherapy / TrialDisposition).
|
||||
Sloupce: SiteNumber, SiteName, Subject, Visit, FolderSeq, RecordPos,
|
||||
LastModified + dynamické fields.* z MongoDB.
|
||||
Výstup: reports/YYYY-MM-DD 77242113UCO3001 EDC DataListing v1.0.xlsx
|
||||
"""
|
||||
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
|
||||
from openpyxl.utils import get_column_letter
|
||||
from pymongo import ASCENDING, MongoClient
|
||||
|
||||
# ── Konfigurace ───────────────────────────────────────────────────────────────
|
||||
|
||||
# MongoDB connection and report identity.
MONGO_URI = "mongodb://192.168.1.76:27017"
DB_NAME = "edc"
STUDY_FULL = "77242113UCO3001"
VERSION = "1.0"

# Reports are written next to this script; older ones are moved to TRASH.
OUTPUT_DIR = Path(__file__).parent / "reports"
TRASH_DIR = OUTPUT_DIR / "TRASH"

# One worksheet is produced per collection, in this order.
COLLECTIONS = [
    "UCO3001.DateofVisit",
    "UCO3001.ConcomitantTherapy",
    "UCO3001.TrialDispositionCompletion-Discontinuation",
]

# ── Formatting ────────────────────────────────────────────────────────────────

HEADER_FILL = PatternFill("solid", fgColor="1F4E79")
HEADER_FONT = Font(bold=True, color="FFFFFF", name="Calibri", size=10)
DATA_FONT = Font(name="Calibri", size=10)
THIN = Side(style="thin", color="CCCCCC")
BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)

# ── Fixed columns ─────────────────────────────────────────────────────────────

# (header, getter) pairs; each getter reads one value from a MongoDB
# document and falls back to "" when the key is missing.
FIXED_COLS = [
    ("SiteNumber",   lambda d: d.get("site", {}).get("number", "")),
    ("SiteName",     lambda d: d.get("site", {}).get("name", "")),
    ("Subject",      lambda d: d.get("subject", {}).get("label", "")),
    ("Visit",        lambda d: d.get("form", {}).get("instanceName", "")),
    ("FolderSeq",    lambda d: d.get("form", {}).get("folderSeq", "")),
    ("RecordPos",    lambda d: d.get("form", {}).get("recordPosition", "")),
    ("LastModified", lambda d: _fmt(d.get("lastModified", ""))),
]
|
||||
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def _fmt(value: str) -> str:
|
||||
"""ISO datetime string → 'DD-MMM-YYYY' nebo 'DD-MMM-YYYY HH:MM'."""
|
||||
if not value:
|
||||
return ""
|
||||
try:
|
||||
dt = datetime.fromisoformat(value)
|
||||
if dt.hour == 0 and dt.minute == 0 and dt.second == 0:
|
||||
return dt.strftime("%d-%b-%Y")
|
||||
return dt.strftime("%d-%b-%Y %H:%M")
|
||||
except Exception:
|
||||
return value
|
||||
|
||||
|
||||
def _fmt_field(value) -> str:
|
||||
"""Naformátuje hodnotu z fields{} — datum nebo string."""
|
||||
if isinstance(value, str) and "T" in value and value.endswith(("+00:00", "Z")):
|
||||
return _fmt(value)
|
||||
return value if value is not None else ""
|
||||
|
||||
|
||||
# ConcomitantTherapy field keys that are moved to the end of the sheet.
COLS_LAST_CT = [
    "CMTRT_ATC1",
    "CMTRT_ATC2",
    "CMTRT_ATC3",
    "CMTRT_ATC4",
    "CMTRT_RXPREF",
    "CMTRT_TRADE_NAME",
    "CMTRT_ATC1_CODE",
    "CMTRT_ATC2_CODE",
    "CMTRT_ATC3_CODE",
    "CMTRT_ATC4_CODE",
    "CMTRT_RXPREF_CODE",
    "CMTRT_TRADE_NAME_CODE",
]
|
||||
|
||||
|
||||
def _field_keys(docs: list, last: list | None = None) -> list:
|
||||
"""Vrátí seznam unikátních klíčů z fields{} — klíče v `last` přesunuty na konec."""
|
||||
seen = set()
|
||||
keys = []
|
||||
for doc in docs:
|
||||
for k in doc.get("fields", {}).keys():
|
||||
if k not in seen:
|
||||
seen.add(k)
|
||||
keys.append(k)
|
||||
if last:
|
||||
tail = [k for k in last if k in seen]
|
||||
keys = [k for k in keys if k not in set(tail)] + tail
|
||||
return keys
|
||||
|
||||
|
||||
def _sheet_name(collection: str) -> str:
|
||||
"""UCO3001.SomeName → SomeName (max 31 znaků pro Excel)."""
|
||||
name = collection.split(".", 1)[-1]
|
||||
abbreviations = {
|
||||
"TrialDispositionCompletion-Discontinuation": "TrialDisposition",
|
||||
}
|
||||
return abbreviations.get(name, name)[:31]
|
||||
|
||||
|
||||
# ── Zápis listu ───────────────────────────────────────────────────────────────
|
||||
|
||||
def write_sheet(ws, docs: list, last_cols: list | None = None) -> None:
    """Fill worksheet *ws* with one header row and one row per document.

    Columns are the fixed columns from ``FIXED_COLS`` followed by the
    dynamic ``fields{}`` keys (see ``_field_keys``; *last_cols* are moved to
    the end). The header row is styled and frozen, an autofilter is set on
    it, and every column is sized to its longest content, capped at 55.
    """
    dynamic_keys = _field_keys(docs, last=last_cols)
    headers = [name for name, _ in FIXED_COLS] + dynamic_keys
    fixed_count = len(FIXED_COLS)

    header_alignment = Alignment(horizontal="center", vertical="center")
    data_alignment = Alignment(vertical="top")

    # Header row; column widths start from the header text length.
    widths = {}
    for col_no, title in enumerate(headers, 1):
        head_cell = ws.cell(row=1, column=col_no, value=title)
        head_cell.font = HEADER_FONT
        head_cell.fill = HEADER_FILL
        head_cell.border = BORDER
        head_cell.alignment = header_alignment
        widths[col_no] = len(title)
    ws.row_dimensions[1].height = 18
    ws.freeze_panes = "A2"

    # Data rows. Widths are measured on the fixed getter output and on the
    # raw (unformatted) field value.
    for row_no, doc in enumerate(docs, 2):
        fields = doc.get("fields", {})
        raw_values = [getter(doc) for _, getter in FIXED_COLS]
        raw_values += [fields.get(key, "") for key in dynamic_keys]

        for col_no, raw in enumerate(raw_values, 1):
            shown = raw if col_no <= fixed_count else _fmt_field(raw)
            data_cell = ws.cell(row=row_no, column=col_no, value=shown)
            data_cell.font = DATA_FONT
            data_cell.border = BORDER
            data_cell.alignment = data_alignment
            widths[col_no] = max(widths[col_no], len(str(raw)))

    # Autofilter on the header row.
    if headers:
        ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}1"

    # Column widths: content length plus padding, capped at 55.
    for col_no, width in widths.items():
        ws.column_dimensions[get_column_letter(col_no)].width = min(width + 2, 55)
|
||||
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main() -> None:
    """Build the Excel report: one sheet per collection in ``COLLECTIONS``.

    Documents are read from MongoDB sorted by site number, subject label,
    folder sequence and record position. Existing reports of this study in
    the output directory are moved to TRASH before the new workbook is
    saved under today's date.
    """
    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
    # try/finally so the connection is released even when the ping, a
    # query or a sheet write raises.
    try:
        client.admin.command("ping")
        db = client[DB_NAME]

        wb = Workbook()
        wb.remove(wb.active)  # drop the default empty sheet

        for coll_name in COLLECTIONS:
            docs = list(db[coll_name].find(
                {},
                {"_id": 0, "sourceFile": 0, "history": 0},
                sort=[
                    ("site.number", ASCENDING),
                    ("subject.label", ASCENDING),
                    ("form.folderSeq", ASCENDING),
                    ("form.recordPosition", ASCENDING),
                ],
            ))
            ws = wb.create_sheet(title=_sheet_name(coll_name))
            last = COLS_LAST_CT if "ConcomitantTherapy" in coll_name else None
            write_sheet(ws, docs, last_cols=last)
            print(f" {coll_name}: {len(docs)} zaznamu -> list '{ws.title}'")
    finally:
        client.close()

    OUTPUT_DIR.mkdir(exist_ok=True)
    TRASH_DIR.mkdir(exist_ok=True)

    # Move earlier reports of this study to TRASH.
    pattern = f"* {STUDY_FULL} EDC DataListing *.xlsx"
    for old in OUTPUT_DIR.glob(pattern):
        dest = TRASH_DIR / old.name
        shutil.move(str(old), str(dest))
        print(f" Přesunuto do TRASH: {old.name}")

    today = datetime.now().strftime("%Y-%m-%d")
    filename = f"{today} {STUDY_FULL} EDC DataListing v{VERSION}.xlsx"
    out_path = OUTPUT_DIR / filename
    wb.save(str(out_path))
    print(f"\nUloženo: {out_path}")


if __name__ == "__main__":
    main()
|
||||
|
After Width: | Height: | Size: 28 KiB |
|
After Width: | Height: | Size: 30 KiB |
|
After Width: | Height: | Size: 30 KiB |
|
After Width: | Height: | Size: 9.6 KiB |
|
After Width: | Height: | Size: 28 KiB |
|
After Width: | Height: | Size: 249 KiB |
|
After Width: | Height: | Size: 16 KiB |
@@ -0,0 +1,501 @@
|
||||
"""
|
||||
download_edc_datalistings.py
|
||||
Verze: 2.0
|
||||
Datum: 2026-05-27
|
||||
|
||||
Univerzální stahování EDC Data Listing reportů (ReportID=92) z Medidata Rave.
|
||||
|
||||
Parametry:
|
||||
study – vyhledávací řetězec studie (např. "77242113UCO3001")
|
||||
forms – seznam názvů formulářů ke stažení
|
||||
country – kód země / site group (např. "CZE"), None = všechny
|
||||
|
||||
Prohlížeč se otevře jednou, přihlásí se, a stáhne všechny formuláře v jedné session.
|
||||
|
||||
Použití:
|
||||
from download_edc_datalistings import download_datalisting
|
||||
|
||||
download_datalisting(
|
||||
study="77242113UCO3001",
|
||||
forms=["Date of Visit", "Concomitant Therapy"],
|
||||
country="CZE",
|
||||
)
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
|
||||
import tkinter as tk
|
||||
from tkinter import simpledialog
|
||||
|
||||
# Credentials are read from the .env file located next to this script.
load_dotenv(Path(__file__).parent / ".env")

USERNAME = os.getenv("IMEDIDATA_USERNAME", "vladimir.buzalka")
PASSWORD = os.getenv("IMEDIDATA_PASSWORD", "")

# Working files, all kept next to this script.
DOWNLOAD_DIR = Path(__file__).parent / "downloads"
AUTH_FILE = Path(__file__).parent / "auth.json"

# A saved session older than this many days is not reused (see auth_valid).
AUTH_MAX_AGE_DAYS = 7

LOGIN_URL = "https://login.imedidata.com/login"
SELECT_ROLE_URL = (
    "https://jnjja.mdsol.com/MedidataRave/SelectRole.aspx"
    "?client_division_uuid=e5de55d5-a414-4bd1-9abe-18e96fd5475d"
    "&study_group_uuid=b0793ca6-33ec-44e8-883b-6fc1a4b671c4"
    "&studygroup_id=107981"
)

# Report opened by open_report() (Data Listing - Data Stream).
REPORT_ID = 92
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def auth_valid():
    """Return True when the saved session file exists and is recent enough.

    The age is taken from the file's modification time and must be below
    ``AUTH_MAX_AGE_DAYS`` days.
    """
    if not AUTH_FILE.exists():
        return False
    saved_at = datetime.fromtimestamp(AUTH_FILE.stat().st_mtime)
    max_age = timedelta(days=AUTH_MAX_AGE_DAYS)
    return datetime.now() - saved_at < max_age
|
||||
|
||||
|
||||
def wait_load(page, extra_ms=1000):
    """Wait for the page's "load" state, then pause for *extra_ms* ms.

    A timeout of the load wait (20 s) is ignored on purpose; the extra
    pause is always performed afterwards.
    """
    load_timeout_ms = 20_000
    try:
        page.wait_for_load_state("load", timeout=load_timeout_ms)
    except PWTimeout:
        # Best effort: continue even if the load event never arrived.
        pass
    page.wait_for_timeout(extra_ms)
|
||||
|
||||
|
||||
def dbg(page, label):
    """Print the current URL and save a full-page screenshot for debugging.

    The screenshot is written to ``debug_shots/<label>.png`` next to this
    script. Any failure while taking it is only reported, never raised.
    """
    print(f"[{label}] URL: {page.url}")
    try:
        shot_dir = Path(__file__).parent / "debug_shots"
        shot_dir.mkdir(exist_ok=True)
        shot_file = shot_dir / f"{label}.png"
        page.screenshot(path=str(shot_file), full_page=True)
        print(f"[{label}] screenshot: {shot_file}")
    except Exception as e:
        print(f"[{label}] screenshot failed: {e}")
|
||||
|
||||
|
||||
def extract_study_label(study_search: str) -> str:
    """Return the trailing 'LETTERS+DIGITS' part of a study search string.

    Example: '77242113UCO3001' -> 'UCO3001'. Only upper-case A-Z letters
    are matched; when the string does not end that way it is returned
    unchanged.
    """
    trailing_code = re.search(r'[A-Z]+\d+$', study_search)
    if trailing_code is None:
        return study_search
    return trailing_code.group(0)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Login
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _ask_otp_popup():
    """Ask the user for the OKTA one-time code in a small dialog window.

    Returns the entered text stripped of surrounding whitespace, or "" when
    the dialog was cancelled or left empty.
    """
    # A hidden, always-on-top root window serves as the dialog's parent.
    hidden_root = tk.Tk()
    hidden_root.withdraw()
    hidden_root.lift()
    hidden_root.attributes("-topmost", True)

    answer = simpledialog.askstring(
        "OKTA MFA",
        "Zadej OTP kód z OKTA (6 číslic):",
        parent=hidden_root,
    )
    hidden_root.destroy()

    if not answer:
        return ""
    return answer.strip()
|
||||
|
||||
|
||||
def do_login(page, context):
    """Log in to iMedidata and save the session to ``AUTH_FILE``.

    Fills the login form with ``USERNAME`` / ``PASSWORD``, handles an OKTA
    MFA prompt by asking the user for the one-time code, and waits for the
    iMedidata home page. Exits the process with status 1 when no code is
    entered or the home page is not reached.
    """
    user_input = 'input[name="session[username]"]'
    password_input = 'input[name="session[password]"]'

    print("Přihlašuji se do iMedidata...")
    page.goto(LOGIN_URL)
    wait_load(page, 500)

    page.wait_for_selector(user_input, timeout=10_000)
    page.fill(user_input, USERNAME)
    page.fill(password_input, PASSWORD)
    page.click('button[type="submit"]')
    wait_load(page, 2000)
    dbg(page, "after-signin")

    if _okta_mfa_present(page):
        print("\n*** OKTA MFA vyžadována! ***")
        code = _ask_otp_popup()
        if not code:
            print("CHYBA: OTP nebylo zadáno.")
            sys.exit(1)
        _fill_otp(page, code)
        wait_load(page, 3000)

    try:
        page.wait_for_url("**/home.imedidata.com**", timeout=30_000)
    except PWTimeout:
        # Not fatal here; the URL check below decides.
        dbg(page, "wait-home-timeout")

    reached_home = "home.imedidata.com" in page.url
    if not reached_home:
        print("CHYBA: Přihlášení se nezdařilo!")
        sys.exit(1)

    context.storage_state(path=str(AUTH_FILE))
    print("Session uložena do auth.json")
|
||||
|
||||
|
||||
def _okta_mfa_present(page):
    """Return True when the page looks like an OKTA MFA prompt.

    That is the case when the URL contains "okta" (case-insensitive) or
    when any of the known one-time-code input selectors matches an element.
    """
    if "okta" in page.url.lower():
        return True
    otp_selectors = (
        'input[name="answer"]',
        'input[name*="otp"]',
        'input[name*="code"]',
        'input[placeholder*="code" i]',
    )
    return any(page.query_selector(selector) for selector in otp_selectors)
|
||||
|
||||
|
||||
def _fill_otp(page, otp):
    """Enter the one-time code and submit it with Enter.

    The code is filled into the first input matched by the known selectors
    (tried in order); when none matches, it is typed on the keyboard into
    whatever currently has focus.
    """
    otp_selectors = (
        'input[name="answer"]',
        'input[name*="otp"]',
        'input[name*="code"]',
        'input[type="tel"]',
        'input[placeholder*="code" i]',
    )
    # map() is lazy, so selectors are queried only until the first hit.
    matches = (el for el in map(page.query_selector, otp_selectors) if el)
    otp_input = next(matches, None)

    if otp_input:
        otp_input.fill(otp)
    else:
        page.keyboard.type(otp)
    page.keyboard.press("Enter")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Navigace
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def go_to_select_role(page):
    """Open the SelectRole page and report whether the session is logged in.

    Navigation is best effort: an exception raised by ``page.goto`` is
    printed and otherwise ignored, because the decision is made from the
    URL the browser ends up on.

    Returns:
        True when the final URL contains neither "login" nor "okta"
        (case-insensitive), False otherwise.
    """
    print("Navigace na SelectRole...")
    try:
        page.goto(SELECT_ROLE_URL)
    except Exception as exc:
        # Still best effort, but no longer silent: show why goto failed.
        print(f"  Navigace na SelectRole vyvolala výjimku (pokračuji): {exc}")
    wait_load(page, 1500)
    dbg(page, "select-role")

    final_url = page.url.lower()
    return "login" not in final_url and "okta" not in final_url
|
||||
|
||||
|
||||
def select_role(page):
    """Pick the "Site Manager" role and submit the role form.

    Returns early (doing nothing) when no ``<select>`` appears within 10 s.
    In every ``<select>`` the first option whose text contains
    "site manager" is chosen. The form is then submitted through the first
    button that triggers a navigation; if none does, the first form on the
    page is submitted via JavaScript.
    """
    print("Vybírám roli Site Manager...")
    try:
        page.wait_for_selector("select", timeout=10_000)
    except PWTimeout:
        return

    for dropdown in page.query_selector_all("select"):
        for option in dropdown.query_selector_all("option"):
            caption = (option.inner_text() or "").strip()
            if "site manager" not in caption.lower():
                continue
            dropdown.select_option(label=caption)
            print(f" Vybráno: '{caption}'")
            break  # next <select>

    submit_candidates = [
        'input[value="Continue"]',
        'input[type="submit"]',
        'button:has-text("Continue")',
        'button[type="submit"]',
    ]
    for btn_sel in submit_candidates:
        try:
            button = page.query_selector(btn_sel)
        except Exception:
            continue
        if not button:
            continue
        try:
            with page.expect_navigation(timeout=15_000):
                button.click()
        except PWTimeout:
            print(f" Click on {btn_sel} nezpůsobil navigaci, zkouším další...")
            continue
        break  # a click caused a navigation
    else:
        # No button worked: submit the first form directly.
        print(" Fallback: submituji formulář přes JS...")
        try:
            with page.expect_navigation(timeout=15_000):
                page.evaluate("document.forms[0] && document.forms[0].submit()")
        except PWTimeout:
            print(" JS submit fallback také neprošel.")

    wait_load(page, 1500)
    dbg(page, "after-role")
|
||||
|
||||
|
||||
def navigate_to_reporter(page):
    """Click the "Reporter" link once it is present and wait for the load."""
    reporter_link = 'a:has-text("Reporter")'
    print("Klikám na Reporter...")
    # Up to 15 s for the link to appear; a timeout propagates to the caller.
    page.wait_for_selector(reporter_link, timeout=15_000)
    page.click(reporter_link)
    wait_load(page, 1500)
    dbg(page, "reporter")
|
||||
|
||||
|
||||
def open_report(page):
    """Open the prompts page of the report identified by REPORT_ID."""
    print(f"Otevírám report ID={REPORT_ID} (Data Listing - Data Stream)...")
    report_link = 'a[href="PromptsPage.aspx?ReportID={}"]'.format(REPORT_ID)
    # Up to 15 s for the link to appear; a timeout propagates to the caller.
    page.wait_for_selector(report_link, timeout=15_000)
    page.click(report_link)
    wait_load(page, 2000)
    dbg(page, "report-opened")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parametry reportu
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def set_study_param(page, study_search: str):
    """Tick the Study checkbox whose surrounding row text contains study_search.

    The comparison is case-insensitive. When no row matches, a warning is
    printed and the checkbox with index 0 is ticked instead.
    """
    checkbox_css = 'input[id^="PromptsBox_st_FrontEndCBList_"]'
    # Returns the innerText of the nearest <tr>, <td> or parent of the element.
    row_text_js = """id => {
        const el = document.getElementById(id);
        if (!el) return '';
        const row = el.closest('tr') || el.closest('td') || el.parentElement;
        return row ? row.innerText : '';
    }"""

    print(f" Parametr Study: hledám '{study_search}'...")

    page.click('#PromptsBox_st_ShowHideBtn')
    page.wait_for_timeout(1500)

    page.wait_for_selector(checkbox_css, timeout=10_000)

    for checkbox in page.query_selector_all(checkbox_css):
        box_id = checkbox.get_attribute("id")
        row_text = page.evaluate(row_text_js, box_id)
        print(f" [{box_id}] label: {row_text.strip()[:80]}")
        if study_search.upper() not in row_text.upper():
            continue
        target = page.locator(f"#{box_id}")
        if not target.is_checked():
            target.check()
        print(f" Nalezeno a zaškrtnuto: '{row_text.strip()}'")
        break
    else:
        # No row matched: fall back to the first checkbox in the list.
        print(f" VAROVÁNÍ: Studie '{study_search}' nenalezena! Zkouším index 0...")
        first_box = page.locator('#PromptsBox_st_FrontEndCBList_0')
        if not first_box.is_checked():
            first_box.check()

    wait_load(page, 3000)
    dbg(page, "after-study")
|
||||
|
||||
|
||||
def set_site_group_param(page, country: str):
    """Select country in the Site Group list and tick the panel's checkbox.

    A bubbling "change" event is dispatched manually after each change, and
    the panel is toggled again at the end.
    """
    toggle_btn = '#PromptsBox_sg_ShowHideBtn'
    group_list = '#PromptsBox_sg_List'
    group_checkbox = '#PromptsBox_sg_CheckBox'

    def fire_change(css):
        # Dispatch a bubbling "change" event on the element matched by css.
        page.evaluate(
            "document.querySelector('" + css + "').dispatchEvent(new Event('change', {bubbles:true}))"
        )

    print(f" Parametr Site Group: {country}")

    page.click(toggle_btn)
    page.wait_for_timeout(1500)

    page.wait_for_selector(group_list, timeout=10_000)
    page.select_option(group_list, label=country)
    fire_change(group_list)
    wait_load(page, 2000)

    checkbox = page.locator(group_checkbox)
    if not checkbox.is_checked():
        checkbox.check()
        fire_change(group_checkbox)
        wait_load(page, 2000)

    page.click(toggle_btn)
    wait_load(page, 3000)
    dbg(page, "after-site-group")
|
||||
|
||||
|
||||
def set_form_param(page, form_name: str):
    """Search the Form panel for form_name and tick the first listed checkbox.

    Prints a warning and returns early when no checkbox becomes visible
    within 8 s of the search.
    """
    mode_btn = '#PromptsBox_fm2_PageModeBtn'

    print(f" Parametr Form: {form_name}")

    # Open the panel only when its container is hidden via inline style.
    panel_display = page.locator('#PromptsBox_fm2_div').evaluate('el => el.style.display')
    if panel_display == 'none':
        page.click('#PromptsBox_fm2_ShowHideBtn')
        page.wait_for_timeout(2000)

    # When the page-mode button is visible it is clicked twice, with pauses.
    if page.locator(mode_btn).is_visible():
        for pause_ms in (1000, 2000):
            page.click(mode_btn)
            page.wait_for_timeout(pause_ms)

    search_box = page.locator('#PromptsBox_fm2_SearchTxt')
    search_box.wait_for(state='visible', timeout=10_000)
    search_box.click()
    search_box.fill(form_name)
    page.wait_for_timeout(2000)
    search_box.press('Enter')
    page.wait_for_timeout(2000)

    # NOTE(review): the first checkbox is taken without comparing its label
    # to form_name; presumably the search leaves only matching rows — confirm.
    first_hit = page.locator('input[id^="PromptsBox_fm2_FrontEndCBList_"]').first
    try:
        first_hit.wait_for(state='visible', timeout=8_000)
    except PWTimeout:
        print(f" VAROVÁNÍ: '{form_name}' nenalezen!")
        return

    if not first_hit.is_checked():
        first_hit.click()
    print(f" '{form_name}' zaškrtnuto")
    page.wait_for_timeout(2000)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Submit a download
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def submit_and_download(page, context, form_name: str, country: str | None, study_label: str):
    """Submit the report, wait for the download window and save the CSV.

    Args:
        page: Prompts page holding the "Submit Report" button.
        context: Browser context on which the popup page is awaited.
        form_name: Form name, used (sanitised) in the output filename.
        country: Site group code for the filename; None becomes "ALL".
        study_label: Study label for the filename.

    Returns:
        Path of the saved file inside DOWNLOAD_DIR.

    The popup page is closed even when waiting or downloading fails; the
    original exception still propagates.
    """
    print("Odesílám report...")

    with context.expect_page() as new_page_info:
        page.locator('input[value="Submit Report"], button:has-text("Submit Report")').first.click()

    new_page = new_page_info.value
    try:
        new_page.wait_for_url(lambda url: url != 'about:blank', timeout=30_000)

        print(" Čekám na vygenerování reportu (max 5 min)...")
        new_page.wait_for_selector(
            'input[value="Download File"], button:has-text("Download File")',
            timeout=300_000
        )
        new_page.wait_for_timeout(500)
        dbg(new_page, "download-window")

        # Use the first frame that holds a <select> or the download button;
        # fall back to the main frame.
        target_frame = new_page.main_frame
        for frame in new_page.frames:
            if frame.query_selector('select') or frame.query_selector('input[value="Download File"]'):
                target_frame = frame
                break

        # File type: in each <select>, pick the first option whose value
        # mentions vnd.ms-excel.
        for sel in target_frame.query_selector_all('select'):
            for opt in sel.query_selector_all('option'):
                val = opt.get_attribute('value') or ''
                if 'vnd.ms-excel' in val:
                    sel.select_option(value=val)
                    print(" File type: .csv (application/vnd.ms-excel)")
                    break

        # Disposition: select the option's real value. The match is a
        # case-insensitive substring test, so the literal 'attachment' may
        # not equal the value that was found.
        for sel in target_frame.query_selector_all('select'):
            for opt in sel.query_selector_all('option'):
                val = opt.get_attribute('value') or ''
                if 'attachment' in val.lower():
                    sel.select_option(value=val)
                    break

        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
        country_slug = country if country else "ALL"
        # Same replacements as before (space, parentheses, slash), plus removal
        # of characters that are not allowed in Windows file names.
        slug_table = str.maketrans(
            {" ": "", "(": "", ")": "", "/": "-"} | dict.fromkeys('\\:*?"<>|', "")
        )
        form_slug = form_name.translate(slug_table)
        filename = f"{timestamp}_EDC_{study_label}_{country_slug}_{form_slug}_DataListing.csv"
        output_path = DOWNLOAD_DIR / filename

        print("Stahuji CSV...")
        with new_page.expect_download(timeout=60_000) as dl_info:
            btn = target_frame.query_selector('input[value="Download File"], button:has-text("Download File")')
            if btn:
                btn.click()
            else:
                new_page.locator('input[value="Download File"], button:has-text("Download File")').first.click()

        dl_info.value.save_as(str(output_path))
        print(f" Uloženo: {output_path}")
    finally:
        # Closing is best effort; a failure here must not mask the real error.
        try:
            new_page.close()
        except Exception:
            pass

    return output_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hlavní funkce
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def download_datalisting(study: str, forms: list[str], country: str | None = None):
    """
    Download EDC Data Listing reports for the given study, one CSV per form.

    Args:
        study: Study search string, e.g. "77242113UCO3001".
        forms: Names of the forms to download.
        country: Site group code, e.g. "CZE". None = all countries.

    Returns:
        The paths returned by submit_and_download, in the order of ``forms``;
        an empty list when ``forms`` is empty.

    Exits the process with status 1 when IMEDIDATA_PASSWORD is not set or
    when the SelectRole page is still not reachable after logging in.
    """
    if not PASSWORD:
        print("Chyba: nastav IMEDIDATA_PASSWORD v souboru .env")
        sys.exit(1)

    if not forms:
        print("Žádné formuláře ke stažení.")
        return []

    DOWNLOAD_DIR.mkdir(exist_ok=True)
    study_label = extract_study_label(study)
    results = []

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=False,
            slow_mo=200,
            args=["--start-maximized"],
        )
        try:
            ctx_kwargs = {"accept_downloads": True, "no_viewport": True}

            use_saved = auth_valid()
            if use_saved:
                print("Načítám uloženou session (auth.json)...")
                ctx_kwargs["storage_state"] = str(AUTH_FILE)

            context = browser.new_context(**ctx_kwargs)
            page = context.new_page()

            logged_in = go_to_select_role(page)

            if not logged_in:
                if use_saved:
                    print("Session expirovala, přihlašuji znovu...")
                    AUTH_FILE.unlink(missing_ok=True)
                do_login(page, context)
                # Fail fast with a clear message instead of timing out later
                # on a page that still shows the login screen.
                if not go_to_select_role(page):
                    print("CHYBA: Po přihlášení se nepodařilo otevřít stránku SelectRole.")
                    sys.exit(1)

            select_role(page)
            navigate_to_reporter(page)
            open_report(page)

            # Remembered so the prompts page can be reopened for each form.
            prompts_url = page.url

            print("\nNastavuji parametry reportu...")
            set_study_param(page, study)

            if country:
                set_site_group_param(page, country)
            else:
                print(" Parametr Site Group: přeskočen (všechny země)")

            for i, form_name in enumerate(forms):
                print(f"\n=== [{i+1}/{len(forms)}] Stahuji formulář: {form_name} ===")

                if i > 0:
                    # Reopen the prompts page and set Study/Site Group again
                    # before choosing the next form.
                    print("Navigace zpět na report...")
                    page.goto(prompts_url)
                    wait_load(page, 2000)
                    set_study_param(page, study)
                    if country:
                        set_site_group_param(page, country)

                set_form_param(page, form_name)
                output = submit_and_download(page, context, form_name, country, study_label)
                results.append(output)
        finally:
            # Close the browser on error paths as well.
            browser.close()
        print(f"\nHotovo! Staženo {len(results)} formulářů. Prohlížeč zavřen.")

    return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
if __name__ == "__main__":
    # Optional first CLI argument is the site group code; without it the
    # download is not restricted to one country.
    cli_args = sys.argv[1:]
    download_datalisting(
        study="77242113UCO3001",
        forms=["Trial Disposition (Completion / Discontinuation)"],
        country=cli_args[0] if cli_args else None,
    )
|
||||
@@ -0,0 +1,483 @@
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
|
||||
import tkinter as tk
|
||||
from tkinter import simpledialog
|
||||
|
||||
# Load credentials from the .env file stored next to this script.
load_dotenv(Path(__file__).parent / ".env")

# Credentials and local paths.
USERNAME = os.getenv("IMEDIDATA_USERNAME", "vladimir.buzalka")
PASSWORD = os.getenv("IMEDIDATA_PASSWORD", "")
DOWNLOAD_DIR = Path(__file__).parent / "downloads"
AUTH_FILE = Path(__file__).parent / "auth.json"
AUTH_MAX_AGE_DAYS = 7  # saved session older than this is not reused

# Entry points.
LOGIN_URL = "https://login.imedidata.com/login"
SELECT_ROLE_URL = (
    "https://jnjja.mdsol.com/MedidataRave/SelectRole.aspx"
    "?client_division_uuid=e5de55d5-a414-4bd1-9abe-18e96fd5475d"
    "&study_group_uuid=b0793ca6-33ec-44e8-883b-6fc1a4b671c4"
    "&studygroup_id=107981"
)

# Report selection.
STUDY_NAME = "42847922MDD3003"
SITE_GROUP = "CZE"
REPORT_ID = 164  # _EDC Std Rpt - Query Details (Data Stream)

# Query Status: any combination of ["Open", "Answered", "Closed", "Canceled"]
QUERY_STATUSES = []  # empty = Default: All (no filtering)

# Milestone: "Final" is always available, the others depend on the study
MILESTONES = ["Final"]

# Date in DD-Mon-YYYY format (e.g. "01-Jan-2024"); empty string = no filter
START_DATE = ""
END_DATE = ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def auth_valid():
    """Return True when AUTH_FILE exists and is younger than AUTH_MAX_AGE_DAYS.

    The age is measured from the file's modification time.
    """
    if AUTH_FILE.exists():
        file_age = datetime.now() - datetime.fromtimestamp(AUTH_FILE.stat().st_mtime)
        return file_age < timedelta(days=AUTH_MAX_AGE_DAYS)
    return False
|
||||
|
||||
|
||||
def wait_load(page, extra_ms=1000):
    """Wait for the page "load" state (max 20 s), then pause extra_ms more.

    A timeout while waiting for the load state is ignored; the extra pause
    is performed either way.
    """
    try:
        page.wait_for_load_state("load", timeout=20_000)
    except PWTimeout:
        # A slow page is tolerated: continue with the fixed pause below.
        pass
    page.wait_for_timeout(extra_ms)
|
||||
|
||||
|
||||
def dbg(page, label):
    """Print the current page URL prefixed with a bracketed label."""
    print("[{}] URL: {}".format(label, page.url))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Login
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _ask_otp_popup():
    """Ask for the OKTA one-time code in a topmost Tk dialog.

    Returns:
        The entered text with surrounding whitespace stripped, or an empty
        string when the dialog returned nothing.
    """
    root = tk.Tk()
    try:
        # Hide the empty root window; only the dialog should be shown.
        root.withdraw()
        root.lift()
        root.attributes("-topmost", True)
        otp = simpledialog.askstring(
            "OKTA MFA",
            "Zadej OTP kód z OKTA (6 číslic):",
            parent=root,
        )
    finally:
        # Destroy the root window even when the dialog raised.
        root.destroy()
    return (otp or "").strip()
|
||||
|
||||
|
||||
def do_login(page, context):
    """Log in to iMedidata with USERNAME/PASSWORD and save the session.

    When an OKTA MFA step is detected, the one-time code is requested through
    a popup. On success the context's storage state is written to AUTH_FILE.
    Exits the process with status 1 when no OTP is entered or when the final
    URL does not contain "home.imedidata.com".
    """
    username_field = 'input[name="session[username]"]'
    password_field = 'input[name="session[password]"]'

    print("Přihlašuji se do iMedidata...")
    page.goto(LOGIN_URL)
    wait_load(page, 500)
    dbg(page, "login-page")

    page.wait_for_selector(username_field, timeout=10_000)
    page.fill(username_field, USERNAME)
    page.fill(password_field, PASSWORD)
    page.click('button[type="submit"]')

    wait_load(page, 2000)
    dbg(page, "after-signin")

    if _okta_mfa_present(page):
        print("\n*** OKTA MFA vyžadována! ***")
        code = _ask_otp_popup()
        if not code:
            print("CHYBA: OTP nebylo zadáno.")
            sys.exit(1)
        _fill_otp(page, code)
        wait_load(page, 3000)
        dbg(page, "after-otp")

    try:
        page.wait_for_url("**/home.imedidata.com**", timeout=30_000)
    except PWTimeout:
        # Not fatal by itself; the URL check below makes the decision.
        dbg(page, "wait-home-timeout")

    dbg(page, "final-login")

    reached_home = "home.imedidata.com" in page.url
    if not reached_home:
        print("CHYBA: Přihlášení se nezdařilo!")
        # Keep the console open until the user confirms.
        input("Zmáčkni Enter pro ukončení...")
        sys.exit(1)

    context.storage_state(path=str(AUTH_FILE))
    print("Session uložena do auth.json")
|
||||
|
||||
|
||||
def _okta_mfa_present(page):
|
||||
if "okta" in page.url.lower():
|
||||
return True
|
||||
for sel in [
|
||||
'input[name="answer"]',
|
||||
'input[name*="otp"]',
|
||||
'input[name*="code"]',
|
||||
'input[placeholder*="code" i]',
|
||||
]:
|
||||
try:
|
||||
if page.query_selector(sel):
|
||||
return True
|
||||
except Exception:
|
||||
# Page navigated during selector, skip
|
||||
pass
|
||||
return False
|
||||
|
||||
|
||||
def _fill_otp(page, otp):
|
||||
for sel in [
|
||||
'input[name="answer"]',
|
||||
'input[name*="otp"]',
|
||||
'input[name*="code"]',
|
||||
'input[type="tel"]',
|
||||
'input[placeholder*="code" i]',
|
||||
]:
|
||||
try:
|
||||
el = page.query_selector(sel)
|
||||
if el:
|
||||
el.fill(otp)
|
||||
page.keyboard.press("Enter")
|
||||
return
|
||||
except Exception:
|
||||
# Page navigated, continue to next selector
|
||||
pass
|
||||
try:
|
||||
page.keyboard.type(otp)
|
||||
page.keyboard.press("Enter")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Navigace
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def go_to_select_role(page):
    """Navigate to SELECT_ROLE_URL and tell whether a login redirect happened.

    Returns:
        False when the resulting URL contains "login" or "okta"
        (case-insensitive), True otherwise.
    """
    print("Navigace na SelectRole...")
    try:
        page.goto(SELECT_ROLE_URL)
    except Exception:
        # Navigation errors are ignored on purpose; the URL is checked below.
        pass
    wait_load(page, 1500)
    dbg(page, "select-role")
    url_lower = page.url.lower()
    redirected_to_auth = "login" in url_lower or "okta" in url_lower
    return not redirected_to_auth
|
||||
|
||||
|
||||
def select_role(page):
    """Choose the "Site Manager" role and click the first submit-like button.

    Returns without doing anything when no <select> appears within 10 s. If
    no option text contains "site manager", the first element with the text
    "Site Manager" is clicked instead (errors are printed, not raised).
    """
    print("Vybírám roli Site Manager...")
    try:
        page.wait_for_selector("select", timeout=10_000)
    except PWTimeout:
        return

    def pick_from_dropdowns():
        # Select the first option mentioning "site manager"; True on success.
        for dropdown in page.query_selector_all("select"):
            for option in dropdown.query_selector_all("option"):
                label = (option.inner_text() or "").strip()
                if "site manager" in label.lower():
                    dropdown.select_option(label=label)
                    print(f" Vybráno: '{label}'")
                    return True
        return False

    if not pick_from_dropdowns():
        try:
            page.get_by_text("Site Manager", exact=False).first.click()
        except Exception as e:
            print(f" {e}")

    submit_candidates = (
        'input[value="Continue"]',
        'input[type="submit"]',
        'button:has-text("Continue")',
        'button[type="submit"]',
    )
    for css in submit_candidates:
        try:
            button = page.query_selector(css)
            if not button:
                continue
            button.click()
        except Exception:
            # Query or click failed; try the next candidate.
            continue
        break

    wait_load(page, 2000)
    dbg(page, "after-role")
|
||||
|
||||
|
||||
def navigate_to_reporter(page):
    """Click the "Reporter" link; on timeout log the URL and re-raise."""
    reporter_link = 'a:has-text("Reporter")'
    print("Klikám na Reporter...")
    try:
        page.wait_for_selector(reporter_link, timeout=15_000)
        page.click(reporter_link)
        wait_load(page, 1500)
        dbg(page, "reporter")
    except PWTimeout:
        # Record where the browser ended up before passing the error on.
        dbg(page, "reporter-not-found")
        raise
|
||||
|
||||
|
||||
def open_report(page):
    """Open the report with REPORT_ID; on timeout log the URL and re-raise."""
    print(f"Klikám na report ID={REPORT_ID} (Query Details)...")
    report_link = 'a[href="PromptsPage.aspx?ReportID={}"]'.format(REPORT_ID)
    try:
        page.wait_for_selector(report_link, timeout=15_000)
        page.click(report_link)
        wait_load(page, 2000)
        dbg(page, "report-opened")
    except PWTimeout:
        # Record where the browser ended up before passing the error on.
        dbg(page, "report-not-found")
        raise
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parametry reportu
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def set_study_param(page):
    """Open the Study panel and tick its first checkbox.

    NOTE(review): STUDY_NAME is only printed; the checkbox with index 0 is
    ticked without checking which study it belongs to.
    """
    first_study_box = '#PromptsBox_st_FrontEndCBList_0'

    print(f" Parametr Study: {STUDY_NAME}")
    page.click('#PromptsBox_st_ShowHideBtn')
    page.wait_for_timeout(1500)
    page.wait_for_selector(first_study_box, timeout=10_000)

    checkbox = page.locator(first_study_box)
    already_ticked = checkbox.is_checked()
    if not already_ticked:
        checkbox.check()

    wait_load(page, 3000)
    dbg(page, "after-study")
|
||||
|
||||
|
||||
def set_site_group_param(page):
    """Select SITE_GROUP in the Site Group list and tick the panel's checkbox.

    A bubbling "change" event is dispatched manually after each change, and
    the panel is toggled again at the end.
    """
    toggle_btn = '#PromptsBox_sg_ShowHideBtn'
    group_list = '#PromptsBox_sg_List'
    sub_groups_box = '#PromptsBox_sg_CheckBox'

    def fire_change(css):
        # Dispatch a bubbling "change" event on the element matched by css.
        page.evaluate("document.querySelector('" + css + "').dispatchEvent(new Event('change', {bubbles:true}))")

    print(f" Parametr Site Group: {SITE_GROUP}")
    page.click(toggle_btn)
    page.wait_for_timeout(1500)
    page.wait_for_selector(group_list, timeout=10_000)
    page.select_option(group_list, label=SITE_GROUP)
    fire_change(group_list)
    wait_load(page, 2000)

    print(" Include Sub Site Groups: zapnuto")
    checkbox = page.locator(sub_groups_box)
    if not checkbox.is_checked():
        checkbox.check()
        fire_change(sub_groups_box)
        wait_load(page, 2000)

    page.click(toggle_btn)
    wait_load(page, 3000)
    dbg(page, "after-site-group")
|
||||
|
||||
|
||||
def set_query_status_param(page):
    """Tick the Query Status checkboxes listed in QUERY_STATUSES.

    Does nothing except print a message when QUERY_STATUSES is empty.
    Unknown status names are reported and skipped.
    """
    if not QUERY_STATUSES:
        print(" Parametr Query Status: All (přeskočeno)")
        return

    print(f" Parametr Query Status: {', '.join(QUERY_STATUSES)}")
    page.click('#PromptsBox_qu_ShowHideBtn')
    page.wait_for_timeout(1500)

    # Wait until the checkboxes have been rendered.
    page.wait_for_selector('input[id^="PromptsBox_qu_FrontEndCBList_"]', timeout=10_000)

    # Status name -> index used in the checkbox id.
    position_of = {"Open": 0, "Answered": 1, "Closed": 2, "Canceled": 3}
    for status in QUERY_STATUSES:
        if status not in position_of:
            print(f" VAROVÁNÍ: neznámý status '{status}'")
            continue
        checkbox = page.locator(f'#PromptsBox_qu_FrontEndCBList_{position_of[status]}')
        if not checkbox.is_checked():
            checkbox.check()
        print(f" '{status}' zaškrtnuto")

    wait_load(page, 1000)
|
||||
|
||||
|
||||
def set_milestone_param(page):
    """Search for each entry of MILESTONES and tick the first listed checkbox.

    A milestone whose checkbox does not become visible within 8 s is reported
    and skipped.
    """
    mode_btn = '#PromptsBox_ms_PageModeBtn'

    print(f" Parametr Milestone: {', '.join(MILESTONES)}")

    # Open the panel if it is closed (hidden via inline style).
    panel_display = page.locator('#PromptsBox_ms_div').evaluate('el => el.style.display')
    if panel_display == 'none':
        page.click('#PromptsBox_ms_ShowHideBtn')
        page.wait_for_timeout(2000)

    # After a previous selection: pencil -> eye -> milestone list is loaded.
    if page.locator(mode_btn).is_visible():
        page.click(mode_btn)  # pencil -> eye
        page.wait_for_timeout(1000)
        page.click(mode_btn)  # eye -> loads the milestones
        page.wait_for_timeout(2000)

    for milestone in MILESTONES:
        search_box = page.locator('#PromptsBox_ms_SearchTxt')
        search_box.wait_for(state='visible', timeout=10_000)
        search_box.click()
        search_box.fill(milestone)
        search_box.press('Enter')

        first_hit = page.locator('input[id^="PromptsBox_ms_FrontEndCBList_"]').first
        try:
            first_hit.wait_for(state='visible', timeout=8_000)
        except PWTimeout:
            print(f" VAROVÁNÍ: '{milestone}' nenalezen!")
            continue

        if not first_hit.is_checked():
            first_hit.click()
        print(f" '{milestone}' zaškrtnuto")
        wait_load(page, 500)
|
||||
|
||||
|
||||
def set_date_param(page, panel_id, date_value, label):
    """Fill the date picker of the given prompt panel.

    Args:
        page: Page holding the prompts.
        panel_id: Id prefix of the panel, e.g. 'PromptsBox_sd'.
        date_value: Text typed into the date input; falsy means do nothing.
        label: Human-readable parameter name used in the printed message.
    """
    if date_value:
        print(f" Parametr {label}: {date_value}")
        page.click('#' + panel_id + '_ShowHideBtn')
        page.wait_for_timeout(1000)
        picker = page.locator('#' + panel_id + '_DatePickerTxt')
        picker.wait_for(state='visible', timeout=10_000)
        picker.click()
        picker.fill(date_value)
        # Tab moves focus out of the input after the value is entered.
        picker.press('Tab')
        page.wait_for_timeout(500)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Submit a download
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def submit_and_download(page, context):
    """Submit the report, wait for the download window and save the CSV.

    Args:
        page: Prompts page holding the "Submit Report" button.
        context: Browser context on which the popup page is awaited.

    Returns:
        Path of the saved file inside DOWNLOAD_DIR.

    The popup page is closed even when waiting or downloading fails; the
    original exception still propagates.
    """
    print("Odesílám report (čekám na nové okno)...")

    with context.expect_page() as new_page_info:
        page.locator('input[value="Submit Report"], button:has-text("Submit Report")').first.click()

    new_page = new_page_info.value
    try:
        new_page.wait_for_url(lambda url: url != 'about:blank', timeout=30_000)
        print(" Čekám na vygenerování reportu...")
        new_page.wait_for_selector(
            'input[value="Download File"], button:has-text("Download File")',
            timeout=300_000
        )
        new_page.wait_for_timeout(500)
        dbg(new_page, "download-window")

        print(" Nastavuji parametry stahování...")

        # Use the first frame that holds a <select> or the download button;
        # fall back to the main frame.
        target_frame = new_page.main_frame
        for frame in new_page.frames:
            if frame.query_selector('select') or frame.query_selector('input[value="Download File"]'):
                target_frame = frame
                print(f" Frame nalezen: {frame.url}")
                break

        # File type: in each <select>, pick the first option whose value or
        # text mentions vnd.ms-excel.
        for sel in target_frame.query_selector_all('select'):
            for opt in sel.query_selector_all('option'):
                val = opt.get_attribute('value') or ''
                txt = opt.inner_text() or ''
                if 'vnd.ms-excel' in val or 'vnd.ms-excel' in txt:
                    sel.select_option(value=val)
                    print(" File type: .csv (application/vnd.ms-excel)")
                    break

        # Disposition: select the option's real value. The match is a
        # case-insensitive substring test, so the literal 'attachment' may
        # not equal the value that was found.
        for sel in target_frame.query_selector_all('select'):
            for opt in sel.query_selector_all('option'):
                val = opt.get_attribute('value') or ''
                if 'attachment' in val.lower():
                    sel.select_option(value=val)
                    break

        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
        filename = f"{timestamp}_EDC_MDD3003_QueryDetails.csv"
        output_path = DOWNLOAD_DIR / filename

        print("Stahuji CSV...")
        with new_page.expect_download(timeout=60_000) as dl_info:
            btn = target_frame.query_selector('input[value="Download File"], button:has-text("Download File")')
            if btn:
                btn.click()
            else:
                new_page.locator('input[value="Download File"], button:has-text("Download File")').first.click()

        download = dl_info.value
        download.save_as(str(output_path))
        print(f"\nHotovo! Soubor uložen: {output_path}")
    finally:
        # Closing is best effort; a failure here must not mask the real error.
        try:
            new_page.close()
            print("Stahovací okno zavřeno.")
        except Exception:
            pass

    return output_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hlavní flow
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run():
    """Run the whole flow: log in, set the report prompts, download the CSV.

    Exits the process with status 1 when IMEDIDATA_PASSWORD is not set or
    when the SelectRole page is still not reachable after logging in.
    """
    if not PASSWORD:
        print("Chyba: nastav IMEDIDATA_PASSWORD v souboru .env")
        sys.exit(1)

    DOWNLOAD_DIR.mkdir(exist_ok=True)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False, slow_mo=200)
        try:
            ctx_kwargs = {"accept_downloads": True}

            use_saved = auth_valid()
            if use_saved:
                print("Načítám uloženou session (auth.json)...")
                ctx_kwargs["storage_state"] = str(AUTH_FILE)

            context = browser.new_context(**ctx_kwargs)
            page = context.new_page()

            logged_in = go_to_select_role(page)

            if not logged_in:
                if use_saved:
                    print("Session expirovala, mažu auth.json a přihlašuji znovu...")
                    AUTH_FILE.unlink(missing_ok=True)
                do_login(page, context)
                # Fail fast with a clear message instead of timing out later
                # on a page that still shows the login screen.
                if not go_to_select_role(page):
                    print("CHYBA: Po přihlášení se nepodařilo otevřít stránku SelectRole.")
                    sys.exit(1)

            select_role(page)
            navigate_to_reporter(page)
            open_report(page)

            print("Nastavuji parametry reportu...")
            set_study_param(page)
            set_site_group_param(page)
            set_query_status_param(page)
            set_milestone_param(page)
            set_date_param(page, 'PromptsBox_sd', START_DATE, "Start Date")
            set_date_param(page, 'PromptsBox_ed', END_DATE, "End Date")

            submit_and_download(page, context)
        finally:
            # Close the browser on error paths as well.
            browser.close()
        print("Prohlížeč zavřen.")


if __name__ == "__main__":
    run()
|
||||
@@ -0,0 +1,29 @@
|
||||
2026-05-20 17:56:21,647 ERROR Nelze se připojit k MongoDB (mongodb://localhost:27017): localhost:27017: [WinError 10061] No connection could be made because the target machine actively refused it (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms), Timeout: 5.0s, Topology Description: <TopologyDescription id: 6a0dd9a0ce7e4c93f3399a61, topology_type: Unknown, servers: [<ServerDescription ('localhost', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('localhost:27017: [WinError 10061] No connection could be made because the target machine actively refused it (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms)')>]>
|
||||
2026-05-20 17:56:45,268 INFO Importuji: downloads/2026-05-20_15-09_EDC_MDD3003_DateofVisit_DataListing.csv → edc.MDD3003_DateofVisit
|
||||
2026-05-20 17:56:48,052 INFO nové: 381 aktualizované: 0 chyby: 0
|
||||
2026-05-20 17:56:48,052 INFO ============================================================
|
||||
2026-05-20 17:56:48,052 INFO Celkem — nové: 381 aktualizované: 0 chyby: 0
|
||||
2026-05-20 18:12:48,691 INFO Importuji: downloads/2026-05-20_15-21_EDC_MDD3003_QueryDetails.csv → edc.2026-05-20_15-21_EDC_MDD3003_QueryDetails
|
||||
2026-05-20 18:12:48,739 INFO nové: 4 aktualizované: 0 chyby: 0
|
||||
2026-05-20 18:12:48,801 INFO Importuji: downloads/2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv → edc.2026-05-20_15-23_EDC_MDD3003_QueryDetails
|
||||
2026-05-20 18:13:03,331 INFO nové: 2091 aktualizované: 0 chyby: 0
|
||||
2026-05-20 18:13:03,332 INFO ============================================================
|
||||
2026-05-20 18:13:03,332 INFO Celkem — nové: 2095 aktualizované: 0 chyby: 0
|
||||
2026-05-20 18:13:31,267 INFO Importuji: downloads/2026-05-20_15-21_EDC_MDD3003_QueryDetails.csv → edc.MDD3003_QueryDetails
|
||||
2026-05-20 18:13:31,306 INFO nové: 4 aktualizované: 0 chyby: 0
|
||||
2026-05-20 18:13:31,354 INFO Importuji: downloads/2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv → edc.MDD3003_QueryDetails
|
||||
2026-05-20 18:13:45,497 INFO nové: 2087 aktualizované: 4 chyby: 0
|
||||
2026-05-20 18:13:45,497 INFO ============================================================
|
||||
2026-05-20 18:13:45,497 INFO Celkem — nové: 2091 aktualizované: 4 chyby: 0
|
||||
2026-05-20 18:14:06,652 INFO Importuji: downloads/2026-05-20_15-21_EDC_MDD3003_QueryDetails.csv → edc.queries
|
||||
2026-05-20 18:14:06,683 INFO nové: 4 aktualizované: 0 chyby: 0
|
||||
2026-05-20 18:14:06,727 INFO Importuji: downloads/2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv → edc.queries
|
||||
2026-05-20 18:14:22,340 INFO nové: 2087 aktualizované: 4 chyby: 0
|
||||
2026-05-20 18:14:22,340 INFO ============================================================
|
||||
2026-05-20 18:14:22,340 INFO Celkem — nové: 2091 aktualizované: 4 chyby: 0
|
||||
2026-05-20 21:56:49,619 INFO Importuji: downloads/2026-05-20_15-21_EDC_MDD3003_QueryDetails.csv → edc.queries + queries_snapshots [2026-05-20]
|
||||
2026-05-20 21:56:49,670 INFO nové: 0 aktualizované: 4 chyby: 0
|
||||
2026-05-20 21:56:49,711 INFO Importuji: downloads/2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv → edc.queries + queries_snapshots [2026-05-20]
|
||||
2026-05-20 21:57:07,554 INFO nové: 0 aktualizované: 2091 chyby: 0
|
||||
2026-05-20 21:57:07,554 INFO ============================================================
|
||||
2026-05-20 21:57:07,554 INFO Celkem — nové: 0 aktualizované: 2095 chyby: 0
|
||||
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"_meta": {
|
||||
"study": "42847922MDD3003",
|
||||
"site_group": "CZE",
|
||||
"report": "_EDC Std Rpt - Data Listing (Data Stream), ReportID=92",
|
||||
"total_forms": 97,
|
||||
"pages": 5,
|
||||
"note": "Kompletni seznam nacist z PromptsPage po vyberu Study+SiteGroup, Form panel, 5 stranek po 20 (posledni 17)"
|
||||
},
|
||||
"known_forms": [
|
||||
"Acknowledgement Reporting Form",
|
||||
"Acknowledgement Upload Form",
|
||||
"Adverse Event of Special Interest",
|
||||
"Adverse Events/Serious Aes",
|
||||
"Alcohol Test",
|
||||
"Arizona Sexual Experiences Scale Summary",
|
||||
"Arizona Sexual Experiences Scale-Female",
|
||||
"Arizona Sexual Experiences Scale-Male",
|
||||
"Date of Visit"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,297 @@
|
||||
"""
|
||||
import_to_mongo.py
|
||||
Verze: 1.0
|
||||
Datum: 2026-05-27
|
||||
|
||||
Import EDC Data Listing CSV do MongoDB (databáze: edc).
|
||||
|
||||
Kolekce: {STUDY}.{FormName} (např. UCO3001.ConcomitantTherapy)
|
||||
Filtr: pouze řádky s SiteGroupName == "CZE"
|
||||
Historie: při změně fields se stará verze uloží do pole history[]
|
||||
Po importu přesune zpracované CSV do downloads/Zpracovano/
|
||||
|
||||
Použití:
|
||||
python import_to_mongo.py # importuje všechny CSV z downloads/
|
||||
python import_to_mongo.py downloads/konkretni.csv # jeden soubor
|
||||
"""
|
||||
|
||||
import csv
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from pymongo import MongoClient, ASCENDING
|
||||
|
||||
# MongoDB connection target and the database that receives the listings.
MONGO_URI = "mongodb://192.168.1.76:27017"
DB_NAME = "edc"

# Directory scanned for CSV files, and the sub-directory that files are
# moved into once they have been imported.
DOWNLOADS_DIR = Path(__file__).parent / "downloads"
PROCESSED_DIR = DOWNLOADS_DIR / "Zpracovano"

# Rows are imported only when their SiteGroupName column equals this value.
COUNTRY_FILTER = "CZE"

# ---------------------------------------------------------------------------
# Mapping of the fixed CSV columns
# ---------------------------------------------------------------------------

# CSV column name -> dotted path of the value inside the stored document.
FIXED_FIELDS = {
    # site.*
    "SiteGroupName": "site.group",
    "SiteID": "site.id",
    "SiteNumber": "site.number",
    "Site": "site.name",
    # subject.*
    "SubjectID": "subject.id",
    "Subject": "subject.label",
    # form.*
    "CRFVersionID": "form.crfVersionId",
    "InstanceID": "form.instanceId",
    "InstanceName": "form.instanceName",
    "FolderSeq": "form.folderSeq",
    "Page": "form.page",
    "RecordID": "form.recordId",
    "RecordPosition": "form.recordPosition",
    # top level
    "LastModifiedDate": "lastModified",
}

# Document paths whose values are converted to int when the text allows it.
INT_CAST = {
    "form.folderSeq",
    "form.recordPosition",
}

# Columns that are never copied into the stored document.
META_FIELDS = {
    "StudyName",
    "SiteGroupParameter",
    "SiteNumberParameter",
    "SiteParameter",
    "SubjectParameter",
    "FormParameter",
    "FieldParameter",
    "FilterField",
    "FilterValue",
    "StartDateParameter",
    "EndDateParameter",
    "RunUser",
    "VersionNumber",
    "PrintDateTime",
    "TimeZone",
    "LastModifiedDateSortable",
    "StartDateSortable",
    "EndDateSortable",
    "ErrorMsg",
}

# strptime patterns tried in this order by parse_date(); the first one that
# parses the text wins.
DATE_FORMATS = [
    "%d %b %Y %H:%M:%S",
    "%d %b %Y %H:%M:%S:%f",
    "%d %b %Y",
    "%d %B %Y",
    "%Y%m%d %H:%M:%S.%f",
    "%Y-%m-%d %H:%M:%S",
    "%m/%d/%Y %I:%M:%S %p",
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_date(value: str) -> str | None:
    """Convert a date string to an ISO-8601 timestamp, or return None.

    The stripped input is tried against every pattern in DATE_FORMATS, in
    order; the first pattern that parses it wins. The parsed value has no
    zone of its own, so it is labelled as UTC before being serialised.

    Args:
        value: Raw text; surrounding whitespace is ignored.

    Returns:
        The ``datetime.isoformat()`` text with a UTC offset, or None when
        no pattern in DATE_FORMATS matches.
    """
    text = value.strip()
    for pattern in DATE_FORMATS:
        try:
            moment = datetime.strptime(text, pattern)
        except ValueError:
            # Not this pattern; try the next one.
            continue
        return moment.replace(tzinfo=timezone.utc).isoformat()
    return None
|
||||
|
||||
|
||||
def set_nested(doc: dict, path: str, value) -> None:
    """Store *value* in *doc* at a dot-separated *path*.

    Intermediate dictionaries are created on demand; existing ones are
    reused, so sibling keys written earlier are kept.

    Args:
        doc: Dictionary that is modified in place.
        path: Key path such as ``"site.group"``.
        value: Object stored under the last path component.
    """
    *parents, leaf = path.split(".")
    node = doc
    for key in parents:
        node = node.setdefault(key, {})
    node[leaf] = value
|
||||
|
||||
|
||||
def extract_snapshot_date(filename: str) -> str:
    """Return the snapshot date (``YYYY-MM-DD``) for a downloaded file.

    The date is taken from the start of the file's base name (any directory
    part is ignored). When the name does not begin with a date, today's
    local date is returned instead.
    """
    leading_date = re.match(r"(\d{4}-\d{2}-\d{2})", Path(filename).name)
    if leading_date is None:
        return datetime.now().strftime("%Y-%m-%d")
    return leading_date.group(1)
|
||||
|
||||
|
||||
def parse_collection_name(filename: str) -> str | None:
|
||||
"""
|
||||
Z názvu souboru odvodí kolekci ve formátu STUDY.FormName.
|
||||
Vrátí None pro QueryDetails (mají vlastní flow).
|
||||
"""
|
||||
stem = Path(filename).stem
|
||||
if "QueryDetails" in stem:
|
||||
return None
|
||||
match = re.search(
|
||||
r"EDC_(\w+?)_(?:ALL_|CZE_|[A-Z]{2,3}_)?(.+?)_DataListing",
|
||||
stem, re.IGNORECASE,
|
||||
)
|
||||
if match:
|
||||
study, form = match.group(1), match.group(2)
|
||||
return f"{study}.{form}"
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CSV → dokument
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def map_row(row: dict) -> dict:
    """Convert one ``csv.DictReader`` row into the document to be stored.

    * Columns listed in FIXED_FIELDS are copied to their dotted document
      paths. Values under INT_CAST paths become ``int`` when the text is
      numeric; ``lastModified`` becomes an ISO timestamp when parse_date()
      recognises it. In both cases the stripped text is kept otherwise.
    * The numbered ``Field<N>Label`` / ``Field<N>Value`` column pairs are
      folded into ``doc["fields"]`` as ``{label: value}``, starting at
      N = 1 and stopping at the first N for which neither column exists.
      Pairs with an empty label or an empty value are left out; values that
      parse_date() recognises are stored as ISO timestamps.
    * Every other column (META_FIELDS and anything unknown) is ignored.

    Cells that are not text are treated as empty. ``csv.DictReader`` stores
    the surplus cells of an over-long row as a list under the key ``None``
    and fills the missing cells of a short row with ``None``; neither may
    abort the import.

    Args:
        row: Mapping of CSV column name to cell text.

    Returns:
        The nested document; it always contains a ``"fields"`` dict.
    """
    doc: dict = {}

    for col, raw in row.items():
        path = FIXED_FIELDS.get(col)
        if path is None:
            # Metadata columns, Field<N>* columns (handled below), unknown
            # columns and the DictReader overflow key are not copied here.
            continue

        value = raw.strip() if isinstance(raw, str) else ""
        if path in INT_CAST:
            try:
                value = int(value)
            except ValueError:
                # Best effort: non-numeric text is stored unchanged.
                pass
        elif path == "lastModified":
            value = parse_date(value) or value
        set_nested(doc, path, value)

    fields: dict = {}
    n = 1
    while True:
        val_key = f"Field{n}Value"
        lbl_key = f"Field{n}Label"
        if val_key not in row and lbl_key not in row:
            break
        label = (row.get(lbl_key) or "").strip()
        text = (row.get(val_key) or "").strip()
        if label and text:
            fields[label] = parse_date(text) or text
        n += 1

    doc["fields"] = fields
    return doc
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Import jednoho souboru
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def import_file(csv_path: str, db) -> dict:
    """Import one Data Listing CSV into its MongoDB collection.

    The collection name comes from parse_collection_name() and the snapshot
    date from extract_snapshot_date(). Only rows whose ``SiteGroupName``
    equals COUNTRY_FILTER are considered; rows without a ``RecordID`` are
    dropped without being counted. Each remaining row is matched on
    ``form.recordId``:

    * no stored document -> inserted with ``firstSeen``/``lastSeen`` set to
      the snapshot date and an empty ``history``;
    * stored ``fields`` differ -> the previous ``fields`` are pushed onto
      ``history`` (dated with the previous ``lastSeen``) and the document
      is overwritten with the new values;
    * otherwise only ``lastSeen`` and ``sourceFile`` are refreshed.

    Args:
        csv_path: Path of the CSV file to read (UTF-8, comma separated).
        db: Database object; ``db[name]`` must return the collection.
            (Presumably a pymongo ``Database`` - that is what main() passes.)

    Returns:
        ``{"skipped": True}`` when no collection name can be derived from
        the file name, otherwise a dict with the keys ``collection``,
        ``snapshot``, ``inserted``, ``changed``, ``unchanged`` and
        ``filtered_out``.
    """
    filename = Path(csv_path).name
    col_name = parse_collection_name(filename)
    if col_name is None:
        print(f" Preskakuji (QueryDetails): {filename}")
        return {"skipped": True}

    snapshot_date = extract_snapshot_date(filename)
    collection = db[col_name]

    # Ensure the indexes before touching any row: the per-row lookup below
    # filters on form.recordId, and on a brand-new collection it would
    # otherwise run without an index for the whole first import. The unique
    # constraint is also checked before anything is written.
    collection.create_index([("form.recordId", ASCENDING)], unique=True)
    collection.create_index([("subject.label", ASCENDING)])
    collection.create_index([("site.number", ASCENDING)])

    inserted = changed = unchanged = filtered_out = 0

    with open(csv_path, encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f, delimiter=",", quotechar='"')

        for row in reader:
            site_group = (row.get("SiteGroupName") or "").strip()
            if site_group != COUNTRY_FILTER:
                filtered_out += 1
                continue

            doc = map_row(row)
            record_id = doc.get("form", {}).get("recordId")
            if not record_id:
                # Without a record id the row cannot be matched to a stored
                # document; it is dropped.
                continue

            doc["sourceFile"] = filename
            existing = collection.find_one({"form.recordId": record_id})

            if existing is None:
                doc["firstSeen"] = snapshot_date
                doc["lastSeen"] = snapshot_date
                doc["history"] = []
                collection.insert_one(doc)
                inserted += 1

            elif existing.get("fields") != doc["fields"]:
                # Keep the superseded field values, dated with the snapshot
                # in which they were last seen. .get() mirrors the
                # comparison above so a stored document lacking "fields"
                # does not raise KeyError.
                old_entry = {
                    "date": existing.get("lastSeen", snapshot_date),
                    "fields": existing.get("fields", {}),
                }
                update_doc = dict(doc)
                update_doc["lastSeen"] = snapshot_date
                collection.update_one(
                    {"_id": existing["_id"]},
                    {
                        "$push": {"history": old_entry},
                        "$set": update_doc,
                    },
                )
                changed += 1

            else:
                collection.update_one(
                    {"_id": existing["_id"]},
                    {"$set": {"lastSeen": snapshot_date, "sourceFile": filename}},
                )
                unchanged += 1

    stats = {
        "collection": col_name,
        "snapshot": snapshot_date,
        "inserted": inserted,
        "changed": changed,
        "unchanged": unchanged,
        "filtered_out": filtered_out,
    }
    print(f" {col_name} [{snapshot_date}]: +{inserted} new, ~{changed} changed, ={unchanged} same, -{filtered_out} non-CZE")
    return stats
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
    """Import CSV files into MongoDB and archive the imported ones.

    Files come from the command-line arguments when any are given
    (arguments that are not existing files are reported and ignored),
    otherwise from every ``*_DataListing.csv`` in DOWNLOADS_DIR. Each file
    is passed to import_file(); files that were imported are moved to
    PROCESSED_DIR and their counters are added to the printed total.

    Files that import_file() reports as skipped are left where they are:
    they were not imported, so they are not archived as processed.

    The MongoDB client is closed even when an import raises.
    """
    paths: list[Path] = []

    if len(sys.argv) > 1:
        for arg in sys.argv[1:]:
            p = Path(arg)
            if p.is_file():
                paths.append(p)
            else:
                print(f"Soubor nenalezen: {arg}")
    else:
        paths = sorted(DOWNLOADS_DIR.glob("*_DataListing.csv"))

    if not paths:
        print("Zadne CSV soubory k importu.")
        return

    print(f"Nalezeno {len(paths)} souboru.\n")

    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
    total = {"inserted": 0, "changed": 0, "unchanged": 0}
    try:
        # Fail fast when the server is unreachable.
        client.admin.command("ping")
        db = client[DB_NAME]

        # parents=True: explicit file arguments may be used before the
        # downloads/ directory itself exists.
        PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

        for csv_path in paths:
            print(f"Import: {csv_path.name}")
            stats = import_file(str(csv_path), db)
            if stats.get("skipped"):
                # Not imported, therefore not archived.
                continue

            for k in total:
                total[k] += stats.get(k, 0)

            dest = PROCESSED_DIR / csv_path.name
            shutil.move(str(csv_path), str(dest))
            print(" -> presunut do Zpracovano/")
    finally:
        client.close()

    print(f"\nCelkem: +{total['inserted']} new, ~{total['changed']} changed, ={total['unchanged']} same")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,103 @@
|
||||
{
|
||||
"study": "42847922MDD3003",
|
||||
"report_id": 92,
|
||||
"forms": [
|
||||
"Acknowledgement Reporting Form",
|
||||
"Acknowledgement Upload Form",
|
||||
"Adverse Event of Special Interest",
|
||||
"Adverse Events/Serious Aes",
|
||||
"Alcohol Test",
|
||||
"Arizona Sexual Experiences Scale Summary",
|
||||
"Arizona Sexual Experiences Scale-Female",
|
||||
"Arizona Sexual Experiences Scale-Male",
|
||||
"Change in Background Antidepressant",
|
||||
"Clinical Global Impression - S (Depression)",
|
||||
"Clinical Outcome Assessments Completion Status",
|
||||
"Columbia-Suicide Severity Rating Scale - Baseline/Screening Version",
|
||||
"Columbia-Suicide Severity Rating Scale - Since Last Visit",
|
||||
"Comments",
|
||||
"Concomitant Therapy / Medication",
|
||||
"Date of Visit",
|
||||
"Death Information",
|
||||
"Demographics",
|
||||
"Derivation (operational form)",
|
||||
"Drug Expiry Information",
|
||||
"Drug Testing",
|
||||
"DSL Index Page (Must have for DSL functionality; not visible to the sites) (operational form)",
|
||||
"DUMMY (operational form)",
|
||||
"Educational Level",
|
||||
"Enrollment",
|
||||
"EQ-5D-5L",
|
||||
"Evaluation of Response - Induction Phase",
|
||||
"Evaluation of Response - Stabilization Phase",
|
||||
"General Medical History",
|
||||
"Inclusion/Exclusion Criteria",
|
||||
"Insomnia Severity Index",
|
||||
"Insomnia Severity Index (Clinician Version)",
|
||||
"Integrated Medication Kit Accountability Information Double Blind Maintenance",
|
||||
"Integrated Medication Kit Accountability Information Double Blind Part 1",
|
||||
"Integrated Medication Kit Accountability Information Open Label Part 2",
|
||||
"Interim Investigator Signature",
|
||||
"IRT Stratification",
|
||||
"Local Chemistry (Unscheduled)",
|
||||
"Local Hematology (Unscheduled)",
|
||||
"Local Labs for Background Antidepressant Compliance",
|
||||
"Menstrual Cycle Tracking",
|
||||
"Menstrual Cycle Tracking Log",
|
||||
"MGH ATRQ, Geriatric, Section I and II",
|
||||
"MGH ATRQ, Non-Geriatric, Section I and II",
|
||||
"MGH-ATRQ - Summary",
|
||||
"MGH-ATRQ - Therapy Questions",
|
||||
"Mini-Mental State Examination",
|
||||
"Neurologic Examination",
|
||||
"Patient Global Impression of Change - Depression/Insomnia",
|
||||
"Patient Global Impression of Severity - Insomnia",
|
||||
"Patient Health Questionnaire - 9 Item",
|
||||
"Perceived Treatment Group Assignment",
|
||||
"Periodic Investigator's EDC Review Acknowledgement",
|
||||
"Physical Examination",
|
||||
"Physician Withdrawal Checklist (PWC)",
|
||||
"Pregnancy Test",
|
||||
"Preplanned Surgeries/Procedures",
|
||||
"Procedures",
|
||||
"PROMIS - Sleep Disturbance",
|
||||
"Protocol Amendment Implementation (Operational Form)",
|
||||
"Psychiatric History for Major Depressive Disorder",
|
||||
"Psychotherapy",
|
||||
"Psychotherapy v2.0",
|
||||
"Randomization",
|
||||
"Relapse - MAJOR DEPRESSIVE DISORDER",
|
||||
"Relapse Criteria",
|
||||
"Relapse Criteria Unscheduled",
|
||||
"Relevant Additional Drug Therapies",
|
||||
"Relevant information Selection",
|
||||
"Relevant Local Laboratory",
|
||||
"Relevant Local Laboratory Data",
|
||||
"Relevant Medical History",
|
||||
"Relevant Preplanned Surgeries/Procedures",
|
||||
"Relevant Procedures",
|
||||
"Relevant Study Medication",
|
||||
"Relevant Tests",
|
||||
"Safety Report Form",
|
||||
"SCID-CT",
|
||||
"SCID-CT Insomnia Disorder Supplemental",
|
||||
"Sheehan Disability Scale",
|
||||
"SIGH-D-17",
|
||||
"Site Independent Qualification Assessment",
|
||||
"Site/Invest Identification",
|
||||
"Structured Interview Guide for the Montgomery-Asberg Depression Rating Scale",
|
||||
"Study Drug Administration Double Blind Maintenance",
|
||||
"Study Drug Administration Double Blind Part 1",
|
||||
"Study Drug Administration Open Label Part 2 - Induction",
|
||||
"Study Drug Administration Open Label Part 2 - Stabilization",
|
||||
"Subject",
|
||||
"Subject Site Switch",
|
||||
"Treatment Disposition",
|
||||
"Treatment Unblinding",
|
||||
"Trial Disposition Completion/ Discontinuation",
|
||||
"Unscheduled Assessments",
|
||||
"Unsuccessful Contact Attempts",
|
||||
"Vital Signs",
|
||||
"Vital Signs (Unscheduled)"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
[
|
||||
{"name": "_EDC Std Rpt - Changes/Queries after SDV", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=97"},
|
||||
{"name": "_EDC Std Rpt - Changes/Queries after SDV", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=98"},
|
||||
{"name": "_EDC Std Rpt - Clinical Safety Case Report", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=93"},
|
||||
{"name": "_EDC Std Rpt - CTMS Activities Listing", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=111"},
|
||||
{"name": "_EDC Std Rpt - Data Cleaning Progress and Finalization", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=102"},
|
||||
{"name": "_EDC Std Rpt - Data Cleaning Progress and Finalization", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=105"},
|
||||
{"name": "_EDC Std Rpt - Data Cleaning Progress Visit Detail", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=113"},
|
||||
{"name": "_EDC Std Rpt - Data Cleaning Progress Visit Detail", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=112"},
|
||||
{"name": "_EDC Std Rpt - Data Listing", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=92"},
|
||||
{"name": "_EDC Std Rpt - Data Listing", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=99"},
|
||||
{"name": "_EDC Std Rpt - eCRF Version Comparison", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=127"},
|
||||
{"name": "_EDC Std Rpt - eCRF Version Comparison", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=128"},
|
||||
{"name": "_EDC Std Rpt - Expected eCRF Fields Not Entered", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=130"},
|
||||
{"name": "_EDC Std Rpt - Expected eCRF Fields Not Entered", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=129"},
|
||||
{"name": "_EDC Std Rpt - GMS SAE Report", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=91"},
|
||||
{"name": "_EDC Std Rpt - ICF Log Listing", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=132"},
|
||||
{"name": "_EDC Std Rpt - ICF Log Listing", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=131"},
|
||||
{"name": "_EDC Std Rpt - Inactivated Datalist", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=133"},
|
||||
{"name": "_EDC Std Rpt - Inactivated Datalist", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=134"},
|
||||
{"name": "_EDC Std Rpt - Inactive DataPages", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=136"},
|
||||
{"name": "_EDC Std Rpt - Inactive DataPages", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=135"},
|
||||
{"name": "_EDC Std Rpt - J&J CAR-T SAE Report", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=137"},
|
||||
{"name": "_EDC Std Rpt - Local Lab Normal Ranges", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=138"},
|
||||
{"name": "_EDC Std Rpt - Medical Affairs SAE Report", "description": "For MA Pre-Configured Library", "type": "Global", "url": "PromptsPage.aspx?ReportID=139"},
|
||||
{"name": "_EDC Std Rpt - Missing Pages", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=141"},
|
||||
{"name": "_EDC Std Rpt - Missing Pages", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=140"},
|
||||
{"name": "_EDC Std Rpt - Missing Visits", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=149"},
|
||||
{"name": "_EDC Std Rpt - Missing Visits", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=148"},
|
||||
{"name": "_EDC Std Rpt - Monitor SDV Planning", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=151"},
|
||||
{"name": "_EDC Std Rpt - Monitor SDV Planning", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=150"},
|
||||
{"name": "_EDC Std Rpt - Non Conformant Data Listing", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=152"},
|
||||
{"name": "_EDC Std Rpt - Pages to Review / SDV / Freeze", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=154"},
|
||||
{"name": "_EDC Std Rpt - Pages to Review / SDV / Freeze", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=153"},
|
||||
{"name": "_EDC Std Rpt - Query Aging", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=156"},
|
||||
{"name": "_EDC Std Rpt - Query Aging", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=155"},
|
||||
{"name": "_EDC Std Rpt - Query Details", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=157"},
|
||||
{"name": "_EDC Std Rpt - Query Details", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=164"},
|
||||
{"name": "_EDC Std Rpt - Query Trend", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=168"},
|
||||
{"name": "_EDC Std Rpt - Query Trend", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=166"},
|
||||
{"name": "_EDC Std Rpt - Re-UAT Compare Report", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=195"},
|
||||
{"name": "_EDC Std Rpt - Safety Gateway Cover Sheet", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=171"},
|
||||
{"name": "_EDC Std Rpt - Safety Gateway e2b XML Case Report", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=172"},
|
||||
{"name": "_EDC Std Rpt - Safety Gateway Reconciliation", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=179"},
|
||||
{"name": "_EDC Std Rpt - Safety Gateway System Configuration", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=194"},
|
||||
{"name": "_EDC Std Rpt - SafetyGateway Mapping Configuration", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=180"},
|
||||
{"name": "_EDC Std Rpt - Self Evident Corrections", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=178"},
|
||||
{"name": "_EDC Std Rpt - Self Evident Corrections", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=177"},
|
||||
{"name": "_EDC Std Rpt - Signature History Report", "description": "", "type": "Global/Data Stream", "url": "PromptsPage.aspx?ReportID=176"},
|
||||
{"name": "_EDC Std Rpt - Site Payment", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=null"},
|
||||
{"name": "_EDC Std Rpt - Special Characters", "description": "", "type": "Global/Data Stream", "url": "PromptsPage.aspx?ReportID=null"},
|
||||
{"name": "360 Data Cleaning Progress Dashboard", "description": "Track eCRF Data Cleaning Progress", "type": "Dashboard", "url": "PromptsPage.aspx?ReportID=null"},
|
||||
{"name": "360 Enrollment Tracking Dashboard", "description": "Track Subject Enrollment Performance", "type": "Dashboard", "url": "PromptsPage.aspx?ReportID=null"},
|
||||
{"name": "360 Query Management Dashboard", "description": "Manage Queries", "type": "Dashboard", "url": "PromptsPage.aspx?ReportID=null"},
|
||||
{"name": "Audit Trail", "description": "Audit Trail Report", "type": "Standard", "url": "PromptsPage.aspx?ReportID=null"},
|
||||
{"name": "Data Listing", "description": "Data Listing Report", "type": "Standard", "url": "PromptsPage.aspx?ReportID=null"},
|
||||
{"name": "Stream-Query Detail", "description": "Query Detail Report", "type": "Stream", "url": "PromptsPage.aspx?ReportID=null"}
|
||||
]
|
||||
@@ -0,0 +1,11 @@
|
||||
from download_edc_datalistings import download_datalisting
|
||||
|
||||
# Data Listing download request: one study, three forms, one country code.
# The values are handed unchanged to download_datalisting().
study_name = "77242113UCO3001"
form_names = [
    "Trial Disposition (Completion / Discontinuation)",
    "Date of Visit",
    "Concomitant Therapy",
]
country_code = "CZE"

download_datalisting(study=study_name, forms=form_names, country=country_code)
|
||||
@@ -0,0 +1,77 @@
|
||||
{
|
||||
"study": "77242113UCO3001",
|
||||
"report_id": 92,
|
||||
"forms": [
|
||||
"Acknowledgement Reporting Form",
|
||||
"Acknowledgement Upload Form",
|
||||
"Additional Liver Event Assessment Forms",
|
||||
"Advanced Therapy Treatment Failure Reason",
|
||||
"Adverse Events/Serious AEs",
|
||||
"Alcohol Consumption",
|
||||
"Axial Spondyloarthropathy Diagnosis Information",
|
||||
"Clinical Outcome Assessments Completion Status",
|
||||
"Concomitant Therapy",
|
||||
"Consents / Withdrawal of Consents for Optional Research",
|
||||
"Corticosteroid, Immunomodulator And Oral Aminosalicylates History",
|
||||
"Date of Visit",
|
||||
"Death Information",
|
||||
"Demographics",
|
||||
"Derivation (operational form)",
|
||||
"DSL Index Page (Must have for DSL functionality; not visible to the sites) (operational form)",
|
||||
"DUMMY (operational form)",
|
||||
"Endoscopy Information",
|
||||
"Enrollment",
|
||||
"Family History specific to Hepatic Event",
|
||||
"Food/Liquid Fasting Compliance",
|
||||
"General Medical History",
|
||||
"GI Related Surgeries and Procedures",
|
||||
"Group Selection",
|
||||
"Hepatic Event - Other Risk Factors",
|
||||
"History of GI Past Related Surgeries/Procedures",
|
||||
"Inclusion/Exclusion Criteria",
|
||||
"Integrated Medication Kit Accountability Information",
|
||||
"Interim Investigator Signature",
|
||||
"Intestinal Ultrasound",
|
||||
"Limitation on Retention of Samples",
|
||||
"Liver Biopsy",
|
||||
"Liver Chemistry Abnormalities Assessment Form",
|
||||
"Liver Event Case of AEs",
|
||||
"Liver Event Chemistry Analytes",
|
||||
"Liver Event Level 1 Analytes",
|
||||
"Liver Event Level 2 Analytes",
|
||||
"Liver Event Workup Completion Status",
|
||||
"Liver Imaging Assessment",
|
||||
"Liver-related Signs and Symptoms of Hypersensitivity",
|
||||
"Liver-related Signs and Symptoms of Liver Injury",
|
||||
"Medical Encounters",
|
||||
"Medical History: Liver-related Diseases",
|
||||
"Periodic Investigator's EDC Review Acknowledgement",
|
||||
"Pharmacokinetics, Pharmacodynamic and Biomarker Sample Collection",
|
||||
"Preplanned Surgeries/Procedures",
|
||||
"Protocol Amendment Implementation (Operational Form)",
|
||||
"Randomization",
|
||||
"Relevant Additional Drug Therapies",
|
||||
"Relevant Information Selection",
|
||||
"Relevant Local Laboratory",
|
||||
"Relevant Local Laboratory Data",
|
||||
"Relevant Procedures",
|
||||
"Relevant Study Medication",
|
||||
"Relevant Tests",
|
||||
"Safety Report Form",
|
||||
"Screening for Tuberculosis",
|
||||
"Site/Invest Identification",
|
||||
"Study Drug Administration",
|
||||
"Subject",
|
||||
"Subject Site Switch",
|
||||
"Substance Use Alcohol",
|
||||
"Substance Use Tobacco/Nicotine",
|
||||
"Treatment Disposition (End of treatment)",
|
||||
"Treatment Unblinding",
|
||||
"Trial Disposition (Completion / Discontinuation)",
|
||||
"Tuberculosis Testing and Results",
|
||||
"Ulcerative Colitis Disease History",
|
||||
"Ulcerative Colitis Medication History",
|
||||
"Unscheduled Assessments",
|
||||
"Vital Signs"
|
||||
]
|
||||
}
|
||||