Initial commit — clean history (removed large test files, browser profiles, Medidata/Clario downloads)

This commit is contained in:
2026-06-01 15:36:31 +02:00
commit bb604e593e
1304 changed files with 116480 additions and 0 deletions
+5
View File
@@ -0,0 +1,5 @@
IMEDIDATA_USERNAME=vladimir.buzalka
IMEDIDATA_PASSWORD=Mar2026Ax162q8+
DOWNLOAD_DIR=./downloads
+489
View File
@@ -0,0 +1,489 @@
"""
download_report.py
NAHRAZENO skriptem download_edc_datalistings.py
Původně: stahování Data Listing reportů pro studii MDD3003 (CZE).
"""
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path
from dotenv import load_dotenv
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
import tkinter as tk
from tkinter import simpledialog
# Load settings from the .env file located next to this script.
load_dotenv(Path(__file__).parent / ".env")
# NOTE(review): the fallback hard-codes a personal account name; consider
# requiring IMEDIDATA_USERNAME to be set instead.
USERNAME = os.getenv("IMEDIDATA_USERNAME", "vladimir.buzalka")
# Empty string when unset; run() exits early in that case.
PASSWORD = os.getenv("IMEDIDATA_PASSWORD", "")
# NOTE(review): always <script dir>/downloads — no DOWNLOAD_DIR environment
# variable is consulted here.
DOWNLOAD_DIR = Path(__file__).parent / "downloads"
# Playwright storage-state file used to reuse a logged-in session.
AUTH_FILE = Path(__file__).parent / "auth.json"
# A saved session older than this many days is treated as stale (see auth_valid).
AUTH_MAX_AGE_DAYS = 7
LOGIN_URL = "https://login.imedidata.com/login"
# Direct link to the Rave role-selection page (adjacent literals are concatenated).
SELECT_ROLE_URL = (
"https://jnjja.mdsol.com/MedidataRave/SelectRole.aspx"
"?client_division_uuid=e5de55d5-a414-4bd1-9abe-18e96fd5475d"
"&study_group_uuid=b0793ca6-33ec-44e8-883b-6fc1a4b671c4"
"&studygroup_id=107981"
)
# Report prompt values.
STUDY_NAME = "42847922MDD3003"
SITE_GROUP = "CZE"
# Forms downloaded one after another by run().
FORM_NAMES = [
"Date of Visit",
"Vital Signs",
"Interim Investigator Signature",
]
REPORT_ID = 92 # _EDC Std Rpt - Data Listing (Data Stream)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def auth_valid():
    """Tell whether a saved session file exists and is still fresh.

    Returns:
        True when AUTH_FILE exists and was modified less than
        AUTH_MAX_AGE_DAYS days ago; False otherwise.
    """
    if AUTH_FILE.exists():
        saved_at = datetime.fromtimestamp(AUTH_FILE.stat().st_mtime)
        return datetime.now() - saved_at < timedelta(days=AUTH_MAX_AGE_DAYS)
    return False
def wait_load(page, extra_ms=1000):
    """Wait for the page's 'load' event, then pause a little longer.

    Only 'load' is awaited because, per the original author's note, Rave
    never reaches 'networkidle'.

    Args:
        page: Playwright page to wait on.
        extra_ms: Additional fixed pause in milliseconds after the load wait.
    """
    load_timeout_ms = 20_000
    try:
        page.wait_for_load_state("load", timeout=load_timeout_ms)
    except PWTimeout:
        # Best effort: a slow 'load' event must not abort the flow.
        pass
    page.wait_for_timeout(extra_ms)
def dbg(page, label):
    """Print the page's current URL, prefixed with a bracketed step label."""
    print("[{}] URL: {}".format(label, page.url))
# ---------------------------------------------------------------------------
# Login
# ---------------------------------------------------------------------------
def _ask_otp_popup():
    """Show a GUI dialog asking for the OKTA OTP code.

    Returns:
        The entered code with surrounding whitespace removed, or an empty
        string when the dialog was cancelled or left blank.
    """
    window = tk.Tk()
    # Hide the empty root window but keep the dialog above other windows.
    window.withdraw()
    window.lift()
    window.attributes("-topmost", True)
    answer = simpledialog.askstring(
        "OKTA MFA",
        "Zadej OTP kód z OKTA (6 číslic):",
        parent=window,
    )
    window.destroy()
    if not answer:
        answer = ""
    return answer.strip()
def do_login(page, context):
    """Log in to iMedidata, handle an optional OKTA MFA step, and save the session.

    Exits the process with status 1 when no OTP is entered or when the
    browser does not end up on home.imedidata.com.

    Args:
        page: Playwright page used for the login flow.
        context: Browser context whose storage state is written to AUTH_FILE.
    """
    # The credential inputs are named session[username] / session[password].
    username_css = 'input[name="session[username]"]'
    password_css = 'input[name="session[password]"]'

    print("Přihlašuji se do iMedidata...")
    page.goto(LOGIN_URL)
    wait_load(page, 500)
    dbg(page, "login-page")

    page.wait_for_selector(username_css, timeout=10_000)
    page.fill(username_css, USERNAME)
    page.fill(password_css, PASSWORD)
    page.click('button[type="submit"]')

    # The redirect may go through OKTA or straight to the home page.
    wait_load(page, 2000)
    dbg(page, "after-signin")

    if _okta_mfa_present(page):
        print("\n*** OKTA MFA vyžadována! ***")
        otp = _ask_otp_popup()
        if not otp:
            print("CHYBA: OTP nebylo zadáno.")
            sys.exit(1)
        _fill_otp(page, otp)
        # Give OKTA time to process the code and redirect back.
        wait_load(page, 3000)
        dbg(page, "after-otp")

    # Wait until we land on home.imedidata.com; a timeout is only logged here.
    try:
        page.wait_for_url("**/home.imedidata.com**", timeout=30_000)
    except PWTimeout:
        dbg(page, "wait-home-timeout")
    dbg(page, "final-login")

    landed_home = "home.imedidata.com" in page.url
    if not landed_home:
        print("CHYBA: Přihlášení se nezdařilo! Zkontroluj heslo nebo OKTA kód.")
        input("Zmáčkni Enter pro ukončení...")
        sys.exit(1)

    context.storage_state(path=str(AUTH_FILE))
    print("Session uložena do auth.json")
def _okta_mfa_present(page):
if "okta" in page.url.lower():
return True
for sel in [
'input[name="answer"]',
'input[name*="otp"]',
'input[name*="code"]',
'input[placeholder*="code" i]',
]:
if page.query_selector(sel):
return True
return False
def _fill_otp(page, otp):
for sel in [
'input[name="answer"]',
'input[name*="otp"]',
'input[name*="code"]',
'input[type="tel"]',
'input[placeholder*="code" i]',
]:
el = page.query_selector(sel)
if el:
el.fill(otp)
page.keyboard.press("Enter")
return
# Záložní: zkusíme první viditelný text input
page.keyboard.type(otp)
page.keyboard.press("Enter")
# ---------------------------------------------------------------------------
# Navigace po přihlášení
# ---------------------------------------------------------------------------
def go_to_select_role(page):
    """Navigate to the SelectRole page and report whether we appear logged in.

    Returns:
        True when the resulting URL contains neither "login" nor "okta"
        (case-insensitive); False otherwise.
    """
    print("Navigace na SelectRole...")
    try:
        page.goto(SELECT_ROLE_URL)
    except Exception:
        # Rave performs a server-side redirect (ERR_ABORTED); the URL is
        # checked after loading instead of failing here.
        pass
    wait_load(page, 1500)
    dbg(page, "select-role")
    current_url = page.url.lower()
    return not ("login" in current_url or "okta" in current_url)
def select_role(page):
    """Choose the "Site Manager" role and click Continue.

    Returns early (after logging the URL) when no <select> shows up within
    10 seconds.
    """
    print("Vybírám roli Site Manager...")
    try:
        page.wait_for_selector("select", timeout=10_000)
    except PWTimeout:
        dbg(page, "no-select-found")
        return

    # Pick the first option, in the first <select>, whose text mentions Site Manager.
    chosen_label = None
    for dropdown in page.query_selector_all("select"):
        for option in dropdown.query_selector_all("option"):
            label = (option.inner_text() or "").strip()
            if "site manager" in label.lower():
                dropdown.select_option(label=label)
                chosen_label = label
                print(f" Vybráno: '{label}'")
                break
        if chosen_label is not None:
            break

    if chosen_label is None:
        print(" VAROVÁNÍ: Option 'Site Manager' nenalezena, zkouším kliknout na text...")
        try:
            page.get_by_text("Site Manager", exact=False).first.click()
        except Exception as e:
            print(f" {e}")

    # Click the first Continue/submit control that exists.
    continue_selectors = (
        'input[value="Continue"]',
        'input[type="submit"]',
        'button:has-text("Continue")',
        'button[type="submit"]',
    )
    for css in continue_selectors:
        try:
            button = page.query_selector(css)
            if button:
                button.click()
                break
        except Exception:
            continue

    wait_load(page, 2000)
    dbg(page, "after-role")
def navigate_to_reporter(page):
    """Click the "Reporter" link; log the URL and re-raise on timeout."""
    reporter_link = 'a:has-text("Reporter")'
    print("Klikám na Reporter...")
    try:
        page.wait_for_selector(reporter_link, timeout=15_000)
        page.click(reporter_link)
        wait_load(page, 1500)
        dbg(page, "reporter")
    except PWTimeout:
        dbg(page, "reporter-not-found")
        raise
def open_report(page):
    """Open the prompts page of report REPORT_ID; log the URL and re-raise on timeout."""
    print(f"Klikám na report ID={REPORT_ID} (Data Listing - Data Stream)...")
    report_link = f'a[href="PromptsPage.aspx?ReportID={REPORT_ID}"]'
    try:
        page.wait_for_selector(report_link, timeout=15_000)
        page.click(report_link)
        wait_load(page, 2000)
        dbg(page, "report-opened")
    except PWTimeout:
        dbg(page, "report-not-found")
        raise
# ---------------------------------------------------------------------------
# Parametry reportu
# ---------------------------------------------------------------------------
def set_study_param(page):
    """Expand the Study panel and tick the checkbox for STUDY_NAME.

    The checkbox is located by its label text rather than by a fixed
    position, so a reordered study list cannot silently select another study.
    If no label contains STUDY_NAME, a warning is printed and checkbox index 0
    is used (the previous behaviour).
    """
    print(f" Parametr Study: {STUDY_NAME}")
    page.click('#PromptsBox_st_ShowHideBtn')
    page.wait_for_timeout(1500)

    checkbox_css = 'input[id^="PromptsBox_st_FrontEndCBList_"]'
    page.wait_for_selector(checkbox_css, timeout=10_000)

    wanted = STUDY_NAME.upper()
    target_id = None
    for box in page.query_selector_all(checkbox_css):
        box_id = box.get_attribute("id")
        # Read the text of the row/cell around the checkbox.
        # NOTE(review): assumes the label lives in the same <tr>/<td> — confirm.
        label_text = page.evaluate(
            """id => {
                const el = document.getElementById(id);
                if (!el) return '';
                const row = el.closest('tr') || el.closest('td') || el.parentElement;
                return row ? row.innerText : '';
            }""",
            box_id,
        )
        if wanted in (label_text or "").upper():
            target_id = box_id
            break

    if target_id is None:
        print(f" VAROVÁNÍ: Studie '{STUDY_NAME}' nenalezena! Zkouším index 0...")
        target_id = "PromptsBox_st_FrontEndCBList_0"

    study_box = page.locator(f"#{target_id}")
    if not study_box.is_checked():
        study_box.check()
    wait_load(page, 3000)
    dbg(page, "after-study")
def set_site_group_param(page):
    """Expand Site Group, select SITE_GROUP and enable "Include Sub Site Groups"."""

    def fire_change(css):
        # Dispatch a bubbling 'change' event; without it the postback does not fire.
        page.evaluate(
            f"document.querySelector('{css}').dispatchEvent(new Event('change', {{bubbles:true}}))"
        )

    toggle_css = '#PromptsBox_sg_ShowHideBtn'
    list_css = '#PromptsBox_sg_List'
    include_css = '#PromptsBox_sg_CheckBox'

    print(f" Parametr Site Group: {SITE_GROUP}")
    # Expand the Site Group panel.
    page.click(toggle_css)
    page.wait_for_timeout(1500)

    page.wait_for_selector(list_css, timeout=10_000)
    page.select_option(list_css, label=SITE_GROUP)
    fire_change(list_css)
    wait_load(page, 2000)

    print(" Include Sub Site Groups: zapnuto")
    include_box = page.locator(include_css)
    if not include_box.is_checked():
        include_box.check()
        fire_change(include_css)
    wait_load(page, 2000)

    # Collapsing the panel confirms the selection and triggers the Form postback.
    page.click(toggle_css)
    wait_load(page, 3000)
    dbg(page, "after-site-group")
def set_form_param(page, form_name):
    """Open the Form panel if needed, search for `form_name` and tick its checkbox.

    The panel is single-selection, so ticking a new checkbox unticks the
    previous one.

    Args:
        page: Playwright page showing the report prompts.
        form_name: Form name typed into the panel's search box.

    Returns:
        True when a checkbox was found and is ticked; False when no checkbox
        became visible after the search, so the caller can skip the download
        instead of exporting the previously selected form.
    """
    print(f" Parametr Form: {form_name}")

    # Open the panel only when it is collapsed (checked via style.display).
    is_closed = page.locator('#PromptsBox_fm2_div').evaluate('el => el.style.display') == 'none'
    if is_closed:
        page.click('#PromptsBox_fm2_ShowHideBtn')
        page.wait_for_timeout(2000)

    # After a previous download the panel is in "locked" mode:
    # first click (pencil) clears the selection, second click (eye) reloads
    # the full form list.
    if page.locator('#PromptsBox_fm2_PageModeBtn').is_visible():
        page.click('#PromptsBox_fm2_PageModeBtn')
        page.wait_for_timeout(1000)
        page.click('#PromptsBox_fm2_PageModeBtn')
        page.wait_for_timeout(2000)

    # Clicking gives focus; Enter triggers the panel's AJAX search.
    search = page.locator('#PromptsBox_fm2_SearchTxt')
    search.wait_for(state='visible', timeout=10_000)
    search.click()
    search.fill(form_name)
    search.press('Enter')
    # Checkboxes of the unfiltered list may still be visible right after
    # Enter, so "first checkbox is visible" is satisfied before the results
    # are replaced. Let the AJAX refresh settle first.
    page.wait_for_timeout(2000)

    cb_locator = page.locator('input[id^="PromptsBox_fm2_FrontEndCBList_"]').first
    try:
        cb_locator.wait_for(state='visible', timeout=8_000)
    except PWTimeout:
        print(f" VAROVÁNÍ: '{form_name}' nenalezen nebo timeout!")
        return False

    # The locator is re-evaluated on use, so no stale element handle.
    if not cb_locator.is_checked():
        cb_locator.click()
    print(f" '{form_name}' zaškrtnuto")
    wait_load(page, 500)
    return True
# ---------------------------------------------------------------------------
# Submit a download
# ---------------------------------------------------------------------------
def _select_first_option(frame, needle, *, also_match_text=False):
    """Select the first <option> in `frame` whose value contains `needle`.

    Matching is case-insensitive; with also_match_text the option's visible
    text is searched too. Returns the selected value, or None if nothing
    matched.
    """
    needle = needle.lower()
    for dropdown in frame.query_selector_all('select'):
        for option in dropdown.query_selector_all('option'):
            value = option.get_attribute('value') or ''
            text = (option.inner_text() or '') if also_match_text else ''
            if needle in value.lower() or needle in text.lower():
                # Select by the option's real value, not by the search term.
                dropdown.select_option(value=value)
                return value
    return None


def submit_and_download(page, context, form_name):
    """Submit the report, wait for the download window and save the CSV.

    Args:
        page: Playwright page showing the report prompts.
        context: Browser context; the report opens in a new page of it.
        form_name: Form name used to build the output filename.

    Returns:
        Path of the saved CSV inside DOWNLOAD_DIR.

    Raises:
        playwright TimeoutError: when the new window, the "Download File"
            button or the download itself does not appear in time.
    """
    download_btn_css = 'input[value="Download File"], button:has-text("Download File")'

    print("Odesílám report (čekám na nové okno)...")
    with context.expect_page() as new_page_info:
        page.locator('input[value="Submit Report"], button:has-text("Submit Report")').first.click()
    new_page = new_page_info.value

    try:
        new_page.wait_for_url(lambda url: url != 'about:blank', timeout=30_000)

        # The window shows "Loading" first; allow up to 5 minutes for large reports.
        print(" Čekám na vygenerování reportu...")
        new_page.wait_for_selector(download_btn_css, timeout=300_000)
        new_page.wait_for_timeout(500)
        dbg(new_page, "download-window")

        print(" Nastavuji parametry stahování...")
        # Separator: comma (the default).
        sep = new_page.query_selector('input[name*="Separator"], input[name*="separator"]')
        if sep:
            sep.fill(',')

        # The form may live in an iframe — pick the frame holding the controls.
        target_frame = new_page.main_frame
        for frame in new_page.frames:
            if frame.query_selector('select') or frame.query_selector('input[value="Download File"]'):
                target_frame = frame
                print(f" Frame nalezen: {frame.url}")
                break

        # File type: .csv (application/vnd.ms-excel).
        if _select_first_option(target_frame, 'vnd.ms-excel', also_match_text=True) is not None:
            print(" File type: .csv (application/vnd.ms-excel)")
        # Export type: attachment. "Save as Unicode" stays at its default.
        _select_first_option(target_frame, 'attachment')

        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
        # Drop characters that are awkward or invalid in filenames.
        form_slug = form_name.replace(" ", "").replace("/", "-").replace("(", "").replace(")", "")
        filename = f"{timestamp}_EDC_MDD3003_{form_slug}_DataListing.csv"
        output_path = DOWNLOAD_DIR / filename

        print("Stahuji CSV...")
        with new_page.expect_download(timeout=60_000) as dl_info:
            btn = target_frame.query_selector(download_btn_css)
            if btn:
                btn.click()
            else:
                new_page.locator(download_btn_css).first.click()
        download = dl_info.value
        download.save_as(str(output_path))
        print(f"\nHotovo! Soubor uložen: {output_path}")
    finally:
        # Close the popup on failure too, so a retry does not pile up windows.
        try:
            new_page.close()
            print("Stahovací okno zavřeno.")
        except Exception:
            pass  # best effort: the window may already be gone
    return output_path
# ---------------------------------------------------------------------------
# Hlavní flow
# ---------------------------------------------------------------------------
def run():
    """Run the whole flow: log in, open the report, download every form in FORM_NAMES.

    Exits with status 1 when no password is configured. A form whose selection
    is reported as failed is skipped, so no file is saved under the wrong form
    name. The browser is closed even when a step raises.
    """
    if not PASSWORD:
        print("Chyba: nastav IMEDIDATA_PASSWORD v souboru .env")
        sys.exit(1)
    DOWNLOAD_DIR.mkdir(exist_ok=True)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False, slow_mo=200)
        try:
            ctx_kwargs = {"accept_downloads": True}
            use_saved = auth_valid()
            if use_saved:
                print("Načítám uloženou session (auth.json)...")
                ctx_kwargs["storage_state"] = str(AUTH_FILE)
            context = browser.new_context(**ctx_kwargs)
            page = context.new_page()

            # Go to SelectRole; a redirect to login means the session is not valid.
            logged_in = go_to_select_role(page)
            if not logged_in:
                if use_saved:
                    print("Session expirovala, mažu auth.json a přihlašuji znovu...")
                    AUTH_FILE.unlink(missing_ok=True)
                do_login(page, context)
                go_to_select_role(page)

            # Step 4: choosing the role assigns the session ID.
            select_role(page)
            # Step 5: Reporter.
            navigate_to_reporter(page)
            # Step 6: open the report.
            open_report(page)

            # Step 7: Study and Site Group are set once; Form is set per iteration.
            print("Nastavuji parametry reportu...")
            set_study_param(page)
            set_site_group_param(page)

            # Step 8: loop over the forms.
            for form_name in FORM_NAMES:
                print(f"\n=== Stahuji formulář: {form_name} ===")
                # Only an explicit False is treated as failure; None counts as success.
                if set_form_param(page, form_name) is False:
                    print(f" VAROVÁNÍ: formulář '{form_name}' přeskočen (nebyl vybrán).")
                    continue
                submit_and_download(page, context, form_name)
        finally:
            browser.close()
            print("Prohlížeč zavřen.")


if __name__ == "__main__":
    run()
+440
View File
@@ -0,0 +1,440 @@
"""
download_uco3001.py
NAHRAZENO skriptem download_edc_datalistings.py
Původně: stahování Data Listing reportů (ReportID=92) pro studii UCO3001.
"""
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path
from dotenv import load_dotenv
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
import tkinter as tk
from tkinter import simpledialog
# Load settings from the .env file located next to this script.
load_dotenv(Path(__file__).parent / ".env")
# NOTE(review): the fallback hard-codes a personal account name; consider
# requiring IMEDIDATA_USERNAME to be set instead.
USERNAME = os.getenv("IMEDIDATA_USERNAME", "vladimir.buzalka")
# Empty string when unset; the main function exits early in that case.
PASSWORD = os.getenv("IMEDIDATA_PASSWORD", "")
# Output folder for downloaded CSV files (next to this script).
DOWNLOAD_DIR = Path(__file__).parent / "downloads"
# Playwright storage-state file used to reuse a logged-in session.
AUTH_FILE = Path(__file__).parent / "auth.json"
# A saved session older than this many days is treated as stale (see auth_valid).
AUTH_MAX_AGE_DAYS = 7
LOGIN_URL = "https://login.imedidata.com/login"
# Direct link to the Rave role-selection page (adjacent literals are concatenated).
SELECT_ROLE_URL = (
"https://jnjja.mdsol.com/MedidataRave/SelectRole.aspx"
"?client_division_uuid=e5de55d5-a414-4bd1-9abe-18e96fd5475d"
"&study_group_uuid=b0793ca6-33ec-44e8-883b-6fc1a4b671c4"
"&studygroup_id=107981"
)
STUDY_SEARCH = "77242113UCO3001" # matched as a substring of the study name
REPORT_ID = 92 # _EDC Std Rpt - Data Listing (Data Stream)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def auth_valid():
    """Return True when AUTH_FILE exists and is younger than AUTH_MAX_AGE_DAYS days."""
    if not AUTH_FILE.exists():
        return False
    max_age = timedelta(days=AUTH_MAX_AGE_DAYS)
    written_at = datetime.fromtimestamp(AUTH_FILE.stat().st_mtime)
    return (datetime.now() - written_at) < max_age
def wait_load(page, extra_ms=1000):
    """Wait up to 20 s for the 'load' event (timeout ignored), then pause `extra_ms` ms."""
    load_timeout_ms = 20_000
    try:
        page.wait_for_load_state("load", timeout=load_timeout_ms)
    except PWTimeout:
        # Best effort: continue even if 'load' never fired in time.
        pass
    page.wait_for_timeout(extra_ms)
def dbg(page, label):
    """Print the page's current URL, prefixed with a bracketed step label."""
    message = "[%s] URL: %s" % (label, page.url)
    print(message)
# ---------------------------------------------------------------------------
# Login
# ---------------------------------------------------------------------------
def _ask_otp_popup():
    """Ask for the OKTA OTP code in a topmost GUI dialog.

    Returns:
        The stripped code, or "" when the dialog was cancelled or left empty.
    """
    window = tk.Tk()
    window.withdraw()  # hide the empty root window
    window.lift()
    window.attributes("-topmost", True)
    answer = simpledialog.askstring(
        "OKTA MFA",
        "Zadej OTP kód z OKTA (6 číslic):",
        parent=window,
    )
    window.destroy()
    return answer.strip() if answer else ""
def do_login(page, context):
    """Log in to iMedidata, handle an optional OKTA MFA step, and save the session.

    Exits the process with status 1 when no OTP is entered or when the
    browser does not end up on home.imedidata.com.

    Args:
        page: Playwright page used for the login flow.
        context: Browser context whose storage state is written to AUTH_FILE.
    """
    username_css = 'input[name="session[username]"]'
    password_css = 'input[name="session[password]"]'

    print("Přihlašuji se do iMedidata...")
    page.goto(LOGIN_URL)
    wait_load(page, 500)

    page.wait_for_selector(username_css, timeout=10_000)
    page.fill(username_css, USERNAME)
    page.fill(password_css, PASSWORD)
    page.click('button[type="submit"]')
    wait_load(page, 2000)
    dbg(page, "after-signin")

    if _okta_mfa_present(page):
        print("\n*** OKTA MFA vyžadována! ***")
        otp = _ask_otp_popup()
        if not otp:
            print("CHYBA: OTP nebylo zadáno.")
            sys.exit(1)
        _fill_otp(page, otp)
        wait_load(page, 3000)

    # A timeout here is only logged; the URL check below decides success.
    try:
        page.wait_for_url("**/home.imedidata.com**", timeout=30_000)
    except PWTimeout:
        dbg(page, "wait-home-timeout")

    reached_home = "home.imedidata.com" in page.url
    if not reached_home:
        print("CHYBA: Přihlášení se nezdařilo!")
        sys.exit(1)

    context.storage_state(path=str(AUTH_FILE))
    print("Session uložena do auth.json")
def _okta_mfa_present(page):
if "okta" in page.url.lower():
return True
for sel in ['input[name="answer"]', 'input[name*="otp"]',
'input[name*="code"]', 'input[placeholder*="code" i]']:
if page.query_selector(sel):
return True
return False
def _fill_otp(page, otp):
for sel in ['input[name="answer"]', 'input[name*="otp"]',
'input[name*="code"]', 'input[type="tel"]', 'input[placeholder*="code" i]']:
el = page.query_selector(sel)
if el:
el.fill(otp)
page.keyboard.press("Enter")
return
page.keyboard.type(otp)
page.keyboard.press("Enter")
# ---------------------------------------------------------------------------
# Navigace
# ---------------------------------------------------------------------------
def go_to_select_role(page):
    """Navigate to SelectRole; return True unless we ended up on a login/OKTA URL."""
    print("Navigace na SelectRole...")
    try:
        page.goto(SELECT_ROLE_URL)
    except Exception:
        # Navigation errors are ignored; the final URL is inspected instead.
        pass
    wait_load(page, 1500)
    dbg(page, "select-role")
    landed_on = page.url.lower()
    return not ("login" in landed_on or "okta" in landed_on)
def select_role(page):
    """Choose the "Site Manager" role in the first matching <select> and click Continue.

    Only the first <select> that offers a "Site Manager" option is changed;
    later dropdowns are left untouched. A warning is printed when no such
    option exists. Returns early (after logging the URL) when no <select>
    appears within 10 seconds.
    """
    print("Vybírám roli Site Manager...")
    try:
        page.wait_for_selector("select", timeout=10_000)
    except PWTimeout:
        dbg(page, "no-select-found")
        return

    role_selected = False
    for sel_el in page.query_selector_all("select"):
        for opt in sel_el.query_selector_all("option"):
            txt = (opt.inner_text() or "").strip()
            if "site manager" in txt.lower():
                sel_el.select_option(label=txt)
                print(f" Vybráno: '{txt}'")
                role_selected = True
                break
        if role_selected:
            # Stop at the first match so other dropdowns are not modified.
            break
    if not role_selected:
        print(" VAROVÁNÍ: Option 'Site Manager' nenalezena.")

    for btn_sel in ['input[value="Continue"]', 'input[type="submit"]',
                    'button:has-text("Continue")', 'button[type="submit"]']:
        btn = page.query_selector(btn_sel)
        if btn:
            btn.click()
            break
    wait_load(page, 2000)
    dbg(page, "after-role")
def navigate_to_reporter(page):
    """Wait for the "Reporter" link, click it and log the resulting URL."""
    reporter_link = 'a:has-text("Reporter")'
    print("Klikám na Reporter...")
    page.wait_for_selector(reporter_link, timeout=15_000)
    page.click(reporter_link)
    wait_load(page, 1500)
    dbg(page, "reporter")
def open_report(page):
    """Open the prompts page of report REPORT_ID and log the resulting URL."""
    print(f"Otevírám report ID={REPORT_ID} (Data Listing - Data Stream)...")
    report_link = f'a[href="PromptsPage.aspx?ReportID={REPORT_ID}"]'
    page.wait_for_selector(report_link, timeout=15_000)
    page.click(report_link)
    wait_load(page, 2000)
    dbg(page, "report-opened")
# ---------------------------------------------------------------------------
# Parametry reportu
# ---------------------------------------------------------------------------
def set_study_param(page):
    """Expand the Study panel and tick the study whose label contains STUDY_SEARCH.

    The comparison is case-insensitive. When no label matches, a warning is
    printed and checkbox index 0 is ticked instead.
    """
    checkbox_css = 'input[id^="PromptsBox_st_FrontEndCBList_"]'
    print(f" Parametr Study: hledám '{STUDY_SEARCH}'...")
    page.click('#PromptsBox_st_ShowHideBtn')
    page.wait_for_timeout(1500)

    page.wait_for_selector(checkbox_css, timeout=10_000)
    for box in page.query_selector_all(checkbox_css):
        box_id = box.get_attribute("id")
        # The label sits in the same or a neighbouring cell, so read the
        # innerText of the enclosing row/cell/parent via JS.
        label_text = page.evaluate(
            """id => {
const el = document.getElementById(id);
if (!el) return '';
const row = el.closest('tr') || el.closest('td') || el.parentElement;
return row ? row.innerText : '';
}""",
            box_id
        )
        print(f" [{box_id}] label: {label_text.strip()[:80]}")
        if STUDY_SEARCH.upper() not in label_text.upper():
            continue
        study_box = page.locator(f"#{box_id}")
        if not study_box.is_checked():
            study_box.check()
        print(f" Nalezeno a zaškrtnuto: '{label_text.strip()}'")
        break
    else:
        # Fallback: nothing matched — warn and try index 0.
        print(f" VAROVÁNÍ: Studie '{STUDY_SEARCH}' nenalezena! Zkouším index 0...")
        first_box = page.locator('#PromptsBox_st_FrontEndCBList_0')
        if not first_box.is_checked():
            first_box.check()

    wait_load(page, 3000)
    dbg(page, "after-study")
def set_site_group_param(page, country: str):
    """Expand Site Group, select `country` and enable "Include Sub Site Groups".

    Args:
        page: Playwright page showing the report prompts.
        country: Label of the site group option to select, e.g. "CZE".
    """
    change_js = "document.querySelector('%s').dispatchEvent(new Event('change', {bubbles:true}))"
    toggle_css = '#PromptsBox_sg_ShowHideBtn'
    list_css = '#PromptsBox_sg_List'
    include_css = '#PromptsBox_sg_CheckBox'

    print(f" Parametr Site Group: {country}")
    page.click(toggle_css)
    page.wait_for_timeout(1500)

    page.wait_for_selector(list_css, timeout=10_000)
    page.select_option(list_css, label=country)
    # Fire 'change' explicitly so the page reacts to the new selection.
    page.evaluate(change_js % list_css)
    wait_load(page, 2000)

    include_box = page.locator(include_css)
    if not include_box.is_checked():
        include_box.check()
        page.evaluate(change_js % include_css)
    wait_load(page, 2000)

    # Collapsing the panel confirms the choice and triggers the Form postback.
    page.click(toggle_css)
    wait_load(page, 3000)
    dbg(page, "after-site-group")
def set_form_param(page, form_name: str):
    """Search for `form_name` in the Form panel and tick the first result.

    Args:
        page: Playwright page showing the report prompts.
        form_name: Form name typed into the panel's search box.

    Returns:
        True when a checkbox was found and is ticked; False when no checkbox
        became visible after the search, so the caller can avoid submitting a
        report for the wrong form.
    """
    print(f" Parametr Form: {form_name}")

    # Open the panel only when it is collapsed.
    is_closed = page.locator('#PromptsBox_fm2_div').evaluate('el => el.style.display') == 'none'
    if is_closed:
        page.click('#PromptsBox_fm2_ShowHideBtn')
        page.wait_for_timeout(2000)

    # When the mode button is visible, click it twice with pauses before searching.
    # NOTE(review): presumably clears a previous selection and reloads the
    # list, as in download_report.py — confirm.
    if page.locator('#PromptsBox_fm2_PageModeBtn').is_visible():
        page.click('#PromptsBox_fm2_PageModeBtn')
        page.wait_for_timeout(1000)
        page.click('#PromptsBox_fm2_PageModeBtn')
        page.wait_for_timeout(2000)

    search = page.locator('#PromptsBox_fm2_SearchTxt')
    search.wait_for(state='visible', timeout=10_000)
    search.click()
    search.fill(form_name)
    page.wait_for_timeout(2000)
    search.press('Enter')
    # Let the result list refresh before looking at the first checkbox.
    page.wait_for_timeout(2000)

    cb_locator = page.locator('input[id^="PromptsBox_fm2_FrontEndCBList_"]').first
    try:
        cb_locator.wait_for(state='visible', timeout=8_000)
    except PWTimeout:
        print(f" VAROVÁNÍ: '{form_name}' nenalezen!")
        return False

    if not cb_locator.is_checked():
        cb_locator.click()
    print(f" '{form_name}' zaškrtnuto")
    page.wait_for_timeout(2000)
    return True
# ---------------------------------------------------------------------------
# Submit a download
# ---------------------------------------------------------------------------
def _choose_option_containing(frame, needle):
    """Select the first <option> in `frame` whose value contains `needle` (case-insensitive).

    Returns the selected value, or None when no option matched.
    """
    needle = needle.lower()
    for dropdown in frame.query_selector_all('select'):
        for option in dropdown.query_selector_all('option'):
            value = option.get_attribute('value') or ''
            if needle in value.lower():
                # Select by the option's real value, not by the search term.
                dropdown.select_option(value=value)
                return value
    return None


def submit_and_download(page, context, form_name: str, country: str | None):
    """Submit the report, wait for the download window and save the CSV.

    Args:
        page: Playwright page showing the report prompts.
        context: Browser context; the report opens in a new page of it.
        form_name: Form name used in the output filename.
        country: Site group code used in the filename; "ALL" is used when None/empty.

    Returns:
        Path of the saved CSV inside DOWNLOAD_DIR.

    Raises:
        playwright TimeoutError: when the new window, the "Download File"
            button or the download itself does not appear in time.
    """
    download_btn_css = 'input[value="Download File"], button:has-text("Download File")'

    print("Odesílám report...")
    with context.expect_page() as new_page_info:
        page.locator('input[value="Submit Report"], button:has-text("Submit Report")').first.click()
    new_page = new_page_info.value

    try:
        new_page.wait_for_url(lambda url: url != 'about:blank', timeout=30_000)
        print(" Čekám na vygenerování reportu (max 5 min)...")
        new_page.wait_for_selector(download_btn_css, timeout=300_000)
        new_page.wait_for_timeout(500)
        dbg(new_page, "download-window")

        # Pick the frame that actually holds the download controls.
        target_frame = new_page.main_frame
        for frame in new_page.frames:
            if frame.query_selector('select') or frame.query_selector('input[value="Download File"]'):
                target_frame = frame
                break

        # File type: .csv (application/vnd.ms-excel).
        if _choose_option_containing(target_frame, 'vnd.ms-excel') is not None:
            print(" File type: .csv (application/vnd.ms-excel)")
        # Export type: attachment.
        _choose_option_containing(target_frame, 'attachment')

        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
        country_slug = country if country else "ALL"
        # Drop characters that are awkward or invalid in filenames.
        form_slug = form_name.replace(" ", "").replace("/", "-").replace("(", "").replace(")", "")
        filename = f"{timestamp}_EDC_UCO3001_{country_slug}_{form_slug}_DataListing.csv"
        output_path = DOWNLOAD_DIR / filename

        print("Stahuji CSV...")
        with new_page.expect_download(timeout=60_000) as dl_info:
            btn = target_frame.query_selector(download_btn_css)
            if btn:
                btn.click()
            else:
                new_page.locator(download_btn_css).first.click()
        dl_info.value.save_as(str(output_path))
        print(f"\nHotovo! Soubor uložen: {output_path}")
    finally:
        # Close the popup on failure too.
        try:
            new_page.close()
        except Exception:
            pass  # best effort: the window may already be gone
    return output_path
# ---------------------------------------------------------------------------
# Hlavní funkce
# ---------------------------------------------------------------------------
def download_datalisting_reports_3001(form_name: str, country: str | None = None):
    """Download one Data Listing report for study UCO3001.

    Args:
        form_name: Form name, e.g. "Trial Disposition (Completion / Discontinuation)".
        country: Site group code, e.g. "CZE". When None, the site-group filter
            is not set (all countries).

    Returns:
        Path of the saved CSV, or None when the form selection was reported as
        failed (nothing is downloaded, so no file is saved under a wrong form
        name).

    Exits the process with status 1 when no password is configured. The
    browser is closed even when a step raises.
    """
    if not PASSWORD:
        print("Chyba: nastav IMEDIDATA_PASSWORD v souboru .env")
        sys.exit(1)
    DOWNLOAD_DIR.mkdir(exist_ok=True)

    output = None
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False, slow_mo=200)
        try:
            ctx_kwargs = {"accept_downloads": True}
            use_saved = auth_valid()
            if use_saved:
                print("Načítám uloženou session (auth.json)...")
                ctx_kwargs["storage_state"] = str(AUTH_FILE)
            context = browser.new_context(**ctx_kwargs)
            page = context.new_page()

            logged_in = go_to_select_role(page)
            if not logged_in:
                if use_saved:
                    print("Session expirovala, přihlašuji znovu...")
                    AUTH_FILE.unlink(missing_ok=True)
                do_login(page, context)
                go_to_select_role(page)

            select_role(page)
            navigate_to_reporter(page)
            open_report(page)

            print("\nNastavuji parametry reportu...")
            set_study_param(page)
            if country:
                set_site_group_param(page, country)
            else:
                print(" Parametr Site Group: přeskočen (všechny země)")

            print(f"\n=== Stahuji formulář: {form_name} ===")
            # Only an explicit False is treated as failure; None counts as success.
            if set_form_param(page, form_name) is False:
                print(f" CHYBA: formulář '{form_name}' nebyl vybrán, stahování zrušeno.")
            else:
                output = submit_and_download(page, context, form_name, country)
        finally:
            browser.close()
            print("Prohlížeč zavřen.")
    return output
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Usage examples:
    #   python download_uco3001.py
    #   python download_uco3001.py CZE
    cli_args = sys.argv[1:]
    # The first positional argument, if any, is the site group code.
    country_arg = cli_args[0] if cli_args else None
    download_datalisting_reports_3001(
        form_name="Trial Disposition (Completion / Discontinuation)",
        country=country_arg,
    )
+451
View File
@@ -0,0 +1,451 @@
"""
Import EDC CSV reportů do MongoDB.
Použití:
python edc_import.py report.csv
python edc_import.py reports/*.csv
python edc_import.py report.csv --host mongodb://192.168.1.100:27017 --db klinicka_studie
"""
import argparse
import csv
import glob
import logging
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from pymongo import MongoClient, ASCENDING
from pymongo.errors import PyMongoError
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Configure logging at import time: INFO level, timestamped lines, written
# both to the file edc_import.log (UTF-8) and to standard output.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)s %(message)s",
handlers=[
logging.FileHandler("edc_import.log", encoding="utf-8"),
# Wrap the stdout file descriptor in a UTF-8 text stream; closefd=False keeps
# the underlying descriptor open if this wrapper is closed.
logging.StreamHandler(open(sys.stdout.fileno(), mode="w", encoding="utf-8", closefd=False)),
],
)
# Module-level logger.
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Mapování pevných CSV sloupců na MongoDB cesty
# ---------------------------------------------------------------------------
# Maps fixed CSV column names to dotted paths inside the MongoDB document
# (a dot denotes nesting, e.g. "site.id" -> {"site": {"id": ...}}).
FIXED_FIELDS = {
"StudyName": "study",
"SiteGroupName": "site.group",
"SiteID": "site.id",
"SiteNumber": "site.number",
"Site": "site.name",
"SubjectID": "subject.id",
"Subject": "subject.label",
"CRFVersionID": "form.crfVersionId",
"InstanceID": "form.instanceId",
"InstanceName": "form.instanceName",
"FolderSeq": "form.folderSeq",
"Page": "form.page",
"RecordID": "form.recordId",
"RecordPosition": "form.recordPosition",
"LastModifiedDate": "lastModified",
"PrintDateTime": "importedAt",
}
# Columns that go into _meta (other administrative columns)
META_FIELDS = {"RunUser", "VersionNumber", "FilterField"}
# Fields that are converted to int
INT_FIELDS = {"Elapsed days"}
# Date formats we try to parse, in this order (see parse_date)
DATE_FORMATS = [
"%d %b %Y %H:%M:%S", # 20 MAY 2026 12:06:18
"%d %b %Y %H:%M:%S:%f", # 10 Aug 2025 18:13:22:080 (EDC query dates)
"%Y%m%d %H:%M:%S.%f", # 20250810 18:13:22.080 (sortable query dates)
"%Y-%m-%d %H:%M:%S", # 2026-05-20 12:06:28
"%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%S.%fZ",
# NOTE(review): day-first is tried before month-first, so an ambiguous value
# such as 05/06/2026 is read as 5 June.
"%d/%m/%Y %H:%M:%S",
"%m/%d/%Y %H:%M:%S",
"%m/%d/%Y %I:%M:%S %p", # 5/20/2026 1:23:27 PM
]
# ---------------------------------------------------------------------------
# QueryDetails — detekce a mapování
# ---------------------------------------------------------------------------
# Column whose presence in the CSV header identifies a QueryDetails report
# (see is_query_details); its value is also stored as the document's queryId.
QUERY_DETAIL_MARKER = "QueryID(ReQry)"
# Columns that map_query_row copies, when non-empty, into the "_meta" sub-document.
QUERY_META_FIELDS = {
"StudyParameter", "SiteGroupParameter", "SiteNumberParameter", "SiteParameter",
"SubjectParameter", "SubjectStatusParameter", "FolderParameter", "FormParameter",
"FieldParameter", "MarkingGroupParameter", "QueryStatusParameter",
"IncludeInactivePagesParameter", "PageSDVParameter", "PageFrozenParameter",
"PageLockedParameter", "StartDateParameter", "EndDateParameter",
"MilestoneParameter", "ReportTypeParameter", "VersionNumber", "TimeZone",
"RunUser", "ErrorString",
# Sortable dates — redundant, the dates are parsed from the main columns
"OpenedDateSrtble", "AnsweredDateSrtble", "ClosedDateSrtble",
# Aggregate counts — go into meta
"VisitSiteLevel", "VisitCountryLevel", "VisitStudyLevel",
"PageSubjectLevel", "PageSiteLevel", "PageCountryLevel", "PageStudyLevel",
"Queries (Op/Ans/SDV)",
}
def is_query_details(fieldnames: list[str]) -> bool:
    """Return True when the CSV header contains the QueryDetails marker column."""
    return any(column == QUERY_DETAIL_MARKER for column in fieldnames)
def map_query_row(row: dict, source_file: str) -> dict:
    """Map one row of a QueryDetails report onto a MongoDB document.

    Missing columns and None values (which csv.DictReader produces for short
    rows) are treated as empty strings everywhere, including `_meta`.

    Args:
        row: One CSV row keyed by column name.
        source_file: Stored verbatim in the document's "sourceFile" field.

    Returns:
        The document. Top-level keys whose value is None are dropped, except
        "queryId"; nested dicts are left as they are.
    """

    def val(col: str) -> str:
        # Stripped cell text; "" for a missing column or a None cell.
        return (row.get(col) or "").strip()

    def int_or_none(col: str):
        # None for an empty cell, int when parseable, else the raw text.
        v = val(col)
        if v == "":
            return None
        try:
            return int(v)
        except ValueError:
            return v

    def date_or_str(col: str):
        # None for an empty cell, parse_date's result when it parses, else the raw text.
        v = val(col)
        if not v:
            return None
        parsed = parse_date(v)
        return parsed if parsed else v

    # Go through val() so a None cell cannot raise AttributeError on .strip().
    meta = {k: text for k in QUERY_META_FIELDS if (text := val(k))}

    doc = {
        "study": val("StudyParameter"),
        "site": {
            "group": val("Country/Region"),
            "number": val("Site Number"),
            "name": val("Sites"),
        },
        "subject": {
            "label": val("Subjects"),
            "status": val("Subject Status"),
        },
        "visit": val("Visits"),
        "page": val("Pages"),
        "recordPosition": int_or_none("RecordPosition"),
        "field": val("Field"),
        "queryGroup": val("Query Group"),
        "queryId": val(QUERY_DETAIL_MARKER),
        "queryStatus": val("QueryStatus"),
        "openedBy": val("Opened By"),
        "openedDate": date_or_str("Opened Date"),
        "answeredBy": val("Answered By") or None,
        "answeredDate": date_or_str("Answered Date"),
        "closedBy": val("Closed By") or None,
        "closedDate": date_or_str("Closed Date"),
        "daysNotYetClosed": int_or_none("DaysNotYetClosed"),
        "daysToAnswer": int_or_none("Days to Answer"),
        "daysToClose": int_or_none("Days to Close"),
        "queryText": val("QueryText"),
        "answerText": val("Answer Text (if any)") or None,
        "importedAt": date_or_str("PrintDateTime"),
        "sourceFile": source_file,
        "_meta": meta,
    }
    # Drop None values at the top level only (not inside nested dicts).
    return {k: v for k, v in doc.items() if v is not None or k in ("queryId",)}
def ensure_query_indexes(collection) -> None:
    """Create the indexes for a queries collection.

    queryId gets a unique, sparse index; the remaining fields get plain
    ascending single-field indexes.
    """
    collection.create_index([("queryId", ASCENDING)], unique=True, sparse=True)
    for field in ("subject.label", "site.number", "queryStatus", "openedDate"):
        collection.create_index([(field, ASCENDING)])
def ensure_snapshot_indexes(collection) -> None:
    """Create the snapshot collection indexes.

    The compound (queryId, snapshotDate) index is unique; the remaining
    fields get plain ascending single-field indexes.
    """
    compound_key = [("queryId", ASCENDING), ("snapshotDate", ASCENDING)]
    collection.create_index(compound_key, unique=True)
    for field in ("snapshotDate", "queryStatus", "site.number", "subject.label"):
        collection.create_index([(field, ASCENDING)])
def extract_snapshot_date(filename: str) -> str:
    """Return the ``YYYY-MM-DD`` prefix of a file name.

    Example: ``'2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv'`` gives
    ``'2026-05-20'``. When the base name does not start with such a prefix,
    today's date in UTC is returned instead.
    """
    base_name = Path(filename).name
    leading_date = re.match(r"(\d{4}-\d{2}-\d{2})", base_name)
    if leading_date is None:
        return datetime.now(timezone.utc).strftime("%Y-%m-%d")
    return leading_date.group(1)
def parse_date(value: str) -> str | None:
    """Convert *value* to an ISO 8601 string, or return None if nothing fits.

    The patterns in ``DATE_FORMATS`` are tried in order and the first one
    that parses wins. The parsed naive datetime is labelled as UTC before
    it is formatted.
    """
    text = value.strip()
    for pattern in DATE_FORMATS:
        try:
            moment = datetime.strptime(text, pattern)
        except ValueError:
            continue
        return moment.replace(tzinfo=timezone.utc).isoformat()
    return None
def set_nested(doc: dict, path: str, value: str) -> None:
    """Store *value* in a nested dict under a dotted path such as 'site.id'.

    Intermediate dicts are created on demand; the final path segment is
    assigned (overwriting any previous value).
    """
    *parents, leaf = path.split(".")
    node = doc
    for key in parents:
        node = node.setdefault(key, {})
    node[leaf] = value
def collection_name_from_filename(filename: str) -> str:
    """Derive the collection name from a report file name.

    Examples:
        '2026-05-20_15-09_EDC_MDD3003_InterimInvestigatorSignature_DataListing.csv'
            gives 'MDD3003_InterimInvestigatorSignature'
        '2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv'
            gives 'MDD3003_QueryDetails'

    When the stem contains no 'EDC_' part, the whole stem is returned.
    """
    stem = Path(filename).stem
    patterns = (
        r"EDC_(.+?)_DataListing",  # name carries the _DataListing suffix
        r"EDC_(.+)$",              # no suffix (e.g. QueryDetails)
    )
    for pattern in patterns:
        found = re.search(pattern, stem, re.IGNORECASE)
        if found:
            return found.group(1)
    return stem
def map_row(row: dict, source_file: str) -> dict:
    """Map one Data Listing CSV row onto a MongoDB document.

    Columns listed in ``FIXED_FIELDS`` are written to their dotted target
    path, ``META_FIELDS`` and unknown non-``Field…`` columns are collected in
    ``_meta`` (non-empty values only), and ``FieldNLabel``/``FieldNValue``
    pairs become entries of ``fields`` keyed by the label.

    Args:
        row: One ``csv.DictReader`` row.
        source_file: Name of the CSV file, stored under ``sourceFile``.

    Returns:
        The assembled document; ``_meta`` is present only when non-empty.
    """
    doc: dict = {}
    meta: dict = {}
    fields: dict = {}
    # Digit strings N for which a FieldNValue or FieldNLabel column exists.
    pair_numbers: set[str] = set()

    for col, raw in row.items():
        value = raw.strip() if raw else ""

        # Fixed columns with a known destination path.
        if col in FIXED_FIELDS:
            path = FIXED_FIELDS[col]
            if path in ("form.folderSeq", "form.recordPosition"):
                try:
                    value = int(value)
                except (ValueError, TypeError):
                    pass  # keep the text when it is not a number
            elif path in ("lastModified", "importedAt"):
                value = parse_date(value) or value
            set_nested(doc, path, value)
            continue

        # Meta columns.
        if col in META_FIELDS:
            if value:
                meta[col] = value
            continue

        # FieldNLabel / FieldNValue are paired up after this loop.
        pair = re.match(r"^Field(\d+)(Value|Label)$", col)
        if pair:
            pair_numbers.add(pair.group(1))
            continue

        # Any remaining column that is not Field-prefixed goes to meta too.
        if not re.match(r"^Field\d+", col):
            if value:
                meta[col] = value

    # Visit every pair that is actually present, in numeric order, so a gap
    # in the numbering no longer cuts off the pairs that follow it.
    for digits in sorted(pair_numbers, key=lambda d: (int(d), d)):
        label = (row.get(f"Field{digits}Label") or "").strip()
        value = (row.get(f"Field{digits}Value") or "").strip()
        if not (label and value):
            continue
        if label in INT_FIELDS:
            try:
                fields[label] = int(value)
            except ValueError:
                fields[label] = value
        else:
            # Store an ISO date when the text parses as one.
            fields[label] = parse_date(value) or value

    doc["fields"] = fields
    doc["sourceFile"] = source_file
    if meta:
        doc["_meta"] = meta
    return doc
def ensure_indexes(collection) -> None:
    """Create the indexes used on a Data Listing collection.

    ``form.recordId`` gets a unique sparse index; the remaining fields get
    plain ascending single-field indexes.
    """
    collection.create_index([("form.recordId", ASCENDING)], unique=True, sparse=True)
    for field in ("subject.id", "site.id", "study", "lastModified"):
        collection.create_index([(field, ASCENDING)])
def import_file(
    csv_path: str,
    collection,
    snapshot_col=None,
    snapshot_date: str | None = None,
) -> tuple[int, int, int]:
    """Import one CSV file into *collection*.

    QueryDetails rows are upserted by ``queryId``; Data Listing rows are
    upserted by ``form.recordId`` when present and plainly inserted otherwise.

    Args:
        csv_path: Path of the CSV file to read.
        collection: Target collection.
        snapshot_col: When given together with *snapshot_date*, every
            QueryDetails row is also upserted there keyed by
            (queryId, snapshotDate).
        snapshot_date: Date string stored as ``snapshotDate``.

    Returns:
        ``(inserted, updated, errors)`` row counters. A failing row is logged
        and counted; it does not stop the import.
    """
    inserted = updated = errors = 0
    source_file = Path(csv_path).name

    with open(csv_path, encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f, delimiter=",", quotechar='"')
        query_mode = is_query_details(reader.fieldnames or [])

        for row in reader:
            # line_num counts physical lines, so the reported number stays
            # right even when a quoted field spans several lines.
            line_no = reader.line_num
            try:
                if query_mode:
                    doc = map_query_row(row, source_file)
                    upsert_key = {"queryId": doc["queryId"]}

                    # Snapshot: upsert on (queryId, snapshotDate).
                    if snapshot_col is not None and snapshot_date:
                        snap_doc = {**doc, "snapshotDate": snapshot_date}
                        snapshot_col.update_one(
                            {"queryId": doc["queryId"], "snapshotDate": snapshot_date},
                            {"$set": snap_doc},
                            upsert=True,
                        )
                else:
                    doc = map_row(row, source_file)
                    record_id = doc.get("form", {}).get("recordId")
                    upsert_key = {"form.recordId": record_id} if record_id else None

                if upsert_key:
                    result = collection.update_one(
                        upsert_key,
                        {"$set": doc},
                        upsert=True,
                    )
                    if result.upserted_id:
                        inserted += 1
                    else:
                        updated += 1
                else:
                    collection.insert_one(doc)
                    inserted += 1

            except PyMongoError as e:
                errors += 1
                log.error("Řádek %d v %s: MongoDB chyba: %s", line_no, csv_path, e)
            except Exception as e:
                errors += 1
                log.error("Řádek %d v %s: %s", line_no, csv_path, e)

    return inserted, updated, errors
def main() -> None:
    """Command-line entry point: import the given CSV reports into MongoDB.

    Each file is routed by its header: QueryDetails reports go to ``queries``
    (plus a dated copy in ``queries_snapshots``), every other report to a
    collection named after the file. Exits with status 1 when no file is
    given or the server cannot be reached.
    """
    parser = argparse.ArgumentParser(description="Import EDC CSV reportů do MongoDB")
    parser.add_argument("files", nargs="+", help="CSV soubory nebo glob vzor")
    parser.add_argument("--host", default="mongodb://192.168.1.76:27017", help="MongoDB URI")
    parser.add_argument("--db", default="edc", help="Název databáze")
    args = parser.parse_args()

    # Expand glob patterns ourselves (matters on Windows, where the shell
    # does not); a pattern without matches is kept literally.
    paths: list[str] = []
    for pattern in args.files:
        expanded = glob.glob(pattern)
        paths.extend(expanded if expanded else [pattern])

    if not paths:
        log.error("Žádné soubory nenalezeny.")
        sys.exit(1)

    client = MongoClient(args.host, serverSelectionTimeoutMS=5000)
    # The finally clause closes the client on every exit path, including
    # sys.exit() and unexpected exceptions.
    try:
        try:
            client.admin.command("ping")
        except Exception as e:
            log.error("Nelze se připojit k MongoDB (%s): %s", args.host, e)
            sys.exit(1)

        db = client[args.db]
        total_inserted = total_updated = total_errors = 0

        for csv_path in paths:
            if not os.path.isfile(csv_path):
                log.warning("Soubor neexistuje, přeskakuji: %s", csv_path)
                continue

            # Detect the report type and pick collection + indexes.
            with open(csv_path, encoding="utf-8", newline="") as f:
                fieldnames = csv.DictReader(f).fieldnames or []

            if is_query_details(fieldnames):
                col_name = "queries"
                collection = db[col_name]
                ensure_query_indexes(collection)
                snapshot_col = db["queries_snapshots"]
                ensure_snapshot_indexes(snapshot_col)
                snapshot_date = extract_snapshot_date(csv_path)
                log.info("Importuji: %s -> %s.%s + queries_snapshots [%s]",
                         csv_path, args.db, col_name, snapshot_date)
            else:
                col_name = collection_name_from_filename(csv_path)
                collection = db[col_name]
                ensure_indexes(collection)
                snapshot_col = None
                snapshot_date = None
                log.info("Importuji: %s -> %s.%s", csv_path, args.db, col_name)

            inserted, updated, errors = import_file(
                csv_path, collection, snapshot_col, snapshot_date
            )
            total_inserted += inserted
            total_updated += updated
            total_errors += errors
            log.info(" nové: %d aktualizované: %d chyby: %d", inserted, updated, errors)

        log.info("=" * 60)
        log.info("Celkem — nové: %d aktualizované: %d chyby: %d",
                 total_inserted, total_updated, total_errors)
    finally:
        client.close()


if __name__ == "__main__":
    main()
+43
View File
@@ -0,0 +1,43 @@
# create_report_v1.0.md
**Skript:** `create_report_v1.0.py`
**Verze:** 1.0
**Datum:** 2026-06-01
## Popis
Generuje Excel EDC DataListing report pro studii **77242113UCO3001** z MongoDB (db: `edc`).
## Výstup
`Medidata/reports/YYYY-MM-DD 77242113UCO3001 EDC DataListing v1.0.xlsx`
Stará verze se automaticky přesune do `reports/TRASH/`.
## Listy
| List | Kolekce MongoDB | Záznamy (CZE) |
|------|----------------|---------------|
| DateofVisit | UCO3001.DateofVisit | 55 |
| ConcomitantTherapy | UCO3001.ConcomitantTherapy | 91 |
| TrialDisposition | UCO3001.TrialDispositionCompletion-Discontinuation | 3 |
## Sloupce (každý list)
**Pevné:** SiteNumber · SiteName · Subject · Visit · FolderSeq · RecordPos · LastModified
**Dynamické:** všechny klíče z `fields{}` v pořadí výskytu v MongoDB
## Formátování
- Záhlaví: tmavomodrý fill, bílý tučný text, Calibri 10
- Data: Calibri 10, tenké ohraničení
- Zmrazení řádku 1, autofilter, šířky sloupců auto (max 55)
- Datumy: DD-MMM-YYYY (čas jen pokud != 00:00)
## Spuštění
```
cd Medidata
python create_report_v1.0.py
```
+210
View File
@@ -0,0 +1,210 @@
"""
create_report_v1.0.py
Verze: 1.0
Datum: 2026-06-01
Popis: Excel EDC DataListing report pro studii UCO3001 z MongoDB (db: edc).
Jeden list per kolekce (DateofVisit / ConcomitantTherapy / TrialDisposition).
Sloupce: SiteNumber, SiteName, Subject, Visit, FolderSeq, RecordPos,
LastModified + dynamické fields.* z MongoDB.
Výstup: reports/YYYY-MM-DD 77242113UCO3001 EDC DataListing v1.0.xlsx
"""
import shutil
from datetime import datetime
from pathlib import Path
from openpyxl import Workbook
from openpyxl.styles import Alignment, Border, Font, PatternFill, Side
from openpyxl.utils import get_column_letter
from pymongo import ASCENDING, MongoClient
# ── Configuration ─────────────────────────────────────────────────────────────
# Connection target, study identifier used in the output file name, and the
# folders the report is written to / old reports are moved to.
MONGO_URI = "mongodb://192.168.1.76:27017"
DB_NAME = "edc"
STUDY_FULL = "77242113UCO3001"
VERSION = "1.0"
OUTPUT_DIR = Path(__file__).parent / "reports"
TRASH_DIR = OUTPUT_DIR / "TRASH"
# One worksheet is produced per collection, in this order.
COLLECTIONS = [
"UCO3001.DateofVisit",
"UCO3001.ConcomitantTherapy",
"UCO3001.TrialDispositionCompletion-Discontinuation",
]
# ── Formatting ────────────────────────────────────────────────────────────────
HEADER_FILL = PatternFill("solid", fgColor="1F4E79")
HEADER_FONT = Font(bold=True, color="FFFFFF", name="Calibri", size=10)
DATA_FONT = Font(name="Calibri", size=10)
THIN = Side(style="thin", color="CCCCCC")
BORDER = Border(left=THIN, right=THIN, top=THIN, bottom=THIN)
# ── Fixed columns ─────────────────────────────────────────────────────────────
# (header, getter) pairs; each getter pulls the cell value out of one document
# and falls back to "" when the key is missing.
FIXED_COLS = [
("SiteNumber", lambda d: d.get("site", {}).get("number", "")),
("SiteName", lambda d: d.get("site", {}).get("name", "")),
("Subject", lambda d: d.get("subject", {}).get("label", "")),
("Visit", lambda d: d.get("form", {}).get("instanceName", "")),
("FolderSeq", lambda d: d.get("form", {}).get("folderSeq", "")),
("RecordPos", lambda d: d.get("form", {}).get("recordPosition", "")),
("LastModified", lambda d: _fmt(d.get("lastModified", ""))),
]
# ── Helpers ───────────────────────────────────────────────────────────────────
def _fmt(value: str) -> str:
"""ISO datetime string → 'DD-MMM-YYYY' nebo 'DD-MMM-YYYY HH:MM'."""
if not value:
return ""
try:
dt = datetime.fromisoformat(value)
if dt.hour == 0 and dt.minute == 0 and dt.second == 0:
return dt.strftime("%d-%b-%Y")
return dt.strftime("%d-%b-%Y %H:%M")
except Exception:
return value
def _fmt_field(value) -> str:
"""Naformátuje hodnotu z fields{} — datum nebo string."""
if isinstance(value, str) and "T" in value and value.endswith(("+00:00", "Z")):
return _fmt(value)
return value if value is not None else ""
# Field keys moved to the end of the ConcomitantTherapy sheet, in this order
# (passed as `last_cols` when the collection name contains "ConcomitantTherapy").
COLS_LAST_CT = [
"CMTRT_ATC1", "CMTRT_ATC2", "CMTRT_ATC3", "CMTRT_ATC4",
"CMTRT_RXPREF", "CMTRT_TRADE_NAME",
"CMTRT_ATC1_CODE", "CMTRT_ATC2_CODE", "CMTRT_ATC3_CODE", "CMTRT_ATC4_CODE",
"CMTRT_RXPREF_CODE", "CMTRT_TRADE_NAME_CODE",
]
def _field_keys(docs: list, last: list | None = None) -> list:
"""Vrátí seznam unikátních klíčů z fields{} — klíče v `last` přesunuty na konec."""
seen = set()
keys = []
for doc in docs:
for k in doc.get("fields", {}).keys():
if k not in seen:
seen.add(k)
keys.append(k)
if last:
tail = [k for k in last if k in seen]
keys = [k for k in keys if k not in set(tail)] + tail
return keys
def _sheet_name(collection: str) -> str:
"""UCO3001.SomeName → SomeName (max 31 znaků pro Excel)."""
name = collection.split(".", 1)[-1]
abbreviations = {
"TrialDispositionCompletion-Discontinuation": "TrialDisposition",
}
return abbreviations.get(name, name)[:31]
# ── Zápis listu ───────────────────────────────────────────────────────────────
def write_sheet(ws, docs: list, last_cols: list | None = None) -> None:
    """Fill worksheet *ws* with one header row and one row per document.

    Columns are the fixed columns from ``FIXED_COLS`` followed by every key
    found in the documents' ``fields``; keys listed in *last_cols* go last.
    The header row is frozen, an autofilter is set on it, and each column is
    sized to its longest displayed value (capped at 55).

    Args:
        ws: Target worksheet.
        docs: Documents to write, already in output order.
        last_cols: Optional field keys to place at the end.
    """
    fixed_names = [name for name, _ in FIXED_COLS]
    field_keys = _field_keys(docs, last=last_cols)
    all_headers = fixed_names + field_keys

    # One alignment object per role instead of a new one for every cell.
    header_alignment = Alignment(horizontal="center", vertical="center")
    data_alignment = Alignment(vertical="top")

    # Header row.
    for col_i, header in enumerate(all_headers, 1):
        cell = ws.cell(row=1, column=col_i, value=header)
        cell.font = HEADER_FONT
        cell.fill = HEADER_FILL
        cell.border = BORDER
        cell.alignment = header_alignment
    ws.row_dimensions[1].height = 18
    ws.freeze_panes = "A2"

    # Data rows; widths are tracked in the same pass and measured on the
    # value actually written, so a reformatted date does not widen a column.
    widths = [len(header) for header in all_headers]
    for row_i, doc in enumerate(docs, 2):
        fields = doc.get("fields", {})
        values = [getter(doc) for _, getter in FIXED_COLS]
        values += [_fmt_field(fields.get(key, "")) for key in field_keys]
        for col_i, value in enumerate(values, 1):
            cell = ws.cell(row=row_i, column=col_i, value=value)
            cell.font = DATA_FONT
            cell.border = BORDER
            cell.alignment = data_alignment
            widths[col_i - 1] = max(widths[col_i - 1], len(str(value)))

    # Autofilter on the header row.
    if all_headers:
        ws.auto_filter.ref = f"A1:{get_column_letter(len(all_headers))}1"

    # Column widths.
    for col_i, width in enumerate(widths, 1):
        ws.column_dimensions[get_column_letter(col_i)].width = min(width + 2, 55)
# ── Main ──────────────────────────────────────────────────────────────────────
def main() -> None:
    """Build the workbook from MongoDB and save it into the reports folder.

    One sheet is written per entry of ``COLLECTIONS``. Reports of this study
    already present in the output folder are moved to the TRASH subfolder
    before the new file is saved.
    """
    wb = Workbook()
    wb.remove(wb.active)

    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
    # Close the connection even when the ping or a query fails.
    try:
        client.admin.command("ping")
        db = client[DB_NAME]

        for coll_name in COLLECTIONS:
            docs = list(db[coll_name].find(
                {},
                {"_id": 0, "sourceFile": 0, "history": 0},
                sort=[
                    ("site.number", ASCENDING),
                    ("subject.label", ASCENDING),
                    ("form.folderSeq", ASCENDING),
                    ("form.recordPosition", ASCENDING),
                ],
            ))
            ws = wb.create_sheet(title=_sheet_name(coll_name))
            last = COLS_LAST_CT if "ConcomitantTherapy" in coll_name else None
            write_sheet(ws, docs, last_cols=last)
            print(f" {coll_name}: {len(docs)} zaznamu -> list '{ws.title}'")
    finally:
        client.close()

    OUTPUT_DIR.mkdir(exist_ok=True)
    TRASH_DIR.mkdir(exist_ok=True)

    # Move older reports to TRASH.
    pattern = f"* {STUDY_FULL} EDC DataListing *.xlsx"
    for old in OUTPUT_DIR.glob(pattern):
        dest = TRASH_DIR / old.name
        if dest.exists():
            # A same-named file was trashed earlier (e.g. a rerun on the same
            # day); add a time suffix instead of overwriting it.
            stamp = datetime.now().strftime("%H%M%S")
            dest = TRASH_DIR / f"{old.stem} {stamp}{old.suffix}"
        shutil.move(str(old), str(dest))
        print(f" Přesunuto do TRASH: {old.name}")

    today = datetime.now().strftime("%Y-%m-%d")
    filename = f"{today} {STUDY_FULL} EDC DataListing v{VERSION}.xlsx"
    out_path = OUTPUT_DIR / filename
    wb.save(str(out_path))
    print(f"\nUloženo: {out_path}")


if __name__ == "__main__":
    main()
Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 249 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

+501
View File
@@ -0,0 +1,501 @@
"""
download_edc_datalistings.py
Verze: 2.0
Datum: 2026-05-27
Univerzální stahování EDC Data Listing reportů (ReportID=92) z Medidata Rave.
Parametry:
study vyhledávací řetězec studie (např. "77242113UCO3001")
forms seznam názvů formulářů ke stažení
country kód země / site group (např. "CZE"), None = všechny
Prohlížeč se otevře jednou, přihlásí se, a stáhne všechny formuláře v jedné session.
Použití:
from download_edc_datalistings import download_datalisting
download_datalisting(
study="77242113UCO3001",
forms=["Date of Visit", "Concomitant Therapy"],
country="CZE",
)
"""
import os
import re
import sys
from datetime import datetime, timedelta
from pathlib import Path
from dotenv import load_dotenv
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
import tkinter as tk
from tkinter import simpledialog
load_dotenv(Path(__file__).parent / ".env")
USERNAME = os.getenv("IMEDIDATA_USERNAME", "vladimir.buzalka")
PASSWORD = os.getenv("IMEDIDATA_PASSWORD", "")
DOWNLOAD_DIR = Path(__file__).parent / "downloads"
AUTH_FILE = Path(__file__).parent / "auth.json"
AUTH_MAX_AGE_DAYS = 7
LOGIN_URL = "https://login.imedidata.com/login"
SELECT_ROLE_URL = (
"https://jnjja.mdsol.com/MedidataRave/SelectRole.aspx"
"?client_division_uuid=e5de55d5-a414-4bd1-9abe-18e96fd5475d"
"&study_group_uuid=b0793ca6-33ec-44e8-883b-6fc1a4b671c4"
"&studygroup_id=107981"
)
REPORT_ID = 92
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def auth_valid():
    """Return True when the saved session file exists and is recent enough.

    The file's modification time must be less than ``AUTH_MAX_AGE_DAYS``
    days old.
    """
    if AUTH_FILE.exists():
        saved_at = datetime.fromtimestamp(AUTH_FILE.stat().st_mtime)
        return datetime.now() - saved_at < timedelta(days=AUTH_MAX_AGE_DAYS)
    return False
def wait_load(page, extra_ms=1000, load_timeout_ms=20_000):
    """Wait for the page's 'load' state, then pause a little longer.

    Args:
        page: Page object to wait on.
        extra_ms: Fixed pause, in milliseconds, after the load wait.
        load_timeout_ms: Upper bound for the load wait, in milliseconds.
            A timeout is tolerated; the fixed pause still runs.
    """
    try:
        page.wait_for_load_state("load", timeout=load_timeout_ms)
    except PWTimeout:
        # Best effort: carry on with whatever has loaded so far.
        pass
    page.wait_for_timeout(extra_ms)
def dbg(page, label):
    """Print the current URL and save a full-page screenshot for debugging.

    Screenshots go to a ``debug_shots`` folder next to this script, named
    after *label*. A failure while saving is reported and otherwise ignored.
    """
    print(f"[{label}] URL: {page.url}")
    try:
        shots_dir = Path(__file__).parent / "debug_shots"
        shots_dir.mkdir(exist_ok=True)
        target = shots_dir / f"{label}.png"
        page.screenshot(path=str(target), full_page=True)
        print(f"[{label}] screenshot: {target}")
    except Exception as e:
        print(f"[{label}] screenshot failed: {e}")
def extract_study_label(study_search: str) -> str:
    """Return the trailing 'LETTERS+digits' part of a study identifier.

    For example "77242113UCO3001" gives "UCO3001". When the string does not
    end in upper-case letters followed by digits, it is returned unchanged.
    """
    trailing_code = re.search(r'[A-Z]+\d+$', study_search)
    if trailing_code is None:
        return study_search
    return trailing_code.group(0)
# ---------------------------------------------------------------------------
# Login
# ---------------------------------------------------------------------------
def _ask_otp_popup():
    """Ask for the OKTA one-time code in a topmost dialog.

    Returns:
        The entered text with surrounding whitespace removed, or ``""`` when
        the dialog was cancelled.
    """
    root = tk.Tk()
    # Destroy the hidden root window even when the dialog call raises.
    try:
        root.withdraw()
        root.lift()
        root.attributes("-topmost", True)
        otp = simpledialog.askstring("OKTA MFA", "Zadej OTP kód z OKTA (6 číslic):", parent=root)
    finally:
        root.destroy()
    return (otp or "").strip()
def do_login(page, context):
    """Log in through the login form and store the session state.

    Fills username and password, handles the one-time-code step when one is
    detected, waits for the home page and writes the storage state to
    ``AUTH_FILE``. Exits the process with status 1 when no code is entered
    or the home page is not reached.
    """
    print("Přihlašuji se do iMedidata...")
    page.goto(LOGIN_URL)
    wait_load(page, 500)

    username_box = 'input[name="session[username]"]'
    page.wait_for_selector(username_box, timeout=10_000)
    page.fill(username_box, USERNAME)
    page.fill('input[name="session[password]"]', PASSWORD)
    page.click('button[type="submit"]')
    wait_load(page, 2000)
    dbg(page, "after-signin")

    if _okta_mfa_present(page):
        print("\n*** OKTA MFA vyžadována! ***")
        code = _ask_otp_popup()
        if code:
            _fill_otp(page, code)
            wait_load(page, 3000)
        else:
            print("CHYBA: OTP nebylo zadáno.")
            sys.exit(1)

    try:
        page.wait_for_url("**/home.imedidata.com**", timeout=30_000)
    except PWTimeout:
        dbg(page, "wait-home-timeout")

    reached_home = "home.imedidata.com" in page.url
    if not reached_home:
        print("CHYBA: Přihlášení se nezdařilo!")
        sys.exit(1)

    context.storage_state(path=str(AUTH_FILE))
    print("Session uložena do auth.json")
def _okta_mfa_present(page):
if "okta" in page.url.lower():
return True
for sel in ['input[name="answer"]', 'input[name*="otp"]',
'input[name*="code"]', 'input[placeholder*="code" i]']:
if page.query_selector(sel):
return True
return False
def _fill_otp(page, otp):
for sel in ['input[name="answer"]', 'input[name*="otp"]',
'input[name*="code"]', 'input[type="tel"]', 'input[placeholder*="code" i]']:
el = page.query_selector(sel)
if el:
el.fill(otp)
page.keyboard.press("Enter")
return
page.keyboard.type(otp)
page.keyboard.press("Enter")
# ---------------------------------------------------------------------------
# Navigace
# ---------------------------------------------------------------------------
def go_to_select_role(page):
    """Open the role-selection URL and report whether the session is live.

    Returns:
        True when the resulting URL contains neither "login" nor "okta"
        (case-insensitive), i.e. no sign-in redirect happened.
    """
    print("Navigace na SelectRole...")
    try:
        page.goto(SELECT_ROLE_URL)
    except Exception as e:
        # Best effort: the URL check below decides the outcome, but the
        # reason is reported instead of being dropped silently.
        print(f" Navigace na SelectRole přerušena, pokračuji: {e}")
    wait_load(page, 1500)
    dbg(page, "select-role")
    landed_on = page.url.lower()
    return "login" not in landed_on and "okta" not in landed_on
def select_role(page):
    """Pick the 'Site Manager' role and submit the role form.

    The first ``<select>`` offering an option whose text contains
    "site manager" is set to it. The form is then submitted by clicking the
    first candidate button that triggers a navigation, falling back to a
    JavaScript submit of the first form. Returns early when no ``<select>``
    appears within 10 seconds.
    """
    print("Vybírám roli Site Manager...")
    try:
        page.wait_for_selector("select", timeout=10_000)
    except PWTimeout:
        return

    # Stop at the first match: without the flag the inner `break` would only
    # leave the option loop and later <select> elements would be changed too.
    role_chosen = False
    for sel_el in page.query_selector_all("select"):
        for opt in sel_el.query_selector_all("option"):
            txt = (opt.inner_text() or "").strip()
            if "site manager" in txt.lower():
                sel_el.select_option(label=txt)
                print(f" Vybráno: '{txt}'")
                role_chosen = True
                break
        if role_chosen:
            break

    clicked = False
    for btn_sel in ['input[value="Continue"]', 'input[type="submit"]',
                    'button:has-text("Continue")', 'button[type="submit"]']:
        try:
            btn = page.query_selector(btn_sel)
        except Exception:
            continue
        if btn:
            try:
                with page.expect_navigation(timeout=15_000):
                    btn.click()
                clicked = True
                break
            except PWTimeout:
                print(f" Click on {btn_sel} nezpůsobil navigaci, zkouším další...")
                continue

    if not clicked:
        print(" Fallback: submituji formulář přes JS...")
        try:
            with page.expect_navigation(timeout=15_000):
                page.evaluate("document.forms[0] && document.forms[0].submit()")
        except PWTimeout:
            print(" JS submit fallback také neprošel.")

    wait_load(page, 1500)
    dbg(page, "after-role")
def navigate_to_reporter(page):
    """Click the 'Reporter' link once it is present and wait for the page."""
    print("Klikám na Reporter...")
    reporter_link = 'a:has-text("Reporter")'
    page.wait_for_selector(reporter_link, timeout=15_000)
    page.click(reporter_link)
    wait_load(page, 1500)
    dbg(page, "reporter")
def open_report(page):
    """Open the prompts page of the report identified by ``REPORT_ID``."""
    print(f"Otevírám report ID={REPORT_ID} (Data Listing - Data Stream)...")
    report_link = f'a[href="PromptsPage.aspx?ReportID={REPORT_ID}"]'
    page.wait_for_selector(report_link, timeout=15_000)
    page.click(report_link)
    wait_load(page, 2000)
    dbg(page, "report-opened")
# ---------------------------------------------------------------------------
# Parametry reportu
# ---------------------------------------------------------------------------
def set_study_param(page, study_search: str):
    """Tick the study checkbox whose row text contains *study_search*.

    The comparison is case-insensitive and the first matching checkbox wins.
    NOTE(review): when nothing matches, the checkbox with index 0 is ticked
    after a warning, so the report may run for a different study.
    """
    print(f" Parametr Study: hledám '{study_search}'...")
    page.click('#PromptsBox_st_ShowHideBtn')
    page.wait_for_timeout(1500)

    checkbox_css = 'input[id^="PromptsBox_st_FrontEndCBList_"]'
    page.wait_for_selector(checkbox_css, timeout=10_000)

    # Returns the text of the table row (or nearest container) of a checkbox.
    row_text_js = (
        "id => {\n"
        "const el = document.getElementById(id);\n"
        "if (!el) return '';\n"
        "const row = el.closest('tr') || el.closest('td') || el.parentElement;\n"
        "return row ? row.innerText : '';\n"
        "}"
    )
    wanted = study_search.upper()

    for cb in page.query_selector_all(checkbox_css):
        cb_id = cb.get_attribute("id")
        label_text = page.evaluate(row_text_js, cb_id)
        print(f" [{cb_id}] label: {label_text.strip()[:80]}")
        if wanted not in label_text.upper():
            continue
        box = page.locator(f"#{cb_id}")
        if not box.is_checked():
            box.check()
        print(f" Nalezeno a zaškrtnuto: '{label_text.strip()}'")
        break
    else:
        # No checkbox matched: fall back to the first one.
        print(f" VAROVÁNÍ: Studie '{study_search}' nenalezena! Zkouším index 0...")
        first_box = page.locator('#PromptsBox_st_FrontEndCBList_0')
        if not first_box.is_checked():
            first_box.check()

    wait_load(page, 3000)
    dbg(page, "after-study")
def set_site_group_param(page, country: str):
    """Select the site group labelled *country* and tick its checkbox.

    A 'change' event is dispatched by hand after each step, and the section
    is toggled again at the end.
    """
    def fire_change(css: str) -> None:
        # Dispatch a bubbling 'change' event on the element matching css.
        page.evaluate(
            f"document.querySelector('{css}').dispatchEvent(new Event('change', {{bubbles:true}}))"
        )

    toggle_css = '#PromptsBox_sg_ShowHideBtn'
    list_css = '#PromptsBox_sg_List'
    box_css = '#PromptsBox_sg_CheckBox'

    print(f" Parametr Site Group: {country}")
    page.click(toggle_css)
    page.wait_for_timeout(1500)
    page.wait_for_selector(list_css, timeout=10_000)

    page.select_option(list_css, label=country)
    fire_change(list_css)
    wait_load(page, 2000)

    group_box = page.locator(box_css)
    if not group_box.is_checked():
        group_box.check()
        fire_change(box_css)
        wait_load(page, 2000)

    page.click(toggle_css)
    wait_load(page, 3000)
    dbg(page, "after-site-group")
def set_form_param(page, form_name: str):
    """Search the form list for *form_name* and tick the first result.

    The form section is opened when collapsed, the page-mode button (when
    visible) is clicked twice, the name is entered in the search box and the
    first checkbox of the result list is ticked. A warning is printed and
    nothing is ticked when no checkbox shows up within 8 seconds.
    """
    print(f" Parametr Form: {form_name}")

    section_display = page.locator('#PromptsBox_fm2_div').evaluate('el => el.style.display')
    if section_display == 'none':
        page.click('#PromptsBox_fm2_ShowHideBtn')
        page.wait_for_timeout(2000)

    mode_button = '#PromptsBox_fm2_PageModeBtn'
    if page.locator(mode_button).is_visible():
        # Two clicks with a pause after each, in this order.
        for pause_ms in (1000, 2000):
            page.click(mode_button)
            page.wait_for_timeout(pause_ms)

    search_box = page.locator('#PromptsBox_fm2_SearchTxt')
    search_box.wait_for(state='visible', timeout=10_000)
    search_box.click()
    search_box.fill(form_name)
    page.wait_for_timeout(2000)
    search_box.press('Enter')
    page.wait_for_timeout(2000)

    first_result = page.locator('input[id^="PromptsBox_fm2_FrontEndCBList_"]').first
    try:
        first_result.wait_for(state='visible', timeout=8_000)
    except PWTimeout:
        print(f" VAROVÁNÍ: '{form_name}' nenalezen!")
        return

    if not first_result.is_checked():
        first_result.click()
    print(f" '{form_name}' zaškrtnuto")
    page.wait_for_timeout(2000)
# ---------------------------------------------------------------------------
# Submit a download
# ---------------------------------------------------------------------------
def submit_and_download(page, context, form_name: str, country: str | None, study_label: str):
    """Submit the report, wait for the result window and save the CSV.

    Args:
        page: Prompts page holding the 'Submit Report' button.
        context: Browser context; the result opens as a new page in it.
        form_name: Form name, used in the output file name.
        country: Site-group code for the file name; ``None`` becomes "ALL".
        study_label: Short study label for the file name.

    Returns:
        Path of the saved file inside ``DOWNLOAD_DIR``.
    """
    print("Odesílám report...")
    with context.expect_page() as new_page_info:
        page.locator('input[value="Submit Report"], button:has-text("Submit Report")').first.click()

    new_page = new_page_info.value
    new_page.wait_for_url(lambda url: url != 'about:blank', timeout=30_000)

    print(" Čekám na vygenerování reportu (max 5 min)...")
    new_page.wait_for_selector(
        'input[value="Download File"], button:has-text("Download File")',
        timeout=300_000
    )
    new_page.wait_for_timeout(500)
    dbg(new_page, "download-window")

    # The controls may live in a child frame; use the first frame that has
    # a <select> or the download button.
    target_frame = new_page.main_frame
    for frame in new_page.frames:
        if frame.query_selector('select') or frame.query_selector('input[value="Download File"]'):
            target_frame = frame
            break

    # File type: choose the option whose value mentions vnd.ms-excel.
    for sel in target_frame.query_selector_all('select'):
        for opt in sel.query_selector_all('option'):
            val = opt.get_attribute('value') or ''
            if 'vnd.ms-excel' in val:
                sel.select_option(value=val)
                print(" File type: .csv (application/vnd.ms-excel)")
                break

    # Disposition: select the option that was actually matched. The match is
    # a case-insensitive substring test, so the literal 'attachment' need not
    # equal the option's real value.
    for sel in target_frame.query_selector_all('select'):
        for opt in sel.query_selector_all('option'):
            val = opt.get_attribute('value') or ''
            if 'attachment' in val.lower():
                sel.select_option(value=val)
                break

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
    country_slug = country if country else "ALL"
    form_slug = form_name.replace(" ", "").replace("/", "-").replace("(", "").replace(")", "")
    # Drop the remaining characters that Windows forbids in file names.
    form_slug = re.sub(r'[\\:*?"<>|]', "", form_slug)
    filename = f"{timestamp}_EDC_{study_label}_{country_slug}_{form_slug}_DataListing.csv"
    output_path = DOWNLOAD_DIR / filename

    print("Stahuji CSV...")
    with new_page.expect_download(timeout=60_000) as dl_info:
        btn = target_frame.query_selector('input[value="Download File"], button:has-text("Download File")')
        if btn:
            btn.click()
        else:
            new_page.locator('input[value="Download File"], button:has-text("Download File")').first.click()

    dl_info.value.save_as(str(output_path))
    print(f" Uloženo: {output_path}")

    try:
        new_page.close()
    except Exception:
        # The result window may already be gone; the file is saved either way.
        pass
    return output_path
# ---------------------------------------------------------------------------
# Hlavní funkce
# ---------------------------------------------------------------------------
def download_datalisting(study: str, forms: list[str], country: str | None = None):
    """Download the Data Listing report of every requested form.

    One browser session is used for all forms: it signs in (or reuses the
    saved session), opens the report and then downloads form by form.

    Args:
        study: Search string of the study, e.g. "77242113UCO3001".
        forms: Names of the forms to download.
        country: Site-group code, e.g. "CZE"; ``None`` means all countries.

    Returns:
        List of saved file paths (empty when *forms* is empty). Exits with
        status 1 when no password is configured.
    """
    if not PASSWORD:
        print("Chyba: nastav IMEDIDATA_PASSWORD v souboru .env")
        sys.exit(1)
    if not forms:
        print("Žádné formuláře ke stažení.")
        return []

    DOWNLOAD_DIR.mkdir(exist_ok=True)
    study_label = extract_study_label(study)
    results = []
    total = len(forms)

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=False,
            slow_mo=200,
            args=["--start-maximized"],
        )

        use_saved = auth_valid()
        ctx_kwargs = {"accept_downloads": True, "no_viewport": True}
        if use_saved:
            print("Načítám uloženou session (auth.json)...")
            ctx_kwargs["storage_state"] = str(AUTH_FILE)

        context = browser.new_context(**ctx_kwargs)
        page = context.new_page()

        if not go_to_select_role(page):
            # Redirected to sign-in: drop a stale session file and log in.
            if use_saved:
                print("Session expirovala, přihlašuji znovu...")
                AUTH_FILE.unlink(missing_ok=True)
            do_login(page, context)
            go_to_select_role(page)

        select_role(page)
        navigate_to_reporter(page)
        open_report(page)
        prompts_url = page.url

        print("\nNastavuji parametry reportu...")
        set_study_param(page, study)
        if country:
            set_site_group_param(page, country)
        else:
            print(" Parametr Site Group: přeskočen (všechny země)")

        for position, form_name in enumerate(forms, start=1):
            print(f"\n=== [{position}/{total}] Stahuji formulář: {form_name} ===")

            if position > 1:
                # Later forms start again from the prompts page and re-apply
                # the study and site-group parameters.
                print("Navigace zpět na report...")
                page.goto(prompts_url)
                wait_load(page, 2000)
                set_study_param(page, study)
                if country:
                    set_site_group_param(page, country)

            set_form_param(page, form_name)
            results.append(
                submit_and_download(page, context, form_name, country, study_label)
            )

        browser.close()

    print(f"\nHotovo! Staženo {len(results)} formulářů. Prohlížeč zavřen.")
    return results


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Optional first argument: site-group code; absent means all countries.
    cli_args = sys.argv[1:]
    country_arg = cli_args[0] if cli_args else None
    download_datalisting(
        study="77242113UCO3001",
        forms=["Trial Disposition (Completion / Discontinuation)"],
        country=country_arg,
    )
+483
View File
@@ -0,0 +1,483 @@
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path
from dotenv import load_dotenv
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
import tkinter as tk
from tkinter import simpledialog
load_dotenv(Path(__file__).parent / ".env")
USERNAME = os.getenv("IMEDIDATA_USERNAME", "vladimir.buzalka")
PASSWORD = os.getenv("IMEDIDATA_PASSWORD", "")
DOWNLOAD_DIR = Path(__file__).parent / "downloads"
AUTH_FILE = Path(__file__).parent / "auth.json"
AUTH_MAX_AGE_DAYS = 7
LOGIN_URL = "https://login.imedidata.com/login"
SELECT_ROLE_URL = (
"https://jnjja.mdsol.com/MedidataRave/SelectRole.aspx"
"?client_division_uuid=e5de55d5-a414-4bd1-9abe-18e96fd5475d"
"&study_group_uuid=b0793ca6-33ec-44e8-883b-6fc1a4b671c4"
"&studygroup_id=107981"
)
STUDY_NAME = "42847922MDD3003"
SITE_GROUP = "CZE"
REPORT_ID = 164 # _EDC Std Rpt - Query Details (Data Stream)
# Query Status: any combination of ["Open", "Answered", "Closed", "Canceled"]
QUERY_STATUSES = [] # empty = Default: All (no filtering)
# Milestone: "Final" is always available, the others depend on the study
MILESTONES = ["Final"]
# Date in DD-Mon-YYYY format (e.g. "01-Jan-2024"); empty string = no filter
START_DATE = ""
END_DATE = ""
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def auth_valid():
    """Return True when the saved session file exists and is recent enough.

    The file's modification time must be less than ``AUTH_MAX_AGE_DAYS``
    days old.
    """
    if AUTH_FILE.exists():
        saved_at = datetime.fromtimestamp(AUTH_FILE.stat().st_mtime)
        return datetime.now() - saved_at < timedelta(days=AUTH_MAX_AGE_DAYS)
    return False
def wait_load(page, extra_ms=1000, load_timeout_ms=20_000):
    """Wait for the page's 'load' state, then pause a little longer.

    Args:
        page: Page object to wait on.
        extra_ms: Fixed pause, in milliseconds, after the load wait.
        load_timeout_ms: Upper bound for the load wait, in milliseconds.
            A timeout is tolerated; the fixed pause still runs.
    """
    try:
        page.wait_for_load_state("load", timeout=load_timeout_ms)
    except PWTimeout:
        # Best effort: carry on with whatever has loaded so far.
        pass
    page.wait_for_timeout(extra_ms)
def dbg(page, label):
    """Print the page's current URL, tagged with *label*."""
    current_url = page.url
    print("[{}] URL: {}".format(label, current_url))
# ---------------------------------------------------------------------------
# Login
# ---------------------------------------------------------------------------
def _ask_otp_popup():
    """Ask for the OKTA one-time code in a topmost dialog.

    Returns:
        The entered text with surrounding whitespace removed, or ``""`` when
        the dialog was cancelled.
    """
    root = tk.Tk()
    # Destroy the hidden root window even when the dialog call raises.
    try:
        root.withdraw()
        root.lift()
        root.attributes("-topmost", True)
        otp = simpledialog.askstring(
            "OKTA MFA",
            "Zadej OTP kód z OKTA (6 číslic):",
            parent=root,
        )
    finally:
        root.destroy()
    return (otp or "").strip()
def do_login(page, context):
    """Log in through the login form and store the session state.

    Fills username and password, handles the one-time-code step when one is
    detected, waits for the home page and writes the storage state to
    ``AUTH_FILE``. Exits the process with status 1 when no code is entered
    or (after waiting for Enter) when the home page is not reached.
    """
    print("Přihlašuji se do iMedidata...")
    page.goto(LOGIN_URL)
    wait_load(page, 500)
    dbg(page, "login-page")

    username_box = 'input[name="session[username]"]'
    page.wait_for_selector(username_box, timeout=10_000)
    page.fill(username_box, USERNAME)
    page.fill('input[name="session[password]"]', PASSWORD)
    page.click('button[type="submit"]')
    wait_load(page, 2000)
    dbg(page, "after-signin")

    if _okta_mfa_present(page):
        print("\n*** OKTA MFA vyžadována! ***")
        code = _ask_otp_popup()
        if code:
            _fill_otp(page, code)
            wait_load(page, 3000)
            dbg(page, "after-otp")
        else:
            print("CHYBA: OTP nebylo zadáno.")
            sys.exit(1)

    try:
        page.wait_for_url("**/home.imedidata.com**", timeout=30_000)
    except PWTimeout:
        dbg(page, "wait-home-timeout")
    dbg(page, "final-login")

    reached_home = "home.imedidata.com" in page.url
    if not reached_home:
        print("CHYBA: Přihlášení se nezdařilo!")
        input("Zmáčkni Enter pro ukončení...")
        sys.exit(1)

    context.storage_state(path=str(AUTH_FILE))
    print("Session uložena do auth.json")
def _okta_mfa_present(page):
    """Return True when the page looks like an OKTA MFA / one-time-code step.

    The check passes when the URL contains "okta" (case-insensitive) or when
    any of the known code-input selectors matches an element.
    """
    if "okta" in page.url.lower():
        return True

    code_inputs = (
        'input[name="answer"]',
        'input[name*="otp"]',
        'input[name*="code"]',
        'input[placeholder*="code" i]',
    )
    for css in code_inputs:
        try:
            hit = page.query_selector(css)
        except Exception:
            # Page navigated during the query; skip this selector.
            continue
        if hit:
            return True
    return False
def _fill_otp(page, otp):
    """Enter the one-time code into the first matching input and press Enter.

    Candidate selectors are tried in order; any error while probing or
    filling one moves on to the next.  When none could be filled, the code
    is typed via the keyboard into whatever currently has focus, and errors
    from that last attempt are ignored.
    """
    candidate_inputs = (
        'input[name="answer"]',
        'input[name*="otp"]',
        'input[name*="code"]',
        'input[type="tel"]',
        'input[placeholder*="code" i]',
    )
    for css in candidate_inputs:
        try:
            field = page.query_selector(css)
            if field:
                field.fill(otp)
                page.keyboard.press("Enter")
                return
        except Exception:
            # Page navigated; continue with the next selector.
            continue

    # Last resort: type blindly into the focused element.
    try:
        page.keyboard.type(otp)
        page.keyboard.press("Enter")
    except Exception:
        pass
# ---------------------------------------------------------------------------
# Navigace
# ---------------------------------------------------------------------------
def go_to_select_role(page):
    """Navigate to SELECT_ROLE_URL and report whether a session is active.

    Navigation errors are ignored.  Returns False when the resulting URL
    contains "login" or "okta" (case-insensitive), True otherwise.
    """
    print("Navigace na SelectRole...")
    try:
        page.goto(SELECT_ROLE_URL)
    except Exception:
        pass
    wait_load(page, 1500)
    dbg(page, "select-role")

    landed = page.url.lower()
    return not ("login" in landed or "okta" in landed)
def select_role(page):
    """Choose the "Site Manager" role and confirm the selection.

    Picks the first <option> whose text contains "site manager"
    (case-insensitive) in any <select>; when none is found, clicks the first
    element containing the text "Site Manager" instead.  Afterwards clicks
    the first continue/submit control found.  Returns without doing anything
    when no <select> appears within 10 seconds.
    """
    print("Vybírám roli Site Manager...")
    try:
        page.wait_for_selector("select", timeout=10_000)
    except PWTimeout:
        return

    role_chosen = False
    for dropdown in page.query_selector_all("select"):
        for option in dropdown.query_selector_all("option"):
            caption = (option.inner_text() or "").strip()
            if "site manager" not in caption.lower():
                continue
            dropdown.select_option(label=caption)
            role_chosen = True
            print(f" Vybráno: '{caption}'")
            break
        if role_chosen:
            break

    if not role_chosen:
        # Fallback: click the role by its visible text.
        try:
            page.get_by_text("Site Manager", exact=False).first.click()
        except Exception as e:
            print(f" {e}")

    confirm_controls = (
        'input[value="Continue"]',
        'input[type="submit"]',
        'button:has-text("Continue")',
        'button[type="submit"]',
    )
    for css in confirm_controls:
        try:
            control = page.query_selector(css)
            if control:
                control.click()
                break
        except Exception:
            continue

    wait_load(page, 2000)
    dbg(page, "after-role")
def navigate_to_reporter(page):
    """Click the "Reporter" link and wait for the page to load.

    Raises:
        PWTimeout: re-raised after logging the URL when a step times out.
    """
    reporter_link = 'a:has-text("Reporter")'
    print("Klikám na Reporter...")
    try:
        page.wait_for_selector(reporter_link, timeout=15_000)
        page.click(reporter_link)
        wait_load(page, 1500)
        dbg(page, "reporter")
    except PWTimeout:
        dbg(page, "reporter-not-found")
        raise
def open_report(page):
    """Open the prompts page of the report identified by REPORT_ID.

    Raises:
        PWTimeout: re-raised after logging the URL when a step times out.
    """
    print(f"Klikám na report ID={REPORT_ID} (Query Details)...")
    report_link = f'a[href="PromptsPage.aspx?ReportID={REPORT_ID}"]'
    try:
        page.wait_for_selector(report_link, timeout=15_000)
        page.click(report_link)
        wait_load(page, 2000)
        dbg(page, "report-opened")
    except PWTimeout:
        dbg(page, "report-not-found")
        raise
# ---------------------------------------------------------------------------
# Parametry reportu
# ---------------------------------------------------------------------------
def set_study_param(page):
    """Open the Study prompt panel and make sure its first checkbox is ticked.

    NOTE(review): the checkbox label is never compared with STUDY_NAME; the
    code assumes the first list entry is the wanted study — confirm.
    """
    print(f" Parametr Study: {STUDY_NAME}")
    page.click('#PromptsBox_st_ShowHideBtn')
    page.wait_for_timeout(1500)
    page.wait_for_selector('#PromptsBox_st_FrontEndCBList_0', timeout=10_000)
    cb = page.locator('#PromptsBox_st_FrontEndCBList_0')
    if not cb.is_checked():
        cb.check()
    wait_load(page, 3000)
    dbg(page, "after-study")
def set_site_group_param(page):
    """Select SITE_GROUP in the Site Group prompt and tick "Include Sub Site Groups"."""
    print(f" Parametr Site Group: {SITE_GROUP}")
    page.click('#PromptsBox_sg_ShowHideBtn')
    page.wait_for_timeout(1500)
    page.wait_for_selector('#PromptsBox_sg_List', timeout=10_000)
    page.select_option('#PromptsBox_sg_List', label=SITE_GROUP)
    # Dispatch a bubbling 'change' event explicitly after the selection.
    page.evaluate("document.querySelector('#PromptsBox_sg_List').dispatchEvent(new Event('change', {bubbles:true}))")
    wait_load(page, 2000)
    print(" Include Sub Site Groups: zapnuto")
    cb = page.locator('#PromptsBox_sg_CheckBox')
    if not cb.is_checked():
        cb.check()
        # Same explicit 'change' dispatch for the checkbox.
        page.evaluate("document.querySelector('#PromptsBox_sg_CheckBox').dispatchEvent(new Event('change', {bubbles:true}))")
        wait_load(page, 2000)
    # Second click on the show/hide button that was used above to open the panel.
    page.click('#PromptsBox_sg_ShowHideBtn')
    wait_load(page, 3000)
    dbg(page, "after-site-group")
def set_query_status_param(page):
    """Tick the Query Status checkboxes listed in QUERY_STATUSES.

    Does nothing (apart from a message) when QUERY_STATUSES is empty.
    Statuses that are not one of Open/Answered/Closed/Canceled are reported
    and skipped.
    """
    if not QUERY_STATUSES:
        print(" Parametr Query Status: All (přeskočeno)")
        return

    print(f" Parametr Query Status: {', '.join(QUERY_STATUSES)}")
    page.click('#PromptsBox_qu_ShowHideBtn')
    page.wait_for_timeout(1500)
    # Wait until the checkboxes have been rendered.
    page.wait_for_selector('input[id^="PromptsBox_qu_FrontEndCBList_"]', timeout=10_000)

    # Status label -> index suffix of the checkbox element id.
    checkbox_index = {"Open": 0, "Answered": 1, "Closed": 2, "Canceled": 3}
    for wanted in QUERY_STATUSES:
        position = checkbox_index.get(wanted)
        if position is None:
            print(f" VAROVÁNÍ: neznámý status '{wanted}'")
            continue
        box = page.locator(f'#PromptsBox_qu_FrontEndCBList_{position}')
        if not box.is_checked():
            box.check()
        print(f" '{wanted}' zaškrtnuto")

    wait_load(page, 1000)
def set_milestone_param(page):
    """Search for each entry of MILESTONES in the Milestone prompt and tick it.

    For every milestone the name is typed into the panel's search box and the
    first checkbox of the result list is clicked if it is not already
    checked.  A milestone whose checkbox does not become visible within 8
    seconds is reported and skipped.
    """
    print(f" Parametr Milestone: {', '.join(MILESTONES)}")
    # Open the panel if it is closed
    is_closed = page.locator('#PromptsBox_ms_div').evaluate('el => el.style.display') == 'none'
    if is_closed:
        page.click('#PromptsBox_ms_ShowHideBtn')
        page.wait_for_timeout(2000)
    # After a previous selection: pencil → eye → list load
    if page.locator('#PromptsBox_ms_PageModeBtn').is_visible():
        page.click('#PromptsBox_ms_PageModeBtn')  # pencil → eye
        page.wait_for_timeout(1000)
        page.click('#PromptsBox_ms_PageModeBtn')  # eye → loads the milestones
        page.wait_for_timeout(2000)
    for milestone in MILESTONES:
        search = page.locator('#PromptsBox_ms_SearchTxt')
        search.wait_for(state='visible', timeout=10_000)
        search.click()
        search.fill(milestone)
        search.press('Enter')
        # Only the first checkbox of the filtered list is used.
        cb = page.locator('input[id^="PromptsBox_ms_FrontEndCBList_"]').first
        try:
            cb.wait_for(state='visible', timeout=8_000)
        except PWTimeout:
            print(f" VAROVÁNÍ: '{milestone}' nenalezen!")
            continue
        if not cb.is_checked():
            cb.click()
        print(f" '{milestone}' zaškrtnuto")
    wait_load(page, 500)
def set_date_param(page, panel_id, date_value, label):
    """Type a date into the date-picker prompt identified by panel_id.

    Args:
        page: Playwright page showing the report prompts.
        panel_id: Element-id prefix of the prompt panel.
        date_value: Text to enter; a falsy value skips the prompt entirely.
        label: Human-readable prompt name used in the progress message.
    """
    if not date_value:
        return

    toggle_button = f'#{panel_id}_ShowHideBtn'
    picker_input = f'#{panel_id}_DatePickerTxt'

    print(f" Parametr {label}: {date_value}")
    page.click(toggle_button)
    page.wait_for_timeout(1000)

    picker = page.locator(picker_input)
    picker.wait_for(state='visible', timeout=10_000)
    picker.click()
    picker.fill(date_value)
    picker.press('Tab')  # leave the field after typing
    page.wait_for_timeout(500)
# ---------------------------------------------------------------------------
# Submit a download
# ---------------------------------------------------------------------------
def submit_and_download(page, context):
    """Submit the report, wait for the result window and save the CSV file.

    Clicking "Submit Report" opens a new page.  Once its "Download File"
    button appears (up to 5 minutes), the file type is switched to the
    option mentioning 'vnd.ms-excel' and the disposition to the option
    whose value contains 'attachment'.  The download is stored in
    DOWNLOAD_DIR under a timestamped name and the result window is closed.

    Args:
        page: Playwright page showing the report prompts.
        context: Browser context used to catch the newly opened page.

    Returns:
        Path of the saved CSV file.
    """
    download_button = 'input[value="Download File"], button:has-text("Download File")'

    print("Odesílám report (čekám na nové okno)...")
    with context.expect_page() as new_page_info:
        page.locator('input[value="Submit Report"], button:has-text("Submit Report")').first.click()
    new_page = new_page_info.value
    new_page.wait_for_url(lambda url: url != 'about:blank', timeout=30_000)

    print(" Čekám na vygenerování reportu...")
    new_page.wait_for_selector(download_button, timeout=300_000)
    new_page.wait_for_timeout(500)
    dbg(new_page, "download-window")

    print(" Nastavuji parametry stahování...")
    # Use the first frame that holds the download controls; default to the main frame.
    target_frame = new_page.main_frame
    for frame in new_page.frames:
        if frame.query_selector('select') or frame.query_selector('input[value="Download File"]'):
            target_frame = frame
            print(f" Frame nalezen: {frame.url}")
            break

    # File type: pick the option that mentions the Excel/CSV MIME type.
    for sel in target_frame.query_selector_all('select'):
        for opt in sel.query_selector_all('option'):
            val = opt.get_attribute('value') or ''
            txt = opt.inner_text() or ''
            if 'vnd.ms-excel' in val or 'vnd.ms-excel' in txt:
                sel.select_option(value=val)
                print(" File type: .csv (application/vnd.ms-excel)")
                break

    # Disposition: the option is matched case-insensitively, so it must be
    # selected by its actual value rather than the literal 'attachment'.
    for sel in target_frame.query_selector_all('select'):
        for opt in sel.query_selector_all('option'):
            val = opt.get_attribute('value') or ''
            if 'attachment' in val.lower():
                sel.select_option(value=val)
                break

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
    filename = f"{timestamp}_EDC_MDD3003_QueryDetails.csv"
    output_path = DOWNLOAD_DIR / filename

    print("Stahuji CSV...")
    with new_page.expect_download(timeout=60_000) as dl_info:
        btn = target_frame.query_selector(download_button)
        if btn:
            btn.click()
        else:
            new_page.locator(download_button).first.click()
    download = dl_info.value
    download.save_as(str(output_path))
    print(f"\nHotovo! Soubor uložen: {output_path}")

    # Closing the result window is best effort only.
    try:
        new_page.close()
        print("Stahovací okno zavřeno.")
    except Exception:
        pass
    return output_path
# ---------------------------------------------------------------------------
# Hlavní flow
# ---------------------------------------------------------------------------
def run():
    """Run the whole flow: log in, set the report prompts, download the CSV.

    Exits with status 1 when PASSWORD is empty.  A saved session from
    AUTH_FILE is reused while auth_valid() accepts it; when the role page is
    not reachable, the saved file is removed (if it was used) and a fresh
    login is performed.
    """
    if not PASSWORD:
        print("Chyba: nastav IMEDIDATA_PASSWORD v souboru .env")
        sys.exit(1)
    DOWNLOAD_DIR.mkdir(exist_ok=True)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False, slow_mo=200)

        use_saved = auth_valid()
        if use_saved:
            print("Načítám uloženou session (auth.json)...")
            context = browser.new_context(
                accept_downloads=True,
                storage_state=str(AUTH_FILE),
            )
        else:
            context = browser.new_context(accept_downloads=True)
        page = context.new_page()

        if not go_to_select_role(page):
            if use_saved:
                print("Session expirovala, mažu auth.json a přihlašuji znovu...")
                AUTH_FILE.unlink(missing_ok=True)
            do_login(page, context)
            go_to_select_role(page)

        select_role(page)
        navigate_to_reporter(page)
        open_report(page)

        print("Nastavuji parametry reportu...")
        set_study_param(page)
        set_site_group_param(page)
        set_query_status_param(page)
        set_milestone_param(page)
        for panel, chosen_date, caption in (
            ('PromptsBox_sd', START_DATE, "Start Date"),
            ('PromptsBox_ed', END_DATE, "End Date"),
        ):
            set_date_param(page, panel, chosen_date, caption)

        submit_and_download(page, context)
        browser.close()
        print("Prohlížeč zavřen.")


if __name__ == "__main__":
    run()
+29
View File
@@ -0,0 +1,29 @@
2026-05-20 17:56:21,647 ERROR Nelze se připojit k MongoDB (mongodb://localhost:27017): localhost:27017: [WinError 10061] No connection could be made because the target machine actively refused it (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms), Timeout: 5.0s, Topology Description: <TopologyDescription id: 6a0dd9a0ce7e4c93f3399a61, topology_type: Unknown, servers: [<ServerDescription ('localhost', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('localhost:27017: [WinError 10061] No connection could be made because the target machine actively refused it (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms)')>]>
2026-05-20 17:56:45,268 INFO Importuji: downloads/2026-05-20_15-09_EDC_MDD3003_DateofVisit_DataListing.csv → edc.MDD3003_DateofVisit
2026-05-20 17:56:48,052 INFO nové: 381 aktualizované: 0 chyby: 0
2026-05-20 17:56:48,052 INFO ============================================================
2026-05-20 17:56:48,052 INFO Celkem — nové: 381 aktualizované: 0 chyby: 0
2026-05-20 18:12:48,691 INFO Importuji: downloads/2026-05-20_15-21_EDC_MDD3003_QueryDetails.csv → edc.2026-05-20_15-21_EDC_MDD3003_QueryDetails
2026-05-20 18:12:48,739 INFO nové: 4 aktualizované: 0 chyby: 0
2026-05-20 18:12:48,801 INFO Importuji: downloads/2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv → edc.2026-05-20_15-23_EDC_MDD3003_QueryDetails
2026-05-20 18:13:03,331 INFO nové: 2091 aktualizované: 0 chyby: 0
2026-05-20 18:13:03,332 INFO ============================================================
2026-05-20 18:13:03,332 INFO Celkem — nové: 2095 aktualizované: 0 chyby: 0
2026-05-20 18:13:31,267 INFO Importuji: downloads/2026-05-20_15-21_EDC_MDD3003_QueryDetails.csv → edc.MDD3003_QueryDetails
2026-05-20 18:13:31,306 INFO nové: 4 aktualizované: 0 chyby: 0
2026-05-20 18:13:31,354 INFO Importuji: downloads/2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv → edc.MDD3003_QueryDetails
2026-05-20 18:13:45,497 INFO nové: 2087 aktualizované: 4 chyby: 0
2026-05-20 18:13:45,497 INFO ============================================================
2026-05-20 18:13:45,497 INFO Celkem — nové: 2091 aktualizované: 4 chyby: 0
2026-05-20 18:14:06,652 INFO Importuji: downloads/2026-05-20_15-21_EDC_MDD3003_QueryDetails.csv → edc.queries
2026-05-20 18:14:06,683 INFO nové: 4 aktualizované: 0 chyby: 0
2026-05-20 18:14:06,727 INFO Importuji: downloads/2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv → edc.queries
2026-05-20 18:14:22,340 INFO nové: 2087 aktualizované: 4 chyby: 0
2026-05-20 18:14:22,340 INFO ============================================================
2026-05-20 18:14:22,340 INFO Celkem — nové: 2091 aktualizované: 4 chyby: 0
2026-05-20 21:56:49,619 INFO Importuji: downloads/2026-05-20_15-21_EDC_MDD3003_QueryDetails.csv → edc.queries + queries_snapshots [2026-05-20]
2026-05-20 21:56:49,670 INFO nové: 0 aktualizované: 4 chyby: 0
2026-05-20 21:56:49,711 INFO Importuji: downloads/2026-05-20_15-23_EDC_MDD3003_QueryDetails.csv → edc.queries + queries_snapshots [2026-05-20]
2026-05-20 21:57:07,554 INFO nové: 0 aktualizované: 2091 chyby: 0
2026-05-20 21:57:07,554 INFO ============================================================
2026-05-20 21:57:07,554 INFO Celkem — nové: 0 aktualizované: 2095 chyby: 0
+21
View File
@@ -0,0 +1,21 @@
{
"_meta": {
"study": "42847922MDD3003",
"site_group": "CZE",
"report": "_EDC Std Rpt - Data Listing (Data Stream), ReportID=92",
"total_forms": 97,
"pages": 5,
"note": "Kompletni seznam nacist z PromptsPage po vyberu Study+SiteGroup, Form panel, 5 stranek po 20 (posledni 17)"
},
"known_forms": [
"Acknowledgement Reporting Form",
"Acknowledgement Upload Form",
"Adverse Event of Special Interest",
"Adverse Events/Serious Aes",
"Alcohol Test",
"Arizona Sexual Experiences Scale Summary",
"Arizona Sexual Experiences Scale-Female",
"Arizona Sexual Experiences Scale-Male",
"Date of Visit"
]
}
+297
View File
@@ -0,0 +1,297 @@
"""
import_to_mongo.py
Verze: 1.0
Datum: 2026-05-27
Import EDC Data Listing CSV do MongoDB (databáze: edc).
Kolekce: {STUDY}.{FormName} (např. UCO3001.ConcomitantTherapy)
Filtr: pouze řádky s SiteGroupName == "CZE"
Historie: při změně fields se stará verze uloží do pole history[]
Po importu přesune zpracované CSV do downloads/Zpracovano/
Použití:
python import_to_mongo.py # importuje všechny CSV z downloads/
python import_to_mongo.py downloads/konkretni.csv # jeden soubor
"""
import csv
import re
import shutil
import sys
from datetime import datetime, timezone
from pathlib import Path
from pymongo import MongoClient, ASCENDING
# MongoDB connection string and target database name.
MONGO_URI = "mongodb://192.168.1.76:27017"
DB_NAME = "edc"
# Directory scanned for CSV files, and the sub-directory imported files are moved to.
DOWNLOADS_DIR = Path(__file__).parent / "downloads"
PROCESSED_DIR = DOWNLOADS_DIR / "Zpracovano"
# Only rows whose SiteGroupName equals this value are imported.
COUNTRY_FILTER = "CZE"
# ---------------------------------------------------------------------------
# Mapping of the fixed CSV columns
# ---------------------------------------------------------------------------
# CSV column name -> dotted path of the key in the stored document.
FIXED_FIELDS = {
    "SiteGroupName": "site.group",
    "SiteID": "site.id",
    "SiteNumber": "site.number",
    "Site": "site.name",
    "SubjectID": "subject.id",
    "Subject": "subject.label",
    "CRFVersionID": "form.crfVersionId",
    "InstanceID": "form.instanceId",
    "InstanceName": "form.instanceName",
    "FolderSeq": "form.folderSeq",
    "Page": "form.page",
    "RecordID": "form.recordId",
    "RecordPosition": "form.recordPosition",
    "LastModifiedDate": "lastModified",
}
# Document paths whose values map_row tries to convert to int.
INT_CAST = {"form.folderSeq", "form.recordPosition"}
# CSV columns that map_row leaves out of the document.
META_FIELDS = {
    "StudyName", "SiteGroupParameter", "SiteNumberParameter", "SiteParameter",
    "SubjectParameter", "FormParameter", "FieldParameter", "FilterField",
    "FilterValue", "StartDateParameter", "EndDateParameter", "RunUser",
    "VersionNumber", "PrintDateTime", "TimeZone", "LastModifiedDateSortable",
    "StartDateSortable", "EndDateSortable", "ErrorMsg",
}
# strptime patterns that parse_date tries in this order.
DATE_FORMATS = [
    "%d %b %Y %H:%M:%S",
    "%d %b %Y %H:%M:%S:%f",
    "%d %b %Y",
    "%d %B %Y",
    "%Y%m%d %H:%M:%S.%f",
    "%Y-%m-%d %H:%M:%S",
    "%m/%d/%Y %I:%M:%S %p",
]
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def parse_date(value: str) -> str | None:
    """Convert a date string to ISO 8601, trying each pattern in DATE_FORMATS.

    The parsed value is tagged as UTC without any conversion.  Returns None
    when no pattern matches.
    """
    text = value.strip()
    for pattern in DATE_FORMATS:
        try:
            moment = datetime.strptime(text, pattern)
        except ValueError:
            continue
        return moment.replace(tzinfo=timezone.utc).isoformat()
    return None
def set_nested(doc: dict, path: str, value) -> None:
    """Store value in doc under a dot-separated path, creating sub-dicts as needed."""
    *parents, leaf = path.split(".")
    node = doc
    for key in parents:
        node = node.setdefault(key, {})
    node[leaf] = value
def extract_snapshot_date(filename: str) -> str:
    """Return the leading YYYY-MM-DD of the file name, or today's date if absent."""
    found = re.match(r"(\d{4}-\d{2}-\d{2})", Path(filename).name)
    if found is None:
        return datetime.now().strftime("%Y-%m-%d")
    return found.group(1)
def parse_collection_name(filename: str) -> str | None:
"""
Z názvu souboru odvodí kolekci ve formátu STUDY.FormName.
Vrátí None pro QueryDetails (mají vlastní flow).
"""
stem = Path(filename).stem
if "QueryDetails" in stem:
return None
match = re.search(
r"EDC_(\w+?)_(?:ALL_|CZE_|[A-Z]{2,3}_)?(.+?)_DataListing",
stem, re.IGNORECASE,
)
if match:
study, form = match.group(1), match.group(2)
return f"{study}.{form}"
return None
# ---------------------------------------------------------------------------
# CSV → dokument
# ---------------------------------------------------------------------------
def map_row(row: dict) -> dict:
    """Convert one CSV row into the document that is stored in MongoDB.

    Columns listed in FIXED_FIELDS are copied to their dotted path (paths in
    INT_CAST are converted to int when possible, "lastModified" is parsed as
    a date when possible).  The numbered Field<n>Label / Field<n>Value pairs
    are collected into doc["fields"], keyed by label; pairs with an empty
    label or value are left out.  All other columns are ignored.

    Args:
        row: Mapping of column name to cell text as produced by
            csv.DictReader; cells may be None for short rows.

    Returns:
        The document dict, always containing a "fields" dict.
    """
    doc: dict = {}
    for col, raw in row.items():
        path = FIXED_FIELDS.get(col)
        if path is None:
            # Not a mapped column.  This also covers the surplus cells that
            # csv.DictReader stores as a list under the key None, which must
            # not be stripped like a string.
            continue
        value = raw.strip() if raw else ""
        if path in INT_CAST:
            try:
                value = int(value)
            except (ValueError, TypeError):
                pass  # keep the original text when it is not an integer
        elif path == "lastModified":
            value = parse_date(value) or value
        set_nested(doc, path, value)

    # Numbered field pairs: stop at the first n for which neither column exists.
    fields: dict = {}
    n = 1
    while f"Field{n}Value" in row or f"Field{n}Label" in row:
        label = (row.get(f"Field{n}Label") or "").strip()
        value = (row.get(f"Field{n}Value") or "").strip()
        if label and value:
            fields[label] = parse_date(value) or value
        n += 1

    doc["fields"] = fields
    return doc
# ---------------------------------------------------------------------------
# Import jednoho souboru
# ---------------------------------------------------------------------------
def import_file(csv_path: str, db) -> dict:
    """Import one Data Listing CSV into the collection derived from its name.

    Only rows whose SiteGroupName equals COUNTRY_FILTER are used; rows
    without a record id are ignored.  Records are matched on
    "form.recordId": new ones are inserted, records whose "fields" differ
    get the previous fields appended to "history" before being updated, and
    unchanged ones only have "lastSeen"/"sourceFile" refreshed.

    Args:
        csv_path: Path of the CSV file.
        db: Database object indexable by collection name.

    Returns:
        {"skipped": True} when no collection name can be derived from the
        file name, otherwise a dict with the collection name, snapshot date
        and the inserted/changed/unchanged/filtered_out counters.
    """
    filename = Path(csv_path).name
    col_name = parse_collection_name(filename)
    if col_name is None:
        print(f" Preskakuji (QueryDetails): {filename}")
        return {"skipped": True}

    snapshot_date = extract_snapshot_date(filename)
    collection = db[col_name]
    inserted = changed = unchanged = filtered_out = 0

    with open(csv_path, encoding="utf-8", newline="") as f:
        # Create the indexes before the per-row lookups so that find_one on
        # "form.recordId" is indexed during the first import as well.
        collection.create_index([("form.recordId", ASCENDING)], unique=True)
        collection.create_index([("subject.label", ASCENDING)])
        collection.create_index([("site.number", ASCENDING)])

        reader = csv.DictReader(f, delimiter=",", quotechar='"')
        for row in reader:
            site_group = (row.get("SiteGroupName") or "").strip()
            if site_group != COUNTRY_FILTER:
                filtered_out += 1
                continue

            doc = map_row(row)
            record_id = doc.get("form", {}).get("recordId")
            if not record_id:
                continue
            doc["sourceFile"] = filename

            existing = collection.find_one({"form.recordId": record_id})
            if existing is None:
                doc["firstSeen"] = snapshot_date
                doc["lastSeen"] = snapshot_date
                doc["history"] = []
                collection.insert_one(doc)
                inserted += 1
            elif existing.get("fields") != doc["fields"]:
                # Keep the previous field values before overwriting them.
                old_entry = {
                    "date": existing.get("lastSeen", snapshot_date),
                    "fields": existing.get("fields", {}),
                }
                update_doc = dict(doc)
                update_doc["lastSeen"] = snapshot_date
                collection.update_one(
                    {"_id": existing["_id"]},
                    {
                        "$push": {"history": old_entry},
                        "$set": update_doc,
                    },
                )
                changed += 1
            else:
                collection.update_one(
                    {"_id": existing["_id"]},
                    {"$set": {"lastSeen": snapshot_date, "sourceFile": filename}},
                )
                unchanged += 1

    stats = {
        "collection": col_name,
        "snapshot": snapshot_date,
        "inserted": inserted,
        "changed": changed,
        "unchanged": unchanged,
        "filtered_out": filtered_out,
    }
    print(f" {col_name} [{snapshot_date}]: +{inserted} new, ~{changed} changed, ={unchanged} same, -{filtered_out} non-CZE")
    return stats
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """Import the CSV files given on the command line, or all pending ones.

    Without arguments every "*_DataListing.csv" in DOWNLOADS_DIR is
    imported.  Each imported file is moved to PROCESSED_DIR afterwards;
    skipped files stay where they are.  The MongoDB client is closed even
    when the connection check or an import fails.
    """
    paths: list[Path] = []
    if len(sys.argv) > 1:
        for arg in sys.argv[1:]:
            p = Path(arg)
            if p.is_file():
                paths.append(p)
            else:
                print(f"Soubor nenalezen: {arg}")
    else:
        paths = sorted(DOWNLOADS_DIR.glob("*_DataListing.csv"))

    if not paths:
        print("Zadne CSV soubory k importu.")
        return
    print(f"Nalezeno {len(paths)} souboru.\n")

    total = {"inserted": 0, "changed": 0, "unchanged": 0}
    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
    try:
        # Fail early when the server is not reachable.
        client.admin.command("ping")
        db = client[DB_NAME]
        # parents=True: files passed explicitly may be imported before
        # DOWNLOADS_DIR itself exists.
        PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

        for csv_path in paths:
            print(f"Import: {csv_path.name}")
            stats = import_file(str(csv_path), db)
            if not stats.get("skipped"):
                for k in total:
                    total[k] += stats.get(k, 0)
                dest = PROCESSED_DIR / csv_path.name
                shutil.move(str(csv_path), str(dest))
                print(" -> presunut do Zpracovano/")
    finally:
        client.close()

    print(f"\nCelkem: +{total['inserted']} new, ~{total['changed']} changed, ={total['unchanged']} same")


if __name__ == "__main__":
    main()
+103
View File
@@ -0,0 +1,103 @@
{
"study": "42847922MDD3003",
"report_id": 92,
"forms": [
"Acknowledgement Reporting Form",
"Acknowledgement Upload Form",
"Adverse Event of Special Interest",
"Adverse Events/Serious Aes",
"Alcohol Test",
"Arizona Sexual Experiences Scale Summary",
"Arizona Sexual Experiences Scale-Female",
"Arizona Sexual Experiences Scale-Male",
"Change in Background Antidepressant",
"Clinical Global Impression - S (Depression)",
"Clinical Outcome Assessments Completion Status",
"Columbia-Suicide Severity Rating Scale - Baseline/Screening Version",
"Columbia-Suicide Severity Rating Scale - Since Last Visit",
"Comments",
"Concomitant Therapy / Medication",
"Date of Visit",
"Death Information",
"Demographics",
"Derivation (operational form)",
"Drug Expiry Information",
"Drug Testing",
"DSL Index Page (Must have for DSL functionality; not visible to the sites) (operational form)",
"DUMMY (operational form)",
"Educational Level",
"Enrollment",
"EQ-5D-5L",
"Evaluation of Response - Induction Phase",
"Evaluation of Response - Stabilization Phase",
"General Medical History",
"Inclusion/Exclusion Criteria",
"Insomnia Severity Index",
"Insomnia Severity Index (Clinician Version)",
"Integrated Medication Kit Accountability Information Double Blind Maintenance",
"Integrated Medication Kit Accountability Information Double Blind Part 1",
"Integrated Medication Kit Accountability Information Open Label Part 2",
"Interim Investigator Signature",
"IRT Stratification",
"Local Chemistry (Unscheduled)",
"Local Hematology (Unscheduled)",
"Local Labs for Background Antidepressant Compliance",
"Menstrual Cycle Tracking",
"Menstrual Cycle Tracking Log",
"MGH ATRQ, Geriatric, Section I and II",
"MGH ATRQ, Non-Geriatric, Section I and II",
"MGH-ATRQ - Summary",
"MGH-ATRQ - Therapy Questions",
"Mini-Mental State Examination",
"Neurologic Examination",
"Patient Global Impression of Change - Depression/Insomnia",
"Patient Global Impression of Severity - Insomnia",
"Patient Health Questionnaire - 9 Item",
"Perceived Treatment Group Assignment",
"Periodic Investigator's EDC Review Acknowledgement",
"Physical Examination",
"Physician Withdrawal Checklist (PWC)",
"Pregnancy Test",
"Preplanned Surgeries/Procedures",
"Procedures",
"PROMIS - Sleep Disturbance",
"Protocol Amendment Implementation (Operational Form)",
"Psychiatric History for Major Depressive Disorder",
"Psychotherapy",
"Psychotherapy v2.0",
"Randomization",
"Relapse - MAJOR DEPRESSIVE DISORDER",
"Relapse Criteria",
"Relapse Criteria Unscheduled",
"Relevant Additional Drug Therapies",
"Relevant information Selection",
"Relevant Local Laboratory",
"Relevant Local Laboratory Data",
"Relevant Medical History",
"Relevant Preplanned Surgeries/Procedures",
"Relevant Procedures",
"Relevant Study Medication",
"Relevant Tests",
"Safety Report Form",
"SCID-CT",
"SCID-CT Insomnia Disorder Supplemental",
"Sheehan Disability Scale",
"SIGH-D-17",
"Site Independent Qualification Assessment",
"Site/Invest Identification",
"Structured Interview Guide for the Montgomery-Asberg Depression Rating Scale",
"Study Drug Administration Double Blind Maintenance",
"Study Drug Administration Double Blind Part 1",
"Study Drug Administration Open Label Part 2 - Induction",
"Study Drug Administration Open Label Part 2 - Stabilization",
"Subject",
"Subject Site Switch",
"Treatment Disposition",
"Treatment Unblinding",
"Trial Disposition Completion/ Discontinuation",
"Unscheduled Assessments",
"Unsuccessful Contact Attempts",
"Vital Signs",
"Vital Signs (Unscheduled)"
]
}
+58
View File
@@ -0,0 +1,58 @@
[
{"name": "_EDC Std Rpt - Changes/Queries after SDV", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=97"},
{"name": "_EDC Std Rpt - Changes/Queries after SDV", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=98"},
{"name": "_EDC Std Rpt - Clinical Safety Case Report", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=93"},
{"name": "_EDC Std Rpt - CTMS Activities Listing", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=111"},
{"name": "_EDC Std Rpt - Data Cleaning Progress and Finalization", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=102"},
{"name": "_EDC Std Rpt - Data Cleaning Progress and Finalization", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=105"},
{"name": "_EDC Std Rpt - Data Cleaning Progress Visit Detail", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=113"},
{"name": "_EDC Std Rpt - Data Cleaning Progress Visit Detail", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=112"},
{"name": "_EDC Std Rpt - Data Listing", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=92"},
{"name": "_EDC Std Rpt - Data Listing", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=99"},
{"name": "_EDC Std Rpt - eCRF Version Comparison", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=127"},
{"name": "_EDC Std Rpt - eCRF Version Comparison", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=128"},
{"name": "_EDC Std Rpt - Expected eCRF Fields Not Entered", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=130"},
{"name": "_EDC Std Rpt - Expected eCRF Fields Not Entered", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=129"},
{"name": "_EDC Std Rpt - GMS SAE Report", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=91"},
{"name": "_EDC Std Rpt - ICF Log Listing", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=132"},
{"name": "_EDC Std Rpt - ICF Log Listing", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=131"},
{"name": "_EDC Std Rpt - Inactivated Datalist", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=133"},
{"name": "_EDC Std Rpt - Inactivated Datalist", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=134"},
{"name": "_EDC Std Rpt - Inactive DataPages", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=136"},
{"name": "_EDC Std Rpt - Inactive DataPages", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=135"},
{"name": "_EDC Std Rpt - J&J CAR-T SAE Report", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=137"},
{"name": "_EDC Std Rpt - Local Lab Normal Ranges", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=138"},
{"name": "_EDC Std Rpt - Medical Affairs SAE Report", "description": "For MA Pre-Configured Library", "type": "Global", "url": "PromptsPage.aspx?ReportID=139"},
{"name": "_EDC Std Rpt - Missing Pages", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=141"},
{"name": "_EDC Std Rpt - Missing Pages", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=140"},
{"name": "_EDC Std Rpt - Missing Visits", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=149"},
{"name": "_EDC Std Rpt - Missing Visits", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=148"},
{"name": "_EDC Std Rpt - Monitor SDV Planning", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=151"},
{"name": "_EDC Std Rpt - Monitor SDV Planning", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=150"},
{"name": "_EDC Std Rpt - Non Conformant Data Listing", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=152"},
{"name": "_EDC Std Rpt - Pages to Review / SDV / Freeze", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=154"},
{"name": "_EDC Std Rpt - Pages to Review / SDV / Freeze", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=153"},
{"name": "_EDC Std Rpt - Query Aging", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=156"},
{"name": "_EDC Std Rpt - Query Aging", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=155"},
{"name": "_EDC Std Rpt - Query Details", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=157"},
{"name": "_EDC Std Rpt - Query Details", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=164"},
{"name": "_EDC Std Rpt - Query Trend", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=168"},
{"name": "_EDC Std Rpt - Query Trend", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=166"},
{"name": "_EDC Std Rpt - Re-UAT Compare Report", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=195"},
{"name": "_EDC Std Rpt - Safety Gateway Cover Sheet", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=171"},
{"name": "_EDC Std Rpt - Safety Gateway e2b XML Case Report", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=172"},
{"name": "_EDC Std Rpt - Safety Gateway Reconciliation", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=179"},
{"name": "_EDC Std Rpt - Safety Gateway System Configuration", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=194"},
{"name": "_EDC Std Rpt - SafetyGateway Mapping Configuration", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=180"},
{"name": "_EDC Std Rpt - Self Evident Corrections", "description": "", "type": "Global", "url": "PromptsPage.aspx?ReportID=178"},
{"name": "_EDC Std Rpt - Self Evident Corrections", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=177"},
{"name": "_EDC Std Rpt - Signature History Report", "description": "", "type": "Global/Data Stream", "url": "PromptsPage.aspx?ReportID=176"},
{"name": "_EDC Std Rpt - Site Payment", "description": "", "type": "Data Stream", "url": "PromptsPage.aspx?ReportID=null"},
{"name": "_EDC Std Rpt - Special Characters", "description": "", "type": "Global/Data Stream", "url": "PromptsPage.aspx?ReportID=null"},
{"name": "360 Data Cleaning Progress Dashboard", "description": "Track eCRF Data Cleaning Progress", "type": "Dashboard", "url": "PromptsPage.aspx?ReportID=null"},
{"name": "360 Enrollment Tracking Dashboard", "description": "Track Subject Enrollment Performance", "type": "Dashboard", "url": "PromptsPage.aspx?ReportID=null"},
{"name": "360 Query Management Dashboard", "description": "Manage Queries", "type": "Dashboard", "url": "PromptsPage.aspx?ReportID=null"},
{"name": "Audit Trail", "description": "Audit Trail Report", "type": "Standard", "url": "PromptsPage.aspx?ReportID=null"},
{"name": "Data Listing", "description": "Data Listing Report", "type": "Standard", "url": "PromptsPage.aspx?ReportID=null"},
{"name": "Stream-Query Detail", "description": "Query Detail Report", "type": "Stream", "url": "PromptsPage.aspx?ReportID=null"}
]
+11
View File
@@ -0,0 +1,11 @@
from download_edc_datalistings import download_datalisting
# One-off driver: invoke download_datalisting for a single study, a
# hand-picked subset of its forms, and one country code.
# NOTE(review): what is actually downloaded and where it is stored is decided
# inside download_edc_datalistings — confirm there before relying on it.
study_id = "77242113UCO3001"
country_code = "CZE"

# Form names are passed through verbatim; keep the spelling exactly as given.
selected_forms = [
    "Trial Disposition (Completion / Discontinuation)",
    "Date of Visit",
    "Concomitant Therapy",
]

download_datalisting(study=study_id, forms=selected_forms, country=country_code)
+77
View File
@@ -0,0 +1,77 @@
{
"study": "77242113UCO3001",
"report_id": 92,
"forms": [
"Acknowledgement Reporting Form",
"Acknowledgement Upload Form",
"Additional Liver Event Assessment Forms",
"Advanced Therapy Treatment Failure Reason",
"Adverse Events/Serious AEs",
"Alcohol Consumption",
"Axial Spondyloarthropathy Diagnosis Information",
"Clinical Outcome Assessments Completion Status",
"Concomitant Therapy",
"Consents / Withdrawal of Consents for Optional Research",
"Corticosteroid, Immunomodulator And Oral Aminosalicylates History",
"Date of Visit",
"Death Information",
"Demographics",
"Derivation (operational form)",
"DSL Index Page (Must have for DSL functionality; not visible to the sites) (operational form)",
"DUMMY (operational form)",
"Endoscopy Information",
"Enrollment",
"Family History specific to Hepatic Event",
"Food/Liquid Fasting Compliance",
"General Medical History",
"GI Related Surgeries and Procedures",
"Group Selection",
"Hepatic Event - Other Risk Factors",
"History of GI Past Related Surgeries/Procedures",
"Inclusion/Exclusion Criteria",
"Integrated Medication Kit Accountability Information",
"Interim Investigator Signature",
"Intestinal Ultrasound",
"Limitation on Retention of Samples",
"Liver Biopsy",
"Liver Chemistry Abnormalities Assessment Form",
"Liver Event Case of AEs",
"Liver Event Chemistry Analytes",
"Liver Event Level 1 Analytes",
"Liver Event Level 2 Analytes",
"Liver Event Workup Completion Status",
"Liver Imaging Assessment",
"Liver-related Signs and Symptoms of Hypersensitivity",
"Liver-related Signs and Symptoms of Liver Injury",
"Medical Encounters",
"Medical History: Liver-related Diseases",
"Periodic Investigator's EDC Review Acknowledgement",
"Pharmacokinetics, Pharmacodynamic and Biomarker Sample Collection",
"Preplanned Surgeries/Procedures",
"Protocol Amendment Implementation (Operational Form)",
"Randomization",
"Relevant Additional Drug Therapies",
"Relevant Information Selection",
"Relevant Local Laboratory",
"Relevant Local Laboratory Data",
"Relevant Procedures",
"Relevant Study Medication",
"Relevant Tests",
"Safety Report Form",
"Screening for Tuberculosis",
"Site/Invest Identification",
"Study Drug Administration",
"Subject",
"Subject Site Switch",
"Substance Use Alcohol",
"Substance Use Tobacco/Nicotine",
"Treatment Disposition (End of treatment)",
"Treatment Unblinding",
"Trial Disposition (Completion / Discontinuation)",
"Tuberculosis Testing and Results",
"Ulcerative Colitis Disease History",
"Ulcerative Colitis Medication History",
"Unscheduled Assessments",
"Vital Signs"
]
}