commit aa194daf91f016d7b72c9d1feaed2e8a8a75c153
Author: vladimir.buzalka
Date:   Wed Apr 8 14:57:17 2026 +0200

    Initial commit: Janssen 4G Clinical report automation

    - download_reports.py: Playwright script for site inventory XLS downloads
    - download_ip_destruction.py: Playwright script for IP destruction basket downloads
    - create_accountability_report.py: combines both sources into formatted accountability Excel
    - list_reports.py: discovers available reports on portal
    - reports.json: 21 available report URLs
    - .gitignore: excludes downloaded XLS files and output Excel

    Co-Authored-By: Claude Sonnet 4.6

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0ee9a6e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+xls_reports/
+xls_ip_destruction/
+accountability_combined.xlsx
+accountability_formatted.xlsx
+__pycache__/
+*.pyc
+.idea/
+.claude/
diff --git a/create_accountability_report.py b/create_accountability_report.py
new file mode 100644
index 0000000..f11b2e3
--- /dev/null
+++ b/create_accountability_report.py
@@ -0,0 +1,203 @@
+"""Combine on-site inventory and IP destruction exports into one formatted workbook."""
+import pandas as pd
+from pathlib import Path
+from openpyxl import load_workbook
+from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
+from openpyxl.utils import get_column_letter
+
+INVENTORY_DIR = Path("xls_reports")
+DESTRUCTION_DIR = Path("xls_ip_destruction")
+OUTPUT_FILE = "accountability_combined.xlsx"
+SHEET_NAME = "CountryMedicationOverview"
+
+# Leading rows of a destruction export that are scanned for basket metadata.
+DESTRUCTION_META_ROWS = 15
+
+# Source column name -> short header used in the output sheet.
+COLUMN_RENAMES = {
+    "Site": "Site",
+    "Medication ID": "Med ID",
+    "Packaged Lot number": "Lot No.",
+    "Original Expiration Date when Packaged Lot was Added": "Orig Exp Date",
+    "Expiration date": "Exp Date",
+    "Received Date": "Rcv Date",
+    "Shipment Receipt User": "Rcpt User",
+    "Subject Identifier": "Subject ID",
+    "Quantity Assigned": "Qty Asgn",
+    "IRT Transaction": "IRT Tx",
+    "Date Assigned": "Date Asgn",
+    "Assignment User": "Asgn User",
+    "Dispensation Status": "Disp Status",
+    "Dispensing Date": "Disp Date",
+    "Quantity Dispensed": "Qty Disp",
+    "Dispensing User": "Disp User",
+    "Quantity Returned": "Qty Ret",
+    "Date Returned": "Date Ret",
+    "Return User": "Ret User",
+    "DestroyedOn": "Destroyed",
+    "Basket number": "Basket No.",
+}
+
+# Output columns that are parsed as dates and shown as DD-MMM-YYYY.
+DATE_COLUMNS = {
+    "Orig Exp Date", "Exp Date", "Rcv Date",
+    "Date Asgn", "Disp Date", "Date Ret", "Destroyed",
+}
+
+# Output column header -> Excel column width (unlisted headers fall back to 14).
+COLUMN_WIDTHS = {
+    "Site": 14,
+    "Med ID": 10,
+    "Lot No.": 12,
+    "Orig Exp Date": 16,
+    "Exp Date": 14,
+    "Rcv Date": 14,
+    "Rcpt User": 22,
+    "Subject ID": 14,
+    "Qty Asgn": 9,
+    "IRT Tx": 8,
+    "Date Asgn": 14,
+    "Asgn User": 20,
+    "Disp Status": 16,
+    "Disp Date": 14,
+    "Qty Disp": 9,
+    "Disp User": 20,
+    "Qty Ret": 10,
+    "Date Ret": 14,
+    "Ret User": 18,
+    "Destroyed": 14,
+    "Basket No.": 12,
+}
+
+
+def read_inventory(path):
+    """Read one on-site inventory export.
+
+    The rows above the table are free-text metadata; the table starts at the
+    row whose first cell is "Medication ID".
+
+    Returns a (data, meta) tuple: the table as a DataFrame and a dict that
+    contains "site" when a "Site:" metadata row was found.
+    Raises IndexError when no "Medication ID" header row exists.
+    """
+    df = pd.read_excel(path, header=None)
+    header_row = df[df[0] == "Medication ID"].index[0]
+    data = pd.read_excel(path, header=header_row)
+    meta = {}
+    for i in range(header_row):
+        val = str(df.iloc[i, 0]) if pd.notna(df.iloc[i, 0]) else ""
+        if val.startswith("Site:"):
+            meta["site"] = val.replace("Site:", "").strip()
+    return data, meta
+
+
+def read_destruction_lookup():
+    """Build a kit lookup from every workbook in DESTRUCTION_DIR.
+
+    Returns a dict mapping the integer Medication ID to a
+    (basket_id, destroyed_on) tuple taken from the "Basket ID:" and
+    "Drug Destruction Created Date:" metadata rows; either element is None
+    when its row is absent. A kit listed in several workbooks keeps the
+    values of the workbook read last.
+    """
+    lookup = {}
+    for path in DESTRUCTION_DIR.glob("*.xlsx"):
+        df = pd.read_excel(path, header=None)
+        basket_id = None
+        destroyed_on = None
+        # Clamp to the sheet length so a short export cannot raise IndexError.
+        for i in range(min(DESTRUCTION_META_ROWS, len(df))):
+            val = str(df.iloc[i, 0]) if pd.notna(df.iloc[i, 0]) else ""
+            if val.startswith("Basket ID:"):
+                basket_id = val.replace("Basket ID:", "").strip()
+            if val.startswith("Drug Destruction Created Date:"):
+                destroyed_on = val.replace("Drug Destruction Created Date:", "").strip()
+        header_row = df[df[0] == "Medication ID Description"].index[0]
+        data = pd.read_excel(path, header=header_row)
+        for med_id in data["Medication ID"].dropna():
+            lookup[int(med_id)] = (basket_id, destroyed_on)
+    return lookup
+
+
+def main():
+    """Merge all inventory exports with destruction data and write OUTPUT_FILE."""
+    lookup = read_destruction_lookup()
+    print(f"Loaded {len(lookup)} kits from destruction reports")
+
+    all_rows = []
+    for path in sorted(INVENTORY_DIR.glob("onsite_inventory_detail_*.xlsx")):
+        df, meta = read_inventory(path)
+        df["DestroyedOn"] = df["Medication ID"].apply(
+            lambda x: lookup.get(int(x), (None, None))[1] if pd.notna(x) else None
+        )
+        df["Basket number"] = df["Medication ID"].apply(
+            lambda x: lookup.get(int(x), (None, None))[0] if pd.notna(x) else None
+        )
+        df.insert(0, "Site", meta.get("site", path.stem))
+        all_rows.append(df)
+        print(f" {path.name}: {len(df)} kits")
+
+    # pd.concat raises ValueError on an empty list; stop with a clear message.
+    if not all_rows:
+        raise SystemExit(f"No inventory reports found in {INVENTORY_DIR}")
+
+    combined = pd.concat(all_rows, ignore_index=True)
+
+    # Rename columns
+    combined.rename(columns=COLUMN_RENAMES, inplace=True)
+
+    # Convert date columns
+    for col in DATE_COLUMNS:
+        if col in combined.columns:
+            combined[col] = pd.to_datetime(combined[col], dayfirst=True, errors="coerce")
+
+    # Sort
+    combined.sort_values(["Site", "Rcv Date", "Med ID"], inplace=True, ignore_index=True)
+
+    combined.to_excel(OUTPUT_FILE, index=False, sheet_name=SHEET_NAME)
+
+    # ── Formatting ────────────────────────────────────────────────────────────
+    wb = load_workbook(OUTPUT_FILE)
+    ws = wb[SHEET_NAME]
+
+    header_fill = PatternFill("solid", start_color="1F4E79")
+    header_font = Font(bold=True, color="FFFFFF", name="Arial", size=10)
+    new_col_fill = PatternFill("solid", start_color="E2EFDA")
+    row_font = Font(name="Arial", size=10)
+
+    thin = Side(style="thin", color="000000")
+    border = Border(left=thin, right=thin, top=thin, bottom=thin)
+
+    headers = [cell.value for cell in ws[1]]
+    new_cols = {"Destroyed", "Basket No."}
+
+    for cell in ws[1]:
+        cell.fill = header_fill
+        cell.font = header_font
+        cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=False)
+        cell.border = border
+
+    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
+        for cell in row:
+            col_name = headers[cell.column - 1] if cell.column <= len(headers) else None
+            cell.font = row_font
+            cell.border = border
+            cell.alignment = Alignment(horizontal="center")
+            if col_name in DATE_COLUMNS:
+                cell.number_format = "DD-MMM-YYYY"
+            if col_name in new_cols:
+                cell.fill = new_col_fill
+
+    for cell in ws[1]:
+        width = COLUMN_WIDTHS.get(cell.value, 14)
+        ws.column_dimensions[get_column_letter(cell.column)].width = width
+
+    ws.auto_filter.ref = ws.dimensions
+    ws.freeze_panes = "A2"
+
+    wb.save(OUTPUT_FILE)
+    print(f"\nSaved: {OUTPUT_FILE} ({len(combined)} rows, sheet '{SHEET_NAME}')")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/download_ip_destruction.py b/download_ip_destruction.py
new file mode 100644
index 0000000..5e2959d
--- /dev/null
+++ b/download_ip_destruction.py
@@ -0,0 +1,94 @@
+"""Download every IP destruction basket report from the portal as XLSX files."""
+from playwright.sync_api import sync_playwright
+import os
+
+# ── CONFIG ──────────────────────────────────────────────────────────────────
+BASE_URL = "https://janssen.4gclinical.com"
+STUDY = "42847922MDD3003"
+
+# Credentials come from the environment so that no secret is stored in the
+# repository. A password that was ever committed in plain text must be rotated.
+EMAIL = os.environ.get("PORTAL_EMAIL", "")
+PASSWORD = os.environ.get("PORTAL_PASSWORD", "")
+
+OUTPUT_DIR = "xls_ip_destruction"
+# ────────────────────────────────────────────────────────────────────────────
+
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+
+def download_ip_destruction():
+    """Save every basket offered by the IP destruction form as an XLSX file.
+
+    Baskets whose output file already exists in OUTPUT_DIR are skipped.
+    Raises SystemExit when PORTAL_EMAIL or PORTAL_PASSWORD is not set.
+    """
+    if not EMAIL or not PASSWORD:
+        raise SystemExit("Set PORTAL_EMAIL and PORTAL_PASSWORD before running this script.")
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=False)
+        context = browser.new_context(accept_downloads=True)
+        page = context.new_page()
+
+        # Log in
+        page.goto(BASE_URL)
+        page.wait_for_load_state("networkidle")
+        page.get_by_label("Email *").fill(EMAIL)
+        page.get_by_label("Password *").fill(PASSWORD)
+        page.locator('#login__submit').click()
+        page.wait_for_load_state("networkidle")
+
+        # Select the study
+        page.get_by_label("Study *").click()
+        page.get_by_role("option", name=STUDY).click()
+        page.get_by_role("button", name="SELECT").click()
+        page.wait_for_load_state("networkidle")
+
+        # Navigate to the report
+        page.goto(f"{BASE_URL}/report/ip_destruction_form")
+        page.wait_for_load_state("networkidle", timeout=15000)
+
+        # Read the available baskets
+        page.locator('input[placeholder="search"], input[type="text"]').first.click()
+        page.wait_for_timeout(1000)
+        baskets = [b.strip() for b in page.locator('mat-option').all_inner_texts()]
+        print(f"Nalezeno {len(baskets)} košíků: {baskets}")
+        page.keyboard.press("Escape")
+        page.wait_for_timeout(500)
+
+        for basket in baskets:
+            filename = os.path.join(OUTPUT_DIR, f"ip_destruction_basket_{basket}.xlsx")
+            if os.path.exists(filename):
+                print(f"[{basket}] Přeskakuji — soubor již existuje.")
+                continue
+            print(f"[{basket}] Stahuji...")
+
+            # Open the dropdown and pick the basket via dispatch_event
+            input_field = page.locator('input[placeholder="search"], input[type="text"]').first
+            input_field.click()
+            input_field.fill(basket)
+            page.wait_for_timeout(500)
+            page.locator('mat-option').first.dispatch_event('click')
+
+            # Wait for the data to load
+            page.wait_for_load_state("networkidle", timeout=30000)
+
+            # Download the XLS
+            with page.expect_download(timeout=30000) as dl:
+                page.get_by_role("button", name="Download XLS").click()
+
+            download = dl.value
+            download.save_as(filename)
+            print(f"[{basket}] Uloženo → {filename}")
+
+            # Reset for the next basket
+            page.get_by_role("button", name="Clear").click()
+            page.wait_for_load_state("networkidle", timeout=15000)
+
+        browser.close()
+        print("\nHotovo!")
+
+
+if __name__ == "__main__":
+    download_ip_destruction()
diff --git a/download_reports.py b/download_reports.py
new file mode 100644
index 0000000..8196945
--- /dev/null
+++ b/download_reports.py
@@ -0,0 +1,88 @@
+"""Download the on-site inventory detail report of each configured site as XLSX."""
+from playwright.sync_api import sync_playwright
+import os
+
+# ── CONFIG ──────────────────────────────────────────────────────────────────
+BASE_URL = "https://janssen.4gclinical.com"
+STUDY = "42847922MDD3003"
+
+# Credentials come from the environment so that no secret is stored in the
+# repository. A password that was ever committed in plain text must be rotated.
+EMAIL = os.environ.get("PORTAL_EMAIL", "")
+PASSWORD = os.environ.get("PORTAL_PASSWORD", "")
+
+SITES = [
+    "S10-CZ10002",
+    "S10-CZ10004",
+    "S10-CZ10005",
+    "S10-CZ10008",
+    "S10-CZ10011",
+    "S10-CZ10012",
+]
+
+OUTPUT_DIR = "xls_reports"
+# ────────────────────────────────────────────────────────────────────────────
+
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+
+def download_reports():
+    """Save the on-site inventory detail report of every site in SITES as XLSX.
+
+    Raises SystemExit when PORTAL_EMAIL or PORTAL_PASSWORD is not set.
+    """
+    if not EMAIL or not PASSWORD:
+        raise SystemExit("Set PORTAL_EMAIL and PORTAL_PASSWORD before running this script.")
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=False)
+        context = browser.new_context(accept_downloads=True)
+        page = context.new_page()
+
+        # Log in
+        page.goto(BASE_URL)
+        page.wait_for_load_state("networkidle")
+        page.get_by_label("Email *").fill(EMAIL)
+        page.get_by_label("Password *").fill(PASSWORD)
+        page.locator('#login__submit').click()
+        page.wait_for_load_state("networkidle")
+
+        # Select the study
+        page.get_by_label("Study *").click()
+        page.get_by_role("option", name=STUDY).click()
+        page.get_by_role("button", name="SELECT").click()
+        page.wait_for_load_state("networkidle")
+
+        # Navigate to the report page
+        page.goto(f"{BASE_URL}/report/onsite_inventory_detail")
+        page.wait_for_load_state("networkidle", timeout=15000)
+
+        for site_id in SITES:
+            print(f"[{site_id}] Stahuji...")
+
+            # Open the dropdown and pick the site
+            page.locator('input[placeholder="search"], input[type="text"]').first.click()
+            page.get_by_role("option", name=site_id).click()
+
+            # Wait until the data has finished loading (network goes idle)
+            page.wait_for_load_state("networkidle", timeout=30000)
+
+            # Download the XLS
+            with page.expect_download(timeout=30000) as dl:
+                page.get_by_role("button", name="Download XLS").click()
+
+            download = dl.value
+            filename = os.path.join(OUTPUT_DIR, f"onsite_inventory_detail_{site_id}.xlsx")
+            download.save_as(filename)
+            print(f"[{site_id}] Uloženo → {filename}")
+
+            # Clear the site selection for the next iteration
+            page.get_by_role("button", name="Clear").click()
+            page.wait_for_load_state("networkidle", timeout=15000)
+
+        browser.close()
+        print("\nHotovo! Všechny reporty staženy.")
+
+
+if __name__ == "__main__":
+    download_reports()
diff --git a/list_reports.py b/list_reports.py
new file mode 100644
index 0000000..ab0e876
--- /dev/null
+++ b/list_reports.py
@@ -0,0 +1,88 @@
+"""Discover the reports available on the portal and store them in reports.json."""
+from playwright.sync_api import sync_playwright
+import json
+import os
+
+# ── CONFIG ──────────────────────────────────────────────────────────────────
+BASE_URL = "https://janssen.4gclinical.com"
+STUDY = "42847922MDD3003"
+
+# Credentials come from the environment so that no secret is stored in the
+# repository. A password that was ever committed in plain text must be rotated.
+EMAIL = os.environ.get("PORTAL_EMAIL", "")
+PASSWORD = os.environ.get("PORTAL_PASSWORD", "")
+# ────────────────────────────────────────────────────────────────────────────
+
+
+def list_reports():
+    """Collect the name and URL path of every report on the portal's report list.
+
+    Writes the result to reports.json after each report and once more at the
+    end, and returns it as a list of {"name", "href"} dicts.
+    Raises SystemExit when PORTAL_EMAIL or PORTAL_PASSWORD is not set.
+    """
+    if not EMAIL or not PASSWORD:
+        raise SystemExit("Set PORTAL_EMAIL and PORTAL_PASSWORD before running this script.")
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=False)
+        page = browser.new_page()
+
+        # Log in
+        page.goto(BASE_URL)
+        page.wait_for_load_state("networkidle")
+
+        page.get_by_label("Email *").fill(EMAIL)
+        page.get_by_label("Password *").fill(PASSWORD)
+        page.locator('#login__submit').click()
+        page.wait_for_load_state("networkidle")
+
+        # Select the study: open the dropdown, pick the study, click SELECT
+        page.get_by_label("Study *").click()
+        page.get_by_role("option", name=STUDY).click()
+        page.get_by_role("button", name="SELECT").click()
+        page.wait_for_load_state("networkidle")
+
+        # Go to the report list
+        page.goto(f"{BASE_URL}/reports")
+        page.wait_for_load_state("networkidle")
+        page.wait_for_selector('[role="gridcell"] a', timeout=15000)
+
+        # Collect the report names
+        names = page.evaluate("""
+            () => Array.from(document.querySelectorAll('[role="gridcell"] a'))
+                .map(a => a.innerText.trim())
+                .filter(n => n)
+        """)
+        print(f"\nNalezeno {len(names)} reportů, zjišťuji URL...\n")
+
+        # For each report: click it, record the URL and return to the list
+        reports = []
+        for name in names:
+            with page.expect_navigation(timeout=15000):
+                page.locator('[role="gridcell"] a').filter(has_text=name).click()
+            page.wait_for_load_state("networkidle")
+            page.wait_for_timeout(2000)
+            path = page.url.replace(BASE_URL, "")
+            reports.append({"name": name, "href": path})
+            print(f" {name:50s} {path}")
+            # Save progress after every report so a later failure loses nothing
+            with open("reports.json", "w", encoding="utf-8") as f:
+                json.dump(reports, f, ensure_ascii=False, indent=2)
+            if page.url != f"{BASE_URL}/reports":
+                page.goto(f"{BASE_URL}/reports")
+                page.wait_for_load_state("networkidle")
+                page.wait_for_timeout(2000)
+            page.wait_for_selector('[role="gridcell"] a', timeout=30000)
+
+        browser.close()
+
+    with open("reports.json", "w", encoding="utf-8") as f:
+        json.dump(reports, f, ensure_ascii=False, indent=2)
+    print(f"\nUloženo do reports.json")
+
+    return reports
+
+
+if __name__ == "__main__":
+    list_reports()
diff --git a/reports.json b/reports.json
new file mode 100644
index 0000000..c66e6eb
--- /dev/null
+++ b/reports.json
@@ -0,0 +1,23 @@
+[
+  {"name": "Drug Accountability Form - Multiple Subjects", "href": "/report/drug_accountability_form_multiple_subjects"},
+  {"name": "Drug Accountability Form - Single Subject", "href": "/report/drug_accountability_form_single_subject"},
+  {"name": "Janssen Pharmaceuticals IP Destruction Form", "href": "/report/ip_destruction_form"},
+  {"name": "On-Site Drug Inventory and Accountability Details Form", "href": "/report/onsite_inventory_detail"},
+  {"name": "On-Site Drug Inventory Form", "href": "/report/onsite_drug_inventory_form"},
+  {"name": "Location Summary Report", "href": "/report/country_summary_report"},
+  {"name": "Site Detail Report", "href": "/report/site_detail_report"},
+  {"name": "Study Sites Report", "href": "/report/study_sites_report"},
+  {"name": "Site Inventory Detail Report", "href": "/report/site_inventory_detail"},
+  {"name": "Site Inventory Summary Report", "href": "/report/site_inventory_summary"},
+  {"name": "Subject Data Changes Report", "href": "/report/patient_data_changes_report"},
+  {"name": "Subject Detail Report", "href": "/report/patient_detail_report"},
+  {"name": "Subject Summary Report", "href": "/report/patient_summary_report"},
+  {"name": "Subject Visit Summary Report", "href": "/report/patient_visit_summary"},
+  {"name": "Shipment Details Report", "href": "/report/shipment_details_report"},
+  {"name": "Shipments Report", "href": "/report/shipments_report"},
+  {"name": "Cohort History Report", "href": "/report/cohort_history_report"},
+  {"name": "Cohort Summary Report", "href": "/report/cohort_summary_report"},
+  {"name": "Site Activations Report", "href": "/report/site_activation_pivot"},
+  {"name": "User Login History", "href": "/report/user_logins"},
+  {"name": "Users List", "href": "/report/users"}
+]