This commit is contained in:
2026-06-03 15:56:52 +02:00
parent ea9d611719
commit 61c6aeea23
3254 changed files with 126 additions and 13241 deletions
+35 -25
View File
@@ -4,10 +4,11 @@ import glob
import datetime
import re
import json
import mysql.connector
import sys
import pandas as pd
import db_config
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from common.mongo_writer import get_db
# ── CONFIG ──────────────────────────────────────────────────────────────────
BASE_URL = "https://janssen.4gclinical.com"
@@ -22,8 +23,9 @@ DETAILS_DIR = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
# ────────────────────────────────────────────────────────────────────────────
def get_subjects(study):
pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report.xlsx")
def get_subjects(study, source_dir=None):
src = source_dir or INCOMING_DIR
pattern = os.path.join(src, f"* {study} Subject Summary Report*.xlsx")
files = sorted(
[f for f in glob.glob(pattern) if not os.path.basename(f).startswith("~$")],
key=os.path.getmtime,
@@ -58,25 +60,22 @@ def strip_html(html):
def get_existing_pks(study):
    """Load the already-imported notification pks for a study from Mongo.

    Queries the ``iwrs_notifications`` collection for documents whose
    ``study`` field equals *study*, projecting only ``_id``, and returns
    the set of those ``_id`` values.
    NOTE(review): this presumes the notification pk is stored as ``_id``
    by the writer side -- confirm against the import code.

    The lookup is deliberately best-effort: any failure while obtaining
    the database handle or running the query is reported with a printed
    warning and an empty set is returned instead of raising (the warning
    text states that everything will then be downloaded).

    Args:
        study: Study identifier to filter the notifications by.

    Returns:
        A set with the ``_id`` of every matching document, or an empty
        set when the lookup failed.
    """
    try:
        db = get_db()
        # Projection {"_id": 1} keeps the result small: only ids are needed.
        cursor = db.iwrs_notifications.find({"study": study}, {"_id": 1})
        return {doc["_id"] for doc in cursor}
    except Exception as e:
        # Broad catch is intentional: a missing/unreachable DB must not
        # abort the run, it only disables the "already imported" filter.
        print(f" UPOZORNĚNÍ: nelze načíst pk z Mongo ({e}), stahuji vše")
        return set()
def download_notifications_for_subject(page, study, subject, api_base, existing_pks, out_dir, table1_data):
"""Stáhne notifikace pro subjekta z již zachycené table_1 response."""
def download_notifications_for_subject(page, study, subject, api_base, existing_pks, out_dir, table1_data, flat=False):
"""Stáhne notifikace pro subjekta z již zachycené table_1 response.
flat=True → název obsahuje study+subject (pro Incoming/ kde leží všechno pohromadě).
"""
new_count = 0
for row in table1_data.get("data", []):
for notif in (row.get("notification") or []):
@@ -92,9 +91,13 @@ def download_notifications_for_subject(page, study, subject, api_base, existing_
text = strip_html(body)
actual_date = row.get("actual_date_raw", "0000-00-00")
pdf_filename = os.path.join(out_dir, f"{actual_date}_{safe_label}.pdf")
if flat:
stem = f"{actual_date}_{study}_{subject}_{safe_label}"
else:
stem = f"{actual_date}_{safe_label}"
pdf_filename = os.path.join(out_dir, f"{stem}.pdf")
if os.path.exists(pdf_filename):
pdf_filename = os.path.join(out_dir, f"{actual_date}_{safe_label}_pk{pk}.pdf")
pdf_filename = os.path.join(out_dir, f"{stem}_pk{pk}.pdf")
# Načti JWT čerstvě před každým requestem
jwt = page.evaluate("localStorage.getItem('JWT.access')")
@@ -126,11 +129,18 @@ def download_notifications_for_subject(page, study, subject, api_base, existing_
return new_count
def run(page, study):
out_dir = os.path.join(DETAILS_DIR, study)
def run(page, study, out_dir=None, subjects_source_dir=None):
"""
out_dir=None → legacy: ukládá do IncomingSourceReportsDetails/{study}/
out_dir=cesta → vše ukládá ploše do té cesty (Incoming/).
subjects_source_dir=None → čte summary z IncomingSourceReports/ (legacy).
"""
flat = out_dir is not None
if out_dir is None:
out_dir = os.path.join(DETAILS_DIR, study)
os.makedirs(out_dir, exist_ok=True)
subjects = get_subjects(study)
subjects = get_subjects(study, subjects_source_dir)
print(f" Nalezeno {len(subjects)} subjektů")
today = datetime.date.today().strftime("%Y-%m-%d")
@@ -216,7 +226,7 @@ def run(page, study):
# Stáhnout notifikace pro tohoto subjekta
if api_base and table1_data:
n = download_notifications_for_subject(
page, study, subject, api_base, existing_pks, out_dir, table1_data
page, study, subject, api_base, existing_pks, out_dir, table1_data, flat=flat
)
total_notif += n