z230
This commit is contained in:
@@ -4,10 +4,11 @@ import glob
|
||||
import datetime
|
||||
import re
|
||||
import json
|
||||
import mysql.connector
|
||||
|
||||
import sys
|
||||
import pandas as pd
|
||||
import db_config
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from common.mongo_writer import get_db
|
||||
|
||||
# ── CONFIG ──────────────────────────────────────────────────────────────────
|
||||
BASE_URL = "https://janssen.4gclinical.com"
|
||||
@@ -22,8 +23,9 @@ DETAILS_DIR = os.path.join(BASE_DIR, "IncomingSourceReportsDetails")
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def get_subjects(study):
|
||||
pattern = os.path.join(INCOMING_DIR, f"* {study} Subject Summary Report.xlsx")
|
||||
def get_subjects(study, source_dir=None):
|
||||
src = source_dir or INCOMING_DIR
|
||||
pattern = os.path.join(src, f"* {study} Subject Summary Report*.xlsx")
|
||||
files = sorted(
|
||||
[f for f in glob.glob(pattern) if not os.path.basename(f).startswith("~$")],
|
||||
key=os.path.getmtime,
|
||||
@@ -58,25 +60,22 @@ def strip_html(html):
|
||||
|
||||
|
||||
def get_existing_pks(study):
|
||||
"""Načte už importované pk notifikací pro studii z Mongo."""
|
||||
try:
|
||||
conn = mysql.connector.connect(
|
||||
host=db_config.DB_HOST, port=db_config.DB_PORT,
|
||||
user=db_config.DB_USER, password=db_config.DB_PASSWORD,
|
||||
database=db_config.DB_NAME,
|
||||
)
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("SELECT pk FROM iwrs_notifications WHERE study = %s", (study,))
|
||||
pks = {row[0] for row in cursor.fetchall()}
|
||||
cursor.close()
|
||||
conn.close()
|
||||
return pks
|
||||
db = get_db()
|
||||
return {d["_id"] for d in db.iwrs_notifications.find(
|
||||
{"study": study}, {"_id": 1}
|
||||
)}
|
||||
except Exception as e:
|
||||
print(f" UPOZORNĚNÍ: nelze načíst pk z DB ({e}), stahuji vše")
|
||||
print(f" UPOZORNĚNÍ: nelze načíst pk z Mongo ({e}), stahuji vše")
|
||||
return set()
|
||||
|
||||
|
||||
def download_notifications_for_subject(page, study, subject, api_base, existing_pks, out_dir, table1_data):
|
||||
"""Stáhne notifikace pro subjekta z již zachycené table_1 response."""
|
||||
def download_notifications_for_subject(page, study, subject, api_base, existing_pks, out_dir, table1_data, flat=False):
|
||||
"""Stáhne notifikace pro subjekta z již zachycené table_1 response.
|
||||
|
||||
flat=True → název obsahuje study+subject (pro Incoming/ kde leží všechno pohromadě).
|
||||
"""
|
||||
new_count = 0
|
||||
for row in table1_data.get("data", []):
|
||||
for notif in (row.get("notification") or []):
|
||||
@@ -92,9 +91,13 @@ def download_notifications_for_subject(page, study, subject, api_base, existing_
|
||||
text = strip_html(body)
|
||||
actual_date = row.get("actual_date_raw", "0000-00-00")
|
||||
|
||||
pdf_filename = os.path.join(out_dir, f"{actual_date}_{safe_label}.pdf")
|
||||
if flat:
|
||||
stem = f"{actual_date}_{study}_{subject}_{safe_label}"
|
||||
else:
|
||||
stem = f"{actual_date}_{safe_label}"
|
||||
pdf_filename = os.path.join(out_dir, f"{stem}.pdf")
|
||||
if os.path.exists(pdf_filename):
|
||||
pdf_filename = os.path.join(out_dir, f"{actual_date}_{safe_label}_pk{pk}.pdf")
|
||||
pdf_filename = os.path.join(out_dir, f"{stem}_pk{pk}.pdf")
|
||||
|
||||
# Načti JWT čerstvě před každým requestem
|
||||
jwt = page.evaluate("localStorage.getItem('JWT.access')")
|
||||
@@ -126,11 +129,18 @@ def download_notifications_for_subject(page, study, subject, api_base, existing_
|
||||
return new_count
|
||||
|
||||
|
||||
def run(page, study):
|
||||
out_dir = os.path.join(DETAILS_DIR, study)
|
||||
def run(page, study, out_dir=None, subjects_source_dir=None):
|
||||
"""
|
||||
out_dir=None → legacy: ukládá do IncomingSourceReportsDetails/{study}/
|
||||
out_dir=cesta → vše ukládá ploše do té cesty (Incoming/).
|
||||
subjects_source_dir=None → čte summary z IncomingSourceReports/ (legacy).
|
||||
"""
|
||||
flat = out_dir is not None
|
||||
if out_dir is None:
|
||||
out_dir = os.path.join(DETAILS_DIR, study)
|
||||
os.makedirs(out_dir, exist_ok=True)
|
||||
|
||||
subjects = get_subjects(study)
|
||||
subjects = get_subjects(study, subjects_source_dir)
|
||||
print(f" Nalezeno {len(subjects)} subjektů")
|
||||
today = datetime.date.today().strftime("%Y-%m-%d")
|
||||
|
||||
@@ -216,7 +226,7 @@ def run(page, study):
|
||||
# Stáhnout notifikace pro tohoto subjekta
|
||||
if api_base and table1_data:
|
||||
n = download_notifications_for_subject(
|
||||
page, study, subject, api_base, existing_pks, out_dir, table1_data
|
||||
page, study, subject, api_base, existing_pks, out_dir, table1_data, flat=flat
|
||||
)
|
||||
total_notif += n
|
||||
|
||||
|
||||
Reference in New Issue
Block a user