""" Surgery (V0 Implantation Visit) Import — scrapes from EvaMed DRY study and upserts into MongoDB. Run repeatedly; only stores field-level changes (delta) in history[]. Unique key: _form_id (each form has a unique ID in EvaMed). MongoDB: db=Dry, collection=Surgery """ import asyncio import re from datetime import datetime from pathlib import Path from playwright.async_api import async_playwright from pymongo import MongoClient BASE_URL = "https://prod.evamed.com/etude/soft/index.php" LOGIN_URL = f"{BASE_URL}?module=authentification&class=login&client=myopowers-dry" LIST_URL = f"{BASE_URL}?module=monitoring&class=formslisting¢er_id=2&formtype=10&l=ALL" LOGIN = "vbuzalka" PASSWORD = "Vlado9674+" MONGO_HOST = "192.168.1.76" DB_NAME = "Dry" COLLECTION = "Surgery" SESSION_FILE = Path(__file__).parent / "session.json" DATE_RE = re.compile(r"^(\d{2})/(\d{2})/(\d{4})$") def parse_value(value): """Parse DD/MM/YYYY -> datetime, digit-only -> int, else str. None if empty.""" if not value or not value.strip(): return None v = value.strip() m = DATE_RE.fullmatch(v) if m: return datetime(int(m.group(3)), int(m.group(2)), int(m.group(1))) if re.fullmatch(r"\d+", v): return int(v) return v async def do_login(page): await page.goto(LOGIN_URL) await page.wait_for_load_state("networkidle") await page.locator("#login").fill(LOGIN) await page.locator('input[type="password"]').first.fill(PASSWORD) await page.click('input[value="Connection"]') await page.wait_for_load_state("networkidle") async def get_form_ids(page): """Return list of {formId, patientCode} from the filtered forms list.""" await page.goto(LIST_URL) await page.wait_for_load_state("networkidle") return await page.evaluate("""() => { const results = []; document.querySelectorAll('a[title="Open form"]').forEach(a => { const href = a.getAttribute('href') || ''; const m = href.match(/id=(\\d+)/); if (!m) return; const row = a.closest('tr'); const dirLink = row ? row.querySelector('a[title="Open directory"]') : null; const patientCode = dirLink ? dirLink.innerText.trim() : ''; results.push({ formId: m[1], patientCode: patientCode }); }); return results; }""") async def extract_form_fields(page, form_id): """Navigate to form and extract all field values.""" url = f"{BASE_URL}?module=dossier&class=file&event=show&id={form_id}#fiche" await page.goto(url) await page.wait_for_load_state("networkidle") raw = await page.evaluate("""() => { const fields = {}; document.querySelectorAll('.tableauFormulaire span.label').forEach(label => { const key = label.innerText.trim(); const valEl = label.nextElementSibling; fields[key] = valEl ? valEl.innerText.trim() || null : null; }); return fields; }""") parsed = {} for k, v in raw.items(): parsed[k] = parse_value(v) parsed['_form_id'] = int(form_id) return parsed def upsert(collection, doc, patient_code, now): form_id = doc['_form_id'] key = {"_form_id": form_id} existing = collection.find_one(key) skip = {'_form_id'} data = {k: v for k, v in doc.items() if k not in skip} if existing is None: collection.insert_one({ **key, "patient_code": patient_code, "data": data, "history": [], "first_seen_at": now, "last_seen_at": now, "deleted_at": None, }) print(f" NEW {patient_code} Surgery form_id={form_id}") return old_data = existing.get("data", {}) changes = {} for k in set(data) | set(old_data): old_v = old_data.get(k) new_v = data.get(k) if old_v != new_v: changes[k] = {"old": old_v, "new": new_v} update = {"$set": {"last_seen_at": now, "deleted_at": None, "patient_code": patient_code}} if changes: update["$set"]["data"] = data update["$push"] = {"history": {"timestamp": now, "changes": changes}} print(f" CHANGED {patient_code} Surgery form_id={form_id} -> {list(changes.keys())}") else: print(f" ok {patient_code} Surgery form_id={form_id}") collection.update_one(key, update) async def main(): mongo = MongoClient(MONGO_HOST) col = mongo[DB_NAME][COLLECTION] now = datetime.now() async with async_playwright() as p: browser = await p.chromium.launch(headless=False) if SESSION_FILE.exists(): context = await browser.new_context(storage_state=str(SESSION_FILE)) print("Loaded saved session") else: context = await browser.new_context(viewport={"width": 1400, "height": 900}) page = await context.new_page() await page.goto(LIST_URL) await page.wait_for_load_state("networkidle") if "authentification" in page.url: print("Logging in...") await do_login(page) await context.storage_state(path=str(SESSION_FILE)) print("Session saved") else: print("Session valid") form_infos = await get_form_ids(page) current_ids = {info['formId'] for info in form_infos} print(f"Found {len(form_infos)} Surgery forms") for info in form_infos: fid = info['formId'] print(f"Scraping form {fid} ({info['patientCode']})...") doc = await extract_form_fields(page, fid) upsert(col, doc, info['patientCode'], now) for rec in col.find({"deleted_at": None}, {"_form_id": 1, "patient_code": 1}): if str(rec.get('_form_id', '')) not in current_ids: col.update_one({"_id": rec["_id"]}, {"$set": {"deleted_at": now}}) print(f" DELETED form_id={rec['_form_id']} ({rec.get('patient_code')})") await browser.close() mongo.close() print(f"\nDone -- {len(form_infos)} forms processed at {now.isoformat()}") if __name__ == "__main__": asyncio.run(main())