Initial commit — clean history (removed large test files, browser profiles, Medidata/Clario downloads)
This commit is contained in:
@@ -0,0 +1,165 @@
|
||||
"""Exploration script — Device Deficiency forms in EvaMed DRY study."""
|
||||
|
||||
import asyncio
import json
import os
from pathlib import Path

from playwright.async_api import async_playwright
|
||||
|
||||
# Single PHP entry point of the EvaMed application; pages are selected through
# the query string appended below.
BASE_URL = "https://prod.evamed.com/etude/soft/index.php"
LOGIN_URL = f"{BASE_URL}?module=authentification&class=login&client=myopowers-dry"
# Forms listing filtered on center_id=2 and formtype=121 (presumably the
# Device Deficiency form type, per the module docstring -- confirm).
# The parameter separator must be a literal "&center_id": an HTML-entity
# mangling ("&cent" -> U+00A2) silently breaks the query.
LIST_URL = f"{BASE_URL}?module=monitoring&class=formslisting&center_id=2&formtype=121&l=ALL"

# NOTE(security): credentials should not live in version control. They can be
# supplied through EVAMED_LOGIN / EVAMED_PASSWORD; the literals below are kept
# only as a backward-compatible fallback and should be rotated and removed.
LOGIN = os.environ.get("EVAMED_LOGIN", "vbuzalka")
PASSWORD = os.environ.get("EVAMED_PASSWORD", "Vlado9674+")

# Output locations, both next to this script.
SCREENSHOTS_DIR = Path(__file__).parent / "screenshots_dd"
SESSION_FILE = Path(__file__).parent / "session.json"
|
||||
|
||||
|
||||
# Viewport applied to every browser context so screenshots have the same size
# whether or not a saved session is reused.
_VIEWPORT = {"width": 1400, "height": 900}


async def _open_listing(context, page):
    """Load the DD listing page, logging in first when the session is not valid.

    A URL containing "authentification" after navigating to the listing is
    treated as "not logged in" (the login URL uses that module name).

    Raises:
        RuntimeError: if the listing still lands on the authentication module
            after the credentials were submitted.
    """
    await page.goto(LIST_URL)
    await page.wait_for_load_state("networkidle")
    if "authentification" not in page.url:
        print("Session valid")
        return

    print("Logging in...")
    await page.goto(LOGIN_URL)
    await page.wait_for_load_state("networkidle")
    await page.locator("#login").fill(LOGIN)
    await page.locator('input[type="password"]').first.fill(PASSWORD)
    await page.click('input[value="Connection"]')
    await page.wait_for_load_state("networkidle")

    await page.goto(LIST_URL)
    await page.wait_for_load_state("networkidle")
    # Verify the login with the same criterion used above before persisting
    # anything, so a failed login never overwrites the session file.
    if "authentification" in page.url:
        raise RuntimeError(
            "Login failed: the forms listing still redirects to the authentication page"
        )
    await context.storage_state(path=str(SESSION_FILE))
    print("Session saved")


async def _collect_form_infos(page):
    """Return one dict (formId, patientCode, cells) per "Open form" link on the listing."""
    return await page.evaluate("""() => {
        const results = [];
        document.querySelectorAll('a[title="Open form"]').forEach(a => {
            const href = a.getAttribute('href') || '';
            const m = href.match(/id=(\\d+)/);
            if (!m) return;
            const row = a.closest('tr');
            const dirLink = row ? row.querySelector('a[title="Open directory"]') : null;
            const patientCode = dirLink ? dirLink.innerText.trim() : '';
            const cells = row ? Array.from(row.querySelectorAll('td')).map(c => c.innerText.trim()) : [];
            results.push({ formId: m[1], patientCode, cells });
        });
        return results;
    }""")


async def _extract_label_value_fields(page):
    """Return key/value/valueClass for each span.label inside .tableauFormulaire.

    The value is taken from the label's next element sibling (null when absent).
    """
    return await page.evaluate("""() => {
        const fields = [];
        document.querySelectorAll('.tableauFormulaire span.label').forEach(label => {
            const key = label.innerText.trim();
            const valEl = label.nextElementSibling;
            const value = valEl ? valEl.innerText.trim() : null;
            const valClass = valEl ? valEl.className : '';
            fields.push({ key, value, valueClass: valClass });
        });
        return fields;
    }""")


async def _extract_table_structure(page):
    """Return a per-table dump (up to 30 non-empty rows each) of .tableauFormulaire blocks."""
    return await page.evaluate("""() => {
        const sections = [];
        document.querySelectorAll('.tableauFormulaire').forEach((table, ti) => {
            const rows = [];
            table.querySelectorAll('tr').forEach((tr, ri) => {
                const cells = Array.from(tr.querySelectorAll('td, th')).map(c => ({
                    tag: c.tagName,
                    class: c.className,
                    colspan: c.colSpan,
                    text: c.innerText.trim().substring(0, 200),
                    childSpans: Array.from(c.querySelectorAll('span')).map(s => ({
                        class: s.className,
                        text: s.innerText.trim().substring(0, 200)
                    }))
                }));
                if (cells.length > 0) rows.push({ rowIndex: ri, cells });
            });
            sections.push({ tableIndex: ti, rowCount: rows.length, rows: rows.slice(0, 30) });
        });
        return sections;
    }""")


def _print_table_structure(table_structure):
    """Print the first 15 rows of each dumped table, cell by cell."""
    print(f"\n=== Table structure: {len(table_structure)} tableauFormulaire blocks ===")
    for sec in table_structure:
        print(f"\n Table #{sec['tableIndex']} ({sec['rowCount']} rows):")
        for row in sec['rows'][:15]:
            for cell in row['cells']:
                spans_info = " | ".join(f"[{s['class']}]{s['text'][:60]}" for s in cell['childSpans'])
                print(f" row{row['rowIndex']} <{cell['tag']} class='{cell['class']}' colspan={cell['colspan']}> "
                      f"{cell['text'][:80]}")
                if spans_info:
                    print(f" spans: {spans_info}")


async def main():
    """Explore the first Device Deficiency form and dump what was found.

    Steps: open the forms listing (logging in and saving the session when
    needed), screenshot it, collect the form links, open the first form,
    then write its screenshot, HTML and extracted data into SCREENSHOTS_DIR.
    When the listing contains no form links, the listing HTML is saved for
    debugging and the function returns early.

    Raises:
        RuntimeError: if logging in does not give access to the listing.
    """
    SCREENSHOTS_DIR.mkdir(exist_ok=True)

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        # try/finally closes the browser exactly once on every exit path
        # (normal end, early return, or exception).
        try:
            context_options = {"viewport": _VIEWPORT}
            reuse_session = SESSION_FILE.exists()
            if reuse_session:
                context_options["storage_state"] = str(SESSION_FILE)
            context = await browser.new_context(**context_options)
            if reuse_session:
                print("Loaded saved session")

            page = await context.new_page()
            await _open_listing(context, page)

            await page.screenshot(path=str(SCREENSHOTS_DIR / "01_dd_listing.png"), full_page=False)
            print("Screenshot: DD listing")

            # Get all DD form links from the listing
            form_infos = await _collect_form_infos(page)

            print(f"\nFound {len(form_infos)} Device Deficiency forms")
            for i, info in enumerate(form_infos[:5]):
                print(f" [{i}] form_id={info['formId']} patient={info['patientCode']} cells={info['cells']}")

            if not form_infos:
                print("NO DD FORMS FOUND!")
                # Save HTML for debugging
                html = await page.content()
                (SCREENSHOTS_DIR / "01_dd_listing.html").write_text(html, encoding="utf-8")
                return

            # Open the first DD form
            first = form_infos[0]
            form_url = f"{BASE_URL}?module=dossier&class=file&event=show&id={first['formId']}#fiche"
            print(f"\nOpening DD form: {form_url}")
            await page.goto(form_url)
            await page.wait_for_load_state("networkidle")
            await page.screenshot(path=str(SCREENSHOTS_DIR / "02_dd_form.png"), full_page=True)
            print("Screenshot: DD form")

            # Extract fields using the same pattern as AE (span.label + span.valeur)
            fields_label_value = await _extract_label_value_fields(page)

            print(f"\n=== Fields (span.label -> span.valeur) : {len(fields_label_value)} ===")
            for f in fields_label_value:
                print(f" {f['key']:40s} = {f['value']}")

            # Also explore table structure for any additional patterns
            table_structure = await _extract_table_structure(page)
            _print_table_structure(table_structure)

            # Save full form HTML
            html = await page.content()
            (SCREENSHOTS_DIR / "02_dd_form.html").write_text(html, encoding="utf-8")
            print("\nSaved: full DD form HTML")

            # Save extracted data as JSON for easy review
            result = {
                "form_id": first['formId'],
                "patient_code": first['patientCode'],
                "listing_cells": first['cells'],
                "fields": fields_label_value,
                "table_structure": table_structure,
            }
            (SCREENSHOTS_DIR / "dd_form_data.json").write_text(
                json.dumps(result, indent=2, ensure_ascii=False), encoding="utf-8"
            )
            print("Saved: dd_form_data.json")
        finally:
            await browser.close()
        print("\nDone")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run only when executed as a script: drive the main() coroutine to
    # completion with asyncio.run.
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user