notebookvb

2026-05-03 05:51:43 +02:00
parent 88602cb406
commit d013e43d34
5 changed files with 270 additions and 2 deletions
@@ -0,0 +1,265 @@
+"""
+Stáhni odeslaná podání z VZP Point (sekce „Odeslaná podání").
+Načte Bearer token ze stránky Desk/FormDashboard, pak volá REST API /api/desk/form.
+Stahuje podání s přiloženým výsledkovým souborem — přeskočí ty, co už existují.
+Použití: python stahovanipodani.py [--dry-run]
+"""
+
+import json
+import os
+import re
+import sys
+import time
+import winreg
+
+try:
+    import requests as req_lib
+except ImportError:
+    print("Chybí requests: pip install requests")
+    sys.exit(1)
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", ".."))
+from Knihovny.najdi_dropbox import get_dropbox_root
+
# VZP Point endpoints: the dashboard page carries the bearer token, the REST
# endpoint lists submissions and resolves result files.
DASHBOARD_URL = "https://point.vzp.cz/Desk/FormDashboard"
API_BASE = "https://point.vzp.cz/api/desk/form"
# Value sent as ``pageSize`` with every listing request.
PAGE_SIZE = 50

# Browser profile and cookie dump live next to this script.
_SCRIPT_DIR = os.path.dirname(__file__)
CHROME_PROFILE = os.path.abspath(os.path.join(_SCRIPT_DIR, "chrome_profile"))
COOKIES_FILE = os.path.abspath(os.path.join(_SCRIPT_DIR, "vzp_cookies.json"))

# Archive folder (inside the Dropbox root) that receives the downloaded files.
_ARCHIVE_SUBPATH = (
    "Ordinace",
    "Dokumentace_ke_zpracování",
    "Zúčtovací zprávy",
    "111 VZP Podání",
)
DOWNLOAD_DIR = os.path.join(get_dropbox_root(), *_ARCHIVE_SUBPATH)

# Set to True to force dry-run mode regardless of the command line.
DRY_RUN = False
+
+
def load_cookies(context) -> int:
    """Load cookies saved in COOKIES_FILE into a browser context.

    The operation is best-effort: any failure is reported on stdout and the
    function returns 0 instead of raising, so the caller can continue with a
    fresh (cookie-less) session.

    Args:
        context: Object exposing ``add_cookies(list)``.

    Returns:
        Number of cookies handed to ``context``; 0 when the file is missing,
        unreadable, malformed, or rejected by the context.
    """
    try:
        with open(COOKIES_FILE, "r", encoding="utf-8") as f:
            cookies = json.load(f)
    except FileNotFoundError:
        # First run: nothing has been saved yet, which is not an error.
        return 0
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and undecodable bytes.
        print(f"  Varování: nelze načíst cookies z {COOKIES_FILE}: {e}")
        return 0

    if not isinstance(cookies, list):
        print(f"  Varování: {COOKIES_FILE} neobsahuje seznam cookies, ignoruji.")
        return 0

    try:
        context.add_cookies(cookies)
    except Exception as e:
        # The browser may reject stale or malformed entries; keep going.
        print(f"  Varování: cookies nelze vložit do prohlížeče: {e}")
        return 0
    return len(cookies)
+
+
def _is_vzp_domain(domain: str) -> bool:
    """Return True for ``vzp.cz`` itself or any of its subdomains."""
    host = domain.lstrip(".").lower()
    return host == "vzp.cz" or host.endswith(".vzp.cz")


def save_cookies(context) -> int:
    """Persist the context's vzp.cz cookies to COOKIES_FILE.

    The file is written to a temporary sibling and then renamed over the
    target, so an interrupted run never leaves a truncated cookie file behind.
    The operation is best-effort: failures are reported on stdout and 0 is
    returned instead of raising.

    Args:
        context: Object exposing ``cookies()`` that returns a list of dicts
            with a ``"domain"`` key.

    Returns:
        Number of cookies written, or 0 on any failure.
    """
    try:
        all_cookies = context.cookies()
    except Exception as e:
        print(f"  Varování: nelze přečíst cookies z prohlížeče: {e}")
        return 0

    # Exact host / subdomain match; a plain substring test would also accept
    # unrelated hosts that merely contain "vzp.cz".
    vzp = [c for c in all_cookies if _is_vzp_domain(c.get("domain") or "")]

    tmp_path = COOKIES_FILE + ".tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(vzp, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, COOKIES_FILE)
    except (OSError, TypeError, ValueError) as e:
        print(f"  Varování: nelze uložit cookies do {COOKIES_FILE}: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        return 0
    return len(vzp)
+
+
# Issuer common name used to pick the client certificate automatically.
CERT_ISSUER_CN = "I.CA Public CA/RSA 06/2022"


def _set_chrome_cert_policy() -> None:
    """Write Chrome's AutoSelectCertificateForUrls policy for vzp.cz.

    Stores a JSON rule under HKEY_CURRENT_USER that matches
    ``https://[*.]vzp.cz`` and filters certificates by issuer CN
    (CERT_ISSUER_CN). The rule is stored as value ``"1"``, overwriting
    whatever was stored under that name before.

    Failure is not fatal: a warning is printed and the function returns.
    """
    policy = json.dumps({
        "pattern": "https://[*.]vzp.cz",
        "filter": {"ISSUER": {"CN": CERT_ISSUER_CN}},
    })
    key_path = r"SOFTWARE\Policies\Google\Chrome\AutoSelectCertificateForUrls"
    try:
        # The handle is a context manager, so it is closed even when
        # SetValueEx raises.
        with winreg.CreateKey(winreg.HKEY_CURRENT_USER, key_path) as key:
            winreg.SetValueEx(key, "1", 0, winreg.REG_SZ, policy)
    except OSError as e:
        print(f"  Varování: nelze nastavit Chrome politiku: {e}")
    else:
        print(f"  Chrome politika nastavena (issuer: {CERT_ISSUER_CN})")
+
+
+def extract_bearer_token(page) -> str | None:
+    """Extrahuje Bearer token z inline <script> tagu vloženého do HTML stránky."""
+    scripts = page.evaluate(
+        "() => Array.from(document.querySelectorAll('script:not([src])')).map(s => s.textContent)"
+    )
+    for text in scripts:
+        m = re.search(r'"bearerToken"\s*:\s*"([^"]+)"', text)
+        if m:
+            return m.group(1)
+    return None
+
+
def fetch_all_forms(token: str) -> list[dict]:
    """Fetch every submission from the listing API, page by page.

    Requests ``API_BASE`` with increasing ``pageNumber`` (``pageSize`` is
    PAGE_SIZE) until the response no longer sets ``canLoadMore`` or a page
    comes back empty.

    Args:
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The concatenated ``items`` of all pages.

    Raises:
        Whatever the HTTP client raises on a transport error, an HTTP error
        status (``raise_for_status``), or an undecodable JSON body.
    """
    all_items: list[dict] = []
    page_num = 1
    # One session reuses the connection for all pages.
    with req_lib.Session() as session:
        session.headers.update(
            {"Authorization": f"Bearer {token}", "Accept": "application/json"}
        )
        while True:
            r = session.get(
                API_BASE,
                params={"pageNumber": page_num, "pageSize": PAGE_SIZE},
                timeout=30,
            )
            r.raise_for_status()
            data = r.json()
            # "items" may be missing or null; treat both as an empty page.
            items = data.get("items") or []
            all_items.extend(items)
            print(f"  Stránka {page_num}: {len(items)} podání (celkem {len(all_items)})")
            # An empty page ends the loop even if the server still claims
            # there is more, otherwise we would request pages forever.
            if not items or not data.get("canLoadMore", False):
                break
            page_num += 1
    return all_items
+
+
def parse_date(iso: str) -> str:
    """Return the first ten characters of ``iso`` (the date part of an ISO timestamp).

    An empty or otherwise falsy value yields the placeholder ``"0000-00-00"``.
    """
    if not iso:
        return "0000-00-00"
    return iso[:10]
+
+
def download_file(token: str, form_id: int, file_id: str, dest: str) -> bool:
    """Download one result file to ``dest``.

    Step 1 asks the API (with the bearer token) for the file's ``publicUri``;
    step 2 streams that URI to disk without an auth header.

    The data is first written to ``dest + ".part"`` and only renamed to
    ``dest`` after the transfer completed, so a failed download never leaves a
    truncated file under the final name (which a later run would mistake for
    an already-downloaded file).

    Args:
        token: Bearer token for the metadata request.
        form_id: Identifier of the submission.
        file_id: Identifier of the result file within the submission.
        dest: Final path of the downloaded file.

    Returns:
        True when the file was fully written to ``dest``; False on any error
        (the error is printed).
    """
    # Step 1: resolve publicUri through the API.
    meta_url = f"{API_BASE}/{form_id}/result/{file_id}"
    try:
        r = req_lib.get(meta_url, headers={"Authorization": f"Bearer {token}"}, timeout=30)
        r.raise_for_status()
        public_uri = r.json().get("publicUri")
    except Exception as e:
        print(f"  Chyba načítání publicUri: {e}")
        return False
    if not public_uri:
        print("  Chyba: odpověď neobsahuje publicUri")
        return False

    # Step 2: stream the file from publicUri (no auth header).
    part_path = dest + ".part"
    try:
        # The context manager releases the streamed connection on every path.
        with req_lib.get(public_uri, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(part_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(part_path, dest)
        return True
    except Exception as e:
        print(f"  Chyba stahování souboru: {e}")
        try:
            os.remove(part_path)
        except OSError:
            pass
        return False
+
+
def _safe_filename(name: str) -> str:
    """Replace path separators and characters invalid on Windows with ``_``."""
    return re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", name)


def _login_and_get_token(sync_playwright) -> str | None:
    """Open Chrome, sign in to VZP Point if needed and read the bearer token.

    Cookies are restored before navigation and saved again (and the browser
    closed) on every exit path.

    Returns:
        The token, or None when sign-in failed or the page carried no token.
    """
    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            user_data_dir=CHROME_PROFILE,
            channel="chrome",
            headless=False,
            slow_mo=100,
            ignore_https_errors=True,
            accept_downloads=True,
            args=["--force-renderer-accessibility"],
        )
        try:
            loaded = load_cookies(context)
            print(f"Cookies načtené z JSON: {loaded}")

            page = context.new_page()

            print("Naviguji na VZP Point Odeslaná podání...")
            try:
                page.goto(DASHBOARD_URL, wait_until="domcontentloaded", timeout=30_000)
            except Exception as e:
                print(f"Navigace: {e}")

            if page.url.startswith("https://auth.vzp.cz/signin"):
                print("Přihlašovací stránka — klikám na 'Certifikát'...")
                cert_btn = page.locator("a, button").filter(has_text=re.compile(r"certifikát", re.I)).first
                cert_btn.wait_for(state="visible", timeout=10_000)
                cert_btn.click(no_wait_after=True)
                print("Pokud se zobrazí dialog výběru certifikátu, vyberte ho ručně (max 60 s)...")
                # Fixed pause: gives the user time to pick a certificate in
                # the browser's dialog before the dashboard is reopened.
                time.sleep(60)
                page = context.new_page()
                try:
                    page.goto(DASHBOARD_URL, wait_until="domcontentloaded", timeout=30_000)
                except Exception as e:
                    print(f"Navigace po auth: {e}")
                if not page.url.startswith("https://point.vzp.cz"):
                    print(f"Přihlášení selhalo. URL: {page.url}")
                    return None

            print("Přihlášení OK.")
            try:
                page.wait_for_load_state("networkidle", timeout=15_000)
            except Exception as e:
                # A page that never goes idle must not prevent reading the
                # token, so the timeout is only reported.
                print(f"Čekání na načtení stránky: {e}")

            token = extract_bearer_token(page)
            if token:
                print("Bearer token načten.")
            else:
                print("Nepodařilo se načíst Bearer token ze stránky.")
            return token
        finally:
            saved = save_cookies(context)
            print(f"Uloženo {saved} VZP cookies.")
            context.close()


def main() -> None:
    """Download result files of sent submissions that are not archived yet.

    Flow: sign in through Chrome to obtain a bearer token, list all
    submissions through the REST API, then download every result file whose
    target name (``"<created date> <original name>"``) is not yet present in
    DOWNLOAD_DIR. With ``--dry-run`` (or DRY_RUN set) nothing is downloaded;
    the files that would be fetched are only listed.

    Exits with status 1 when playwright is missing, sign-in fails, no token is
    found, or the submission list cannot be loaded.
    """
    dry_run = DRY_RUN or "--dry-run" in sys.argv
    if dry_run:
        print("[dry-run] Pouze zobrazuji co by se stáhlo, nic nestahuju.\n")

    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("Chybí playwright: pip install playwright && playwright install chrome")
        sys.exit(1)

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    _set_chrome_cert_policy()

    token = _login_and_get_token(sync_playwright)
    if not token:
        sys.exit(1)

    print("\nNačítám seznam podání...")
    try:
        forms = fetch_all_forms(token)
    except Exception as e:
        print(f"Chyba načítání podání: {e}")
        sys.exit(1)

    existing = set(os.listdir(DOWNLOAD_DIR))
    print(f"\nV archivu: {len(existing)} souborů.")
    print(f"Celkem podání v API: {len(forms)}\n")

    downloaded = 0
    skipped = 0
    no_file = 0
    failed = 0

    for form in forms:
        result = form.get("result") or {}
        result_file = result.get("resultFile") or {}
        file_id = result_file.get("fileId")
        orig_name = result_file.get("name", "")

        if not file_id or not orig_name:
            no_file += 1
            continue

        date_str = parse_date(form.get("created", ""))
        # Both parts come from the API; make sure the result is a plain file
        # name that cannot point outside DOWNLOAD_DIR.
        filename = _safe_filename(f"{date_str} {orig_name}")
        state = form.get("state", "")

        if filename in existing:
            print(f"  ✓ {filename}")
            skipped += 1
            continue

        # "size" may be present but null; format it as 0 in that case.
        size = result_file.get("size") or 0
        print(f"  ↓ {filename}  ({size:,} B)  [{state}]")

        if dry_run:
            downloaded += 1
            continue

        form_id = form.get("id")
        if form_id is None:
            print("  Chyba: podání nemá id, přeskakuji.")
            failed += 1
            continue

        dest = os.path.join(DOWNLOAD_DIR, filename)
        if download_file(token, form_id, file_id, dest):
            existing.add(filename)
            downloaded += 1
        else:
            failed += 1

    print()
    if dry_run:
        print(f"[dry-run] Ke stažení: {downloaded}, přeskočeno: {skipped}, bez souboru: {no_file}")
    else:
        print(f"Staženo: {downloaded}, přeskočeno (již existovalo): {skipped}, bez souboru: {no_file}")
    if failed:
        print(f"Nepodařilo se stáhnout: {failed}")


if __name__ == "__main__":
    main()