# ordinaceprojekt/Insurance/StahováníZpráv/111 VZP/stahovanipodani.py

"""
Stáhni odeslaná podání z VZP Point (sekce „Odeslaná podání").
Načte Bearer token ze stránky Desk/FormDashboard, pak volá REST API /api/desk/form.
Stahuje podání s přiloženým výsledkovým souborem — přeskočí ty, co už existují.
Použití: python stahovanipodani.py [--dry-run]
"""

import json
import os
import re
import sys
import time
import winreg

try:
    import requests as req_lib
except ImportError:
    print("Chybí requests: pip install requests")
    sys.exit(1)

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", ".."))
from Knihovny.najdi_dropbox import get_dropbox_root

# Dashboard page of VZP Point; main() navigates here and reads the Bearer token from it.
DASHBOARD_URL  = "https://point.vzp.cz/Desk/FormDashboard"
# Base URL of the REST endpoint used to list forms and to look up result files.
API_BASE       = "https://point.vzp.cz/api/desk/form"
# Value sent as the pageSize query parameter when listing forms.
PAGE_SIZE      = 50

# Chrome user-data directory (next to this script) used for the persistent browser context.
CHROME_PROFILE = os.path.abspath(os.path.join(os.path.dirname(__file__), "chrome_profile"))
# JSON file (next to this script) in which vzp.cz cookies are kept between runs.
COOKIES_FILE   = os.path.abspath(os.path.join(os.path.dirname(__file__), "vzp_cookies.json"))
# Folder that receives the downloaded result files; get_dropbox_root() is called at import time.
DOWNLOAD_DIR   = os.path.join(
    get_dropbox_root(),
    "Ordinace", "Dokumentace_ke_zpracování", "Zúčtovací zprávy", "111 VZP Podání"
)

# When True, dry-run mode is forced even without the --dry-run command-line flag.
DRY_RUN = False


def load_cookies(context) -> int:
    """Load cookies saved by a previous run into the browser context.

    Loading is best-effort: a missing, unreadable or malformed cookie file
    never aborts the run, but the reason is printed so the failure is visible.

    Args:
        context: Browser context object exposing ``add_cookies(list)``.

    Returns:
        Number of cookies handed to the context, or 0 when the cookie file
        does not exist or could not be loaded.
    """
    if not os.path.exists(COOKIES_FILE):
        return 0
    try:
        with open(COOKIES_FILE, "r", encoding="utf-8") as f:
            cookies = json.load(f)
        context.add_cookies(cookies)
        return len(cookies)
    except Exception as e:
        # Deliberately broad: file errors, bad JSON and cookies rejected by
        # the browser all degrade to "start without cookies".
        print(f"  Varování: nelze načíst cookies: {e}")
        return 0


def save_cookies(context) -> int:
    """Write the context's vzp.cz cookies to ``COOKIES_FILE`` as JSON.

    Only cookies whose ``domain`` contains ``vzp.cz`` are stored. Saving is
    best-effort: on any error a warning is printed and 0 is returned.

    Args:
        context: Browser context object exposing ``cookies()``, which returns
            a list of cookie dicts.

    Returns:
        Number of cookies written, or 0 when saving failed.
    """
    try:
        vzp = [c for c in context.cookies() if "vzp.cz" in c.get("domain", "")]
        with open(COOKIES_FILE, "w", encoding="utf-8") as f:
            json.dump(vzp, f, indent=2, ensure_ascii=False)
        return len(vzp)
    except Exception as e:
        # Deliberately broad: this runs during cleanup and must not mask the
        # error that caused the cleanup, but the reason is still reported.
        print(f"  Varování: nelze uložit cookies: {e}")
        return 0


# Issuer common name used as the certificate filter in the Chrome policy
# written by _set_chrome_cert_policy().
CERT_ISSUER_CN = "I.CA Public CA/RSA 06/2022"


def _set_chrome_cert_policy() -> None:
    """Write the Chrome ``AutoSelectCertificateForUrls`` policy for vzp.cz.

    Stores value ``"1"`` under
    ``HKEY_CURRENT_USER\\SOFTWARE\\Policies\\Google\\Chrome\\AutoSelectCertificateForUrls``
    with a JSON rule that matches ``https://[*.]vzp.cz`` and filters client
    certificates by issuer CN ``CERT_ISSUER_CN``.

    Best-effort: a failure is printed as a warning and never raised.
    """
    policy = json.dumps({
        "pattern": "https://[*.]vzp.cz",
        "filter": {"ISSUER": {"CN": CERT_ISSUER_CN}},
    })
    key_path = r"SOFTWARE\Policies\Google\Chrome\AutoSelectCertificateForUrls"
    try:
        # Registry handles are context managers, so the key is closed even
        # when SetValueEx raises.
        with winreg.CreateKey(winreg.HKEY_CURRENT_USER, key_path) as key:
            winreg.SetValueEx(key, "1", 0, winreg.REG_SZ, policy)
        print(f"  Chrome politika nastavena (issuer: {CERT_ISSUER_CN})")
    except Exception as e:
        print(f"  Varování: nelze nastavit Chrome politiku: {e}")


def extract_bearer_token(page) -> str | None:
    """Extrahuje Bearer token z inline <script> tagu vloženého do HTML stránky."""
    scripts = page.evaluate(
        "() => Array.from(document.querySelectorAll('script:not([src])')).map(s => s.textContent)"
    )
    for text in scripts:
        m = re.search(r'"bearerToken"\s*:\s*"([^"]+)"', text)
        if m:
            return m.group(1)
    return None


def fetch_all_forms(token: str) -> list[dict]:
    """Fetch every form from the paginated ``API_BASE`` endpoint.

    Pages are requested with ``pageNumber`` starting at 1 and ``pageSize``
    set to ``PAGE_SIZE``. Fetching stops when the response no longer reports
    ``canLoadMore`` or when a page comes back empty.

    Args:
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        All ``items`` entries from the fetched pages, in response order.

    Raises:
        Whatever the HTTP call, ``raise_for_status()`` or JSON decoding raise;
        nothing is caught here.
    """
    headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
    all_items: list[dict] = []
    page_num = 1
    while True:
        url = f"{API_BASE}?pageNumber={page_num}&pageSize={PAGE_SIZE}"
        r = req_lib.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        data = r.json()
        # `or []` also covers an explicit `"items": null` in the response.
        items = data.get("items") or []
        all_items.extend(items)
        print(f"  Stránka {page_num}: {len(items)} podání (celkem {len(all_items)})")
        # An empty page means there is nothing further to fetch; stopping here
        # prevents an endless loop if the API keeps reporting canLoadMore.
        if not items or not data.get("canLoadMore", False):
            break
        page_num += 1
    return all_items


def parse_date(iso: str) -> str:
    """Return the first 10 characters of *iso* (the date part of an ISO timestamp).

    Empty or ``None`` input yields the placeholder ``"0000-00-00"``.
    """
    if not iso:
        return "0000-00-00"
    return iso[:10]


def download_file(token: str, form_id: int, file_id: str, dest: str) -> bool:
    """Download one result file of a form to *dest*.

    The API is first asked for the file's ``publicUri``; the content is then
    streamed from that URI without an auth header. Data is written to
    ``dest + ".part"`` and renamed to *dest* only after the whole body has
    arrived, so an interrupted download never leaves a truncated file under
    the final name.

    Args:
        token: Bearer token for the metadata request.
        form_id: Identifier of the form the file belongs to.
        file_id: Identifier of the result file.
        dest: Final path of the downloaded file.

    Returns:
        True when the file was fully written to *dest*; False otherwise
        (the reason is printed).
    """
    # Step 1: get the publicUri from the API.
    meta_url = f"{API_BASE}/{form_id}/result/{file_id}"
    try:
        r = req_lib.get(meta_url, headers={"Authorization": f"Bearer {token}"}, timeout=30)
        r.raise_for_status()
        public_uri = r.json().get("publicUri")
    except Exception as e:
        print(f"  Chyba načítání publicUri: {e}")
        return False
    if not public_uri:
        print("  Chyba: odpověď neobsahuje publicUri")
        return False

    # Step 2: stream the file straight from publicUri (no auth header).
    tmp_path = f"{dest}.part"
    try:
        # The response is closed on exit, releasing the streamed connection.
        with req_lib.get(public_uri, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(tmp_path, dest)
        return True
    except Exception as e:
        print(f"  Chyba stahování souboru: {e}")
        # Drop the partial file; it may not exist if the request itself failed.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        return False


def _acquire_bearer_token(sync_playwright) -> str | None:
    """Open the VZP Point dashboard in Chrome and return its Bearer token.

    Launches a persistent Chrome context, restores saved cookies and opens
    ``DASHBOARD_URL``. When redirected to the sign-in page it clicks the
    certificate login button, waits a fixed 60 seconds, then reopens the
    dashboard. Cookies are saved and the browser closed on every exit path.

    Args:
        sync_playwright: The ``playwright.sync_api.sync_playwright`` factory.

    Returns:
        The token extracted from the dashboard page, or ``None`` when login
        failed or the page contained no token.
    """
    token = None
    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            user_data_dir=CHROME_PROFILE,
            channel="chrome",
            headless=False,
            slow_mo=100,
            ignore_https_errors=True,
            accept_downloads=True,
            args=["--force-renderer-accessibility"],
        )
        try:
            loaded = load_cookies(context)
            print(f"Cookies načtené z JSON: {loaded}")

            page = context.new_page()

            print("Naviguji na VZP Point Odeslaná podání...")
            try:
                page.goto(DASHBOARD_URL, wait_until="domcontentloaded", timeout=30_000)
            except Exception as e:
                # A failed navigation is only reported; the URL check below
                # decides how to continue.
                print(f"Navigace: {e}")

            if page.url.startswith("https://auth.vzp.cz/signin"):
                print("Přihlašovací stránka — klikám na 'Certifikát'...")
                cert_btn = page.locator("a, button").filter(has_text=re.compile(r"certifikát", re.I)).first
                cert_btn.wait_for(state="visible", timeout=10_000)
                cert_btn.click(no_wait_after=True)
                print("Pokud se zobrazí dialog výběru certifikátu, vyberte ho ručně (max 60 s)...")
                # Fixed wait: leaves time to pick the certificate by hand.
                time.sleep(60)
                page = context.new_page()
                try:
                    page.goto(DASHBOARD_URL, wait_until="domcontentloaded", timeout=30_000)
                except Exception as e:
                    print(f"Navigace po auth: {e}")
                if not page.url.startswith("https://point.vzp.cz"):
                    print(f"Přihlášení selhalo. URL: {page.url}")
                    # Reported to the caller as "no token" so the script
                    # exits non-zero like every other failure.
                    return None

            print("Přihlášení OK.")
            try:
                page.wait_for_load_state("networkidle", timeout=15_000)
            except Exception as e:
                # A page that never goes idle may still carry the token, so
                # the timeout is reported and extraction is attempted anyway.
                print(f"Čekání na načtení stránky: {e}")

            token = extract_bearer_token(page)
            if token:
                print("Bearer token načten.")
            else:
                print("Nepodařilo se načíst Bearer token ze stránky.")

        finally:
            saved = save_cookies(context)
            print(f"Uloženo {saved} VZP cookies.")
            context.close()
    return token


def _download_missing_results(forms: list[dict], token: str, dry_run: bool) -> tuple[int, int, int]:
    """Download result files not yet present in ``DOWNLOAD_DIR``.

    Each file is stored as ``"<created date> <original name>"``. Forms with
    no result file are counted and skipped; files whose target name already
    exists are skipped. In dry-run mode nothing is downloaded and the files
    that would be fetched are only counted.

    Args:
        forms: Form dicts as returned by ``fetch_all_forms``.
        token: Bearer token passed on to ``download_file``.
        dry_run: When True, only list what would be downloaded.

    Returns:
        ``(downloaded, skipped, no_file)`` counters.
    """
    existing = set(os.listdir(DOWNLOAD_DIR))
    print(f"\nV archivu: {len(existing)} souborů.")
    print(f"Celkem podání v API: {len(forms)}\n")

    downloaded = 0
    skipped = 0
    no_file = 0

    for form in forms:
        result = form.get("result") or {}
        result_file = result.get("resultFile") or {}
        file_id = result_file.get("fileId")
        orig_name = result_file.get("name", "")

        if not file_id or not orig_name:
            no_file += 1
            continue

        # NOTE(review): orig_name comes from the API and is used in the
        # target path unsanitized — confirm it can never contain separators.
        date_str = parse_date(form.get("created", ""))
        filename = f"{date_str} {orig_name}"

        if filename in existing:
            print(f"  ✓ {filename}")
            skipped += 1
            continue

        # `or 0` keeps the thousands-separator format from failing when the
        # API reports the size as null.
        size = result_file.get("size") or 0
        state = form.get("state", "")
        print(f"  ↓ {filename}  ({size:,} B)  [{state}]")

        if dry_run:
            downloaded += 1
            continue

        dest = os.path.join(DOWNLOAD_DIR, filename)
        if download_file(token, form["id"], file_id, dest):
            # Remember the name so a later form mapping to the same file
            # name is treated as already present.
            existing.add(filename)
            downloaded += 1

    return downloaded, skipped, no_file


def main() -> None:
    """Download the result files of submitted forms from VZP Point.

    Steps: prepare the download folder and the Chrome certificate policy,
    obtain a Bearer token through the browser, list all forms via the REST
    API and download every result file not yet in ``DOWNLOAD_DIR``.

    Dry-run mode (``DRY_RUN`` or the ``--dry-run`` argument) only lists what
    would be downloaded. The process exits with status 1 when Playwright is
    missing, login fails, no token is found or the form list cannot be loaded.
    """
    dry_run = DRY_RUN or "--dry-run" in sys.argv
    if dry_run:
        print("[dry-run] Pouze zobrazuji co by se stáhlo, nic nestahuju.\n")

    # Imported lazily so a missing Playwright produces an install hint
    # instead of a traceback.
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("Chybí playwright: pip install playwright && playwright install chrome")
        sys.exit(1)

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    _set_chrome_cert_policy()

    token = _acquire_bearer_token(sync_playwright)
    if not token:
        sys.exit(1)

    print("\nNačítám seznam podání...")
    try:
        forms = fetch_all_forms(token)
    except Exception as e:
        print(f"Chyba načítání podání: {e}")
        sys.exit(1)

    downloaded, skipped, no_file = _download_missing_results(forms, token, dry_run)

    print()
    if dry_run:
        print(f"[dry-run] Ke stažení: {downloaded}, přeskočeno: {skipped}, bez souboru: {no_file}")
    else:
        print(f"Staženo: {downloaded}, přeskočeno (již existovalo): {skipped}, bez souboru: {no_file}")


# Run the downloader only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()