266 lines
8.5 KiB
Python
266 lines
8.5 KiB
Python
"""
|
|
Stáhni odeslaná podání z VZP Point (sekce „Odeslaná podání").
|
|
Načte Bearer token ze stránky Desk/FormDashboard, pak volá REST API /api/desk/form.
|
|
Stahuje podání s přiloženým výsledkovým souborem — přeskočí ty, co už existují.
|
|
Použití: python stahovanipodani.py [--dry-run]
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
import time
|
|
import winreg
|
|
|
|
try:
|
|
import requests as req_lib
|
|
except ImportError:
|
|
print("Chybí requests: pip install requests")
|
|
sys.exit(1)
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", ".."))
|
|
from Knihovny.najdi_dropbox import get_dropbox_root
|
|
|
|
# Dashboard page opened in the browser, and the REST endpoint queried for submissions.
DASHBOARD_URL = "https://point.vzp.cz/Desk/FormDashboard"
API_BASE = "https://point.vzp.cz/api/desk/form"
# Value sent as the pageSize query parameter when listing submissions.
PAGE_SIZE = 50

# The Chrome profile and the cookie dump are kept next to this script.
_SCRIPT_DIR = os.path.dirname(__file__)
CHROME_PROFILE = os.path.abspath(os.path.join(_SCRIPT_DIR, "chrome_profile"))
COOKIES_FILE = os.path.abspath(os.path.join(_SCRIPT_DIR, "vzp_cookies.json"))

# Archive folder below the Dropbox root where result files are written.
_ARCHIVE_SUBPATH = (
    "Ordinace",
    "Dokumentace_ke_zpracování",
    "Zúčtovací zprávy",
    "111 VZP Podání",
)
DOWNLOAD_DIR = os.path.join(get_dropbox_root(), *_ARCHIVE_SUBPATH)

# Set to True to force dry-run mode even without the --dry-run argument.
DRY_RUN = False
|
|
|
|
|
|
def load_cookies(context) -> int:
    """Load cookies saved in ``COOKIES_FILE`` into a browser context.

    Loading is best-effort: a missing, unreadable or malformed cookie file
    never raises. Unlike a silent failure, problems are reported on stdout so
    a corrupt cookie file can be noticed.

    Args:
        context: Object exposing ``add_cookies(list)``.

    Returns:
        Number of cookies passed to ``context.add_cookies``, or 0 when the
        file does not exist or could not be loaded.
    """
    if not os.path.exists(COOKIES_FILE):
        return 0
    try:
        with open(COOKIES_FILE, "r", encoding="utf-8") as f:
            cookies = json.load(f)
        context.add_cookies(cookies)
        return len(cookies)
    except Exception as e:
        # Deliberately broad: any failure here only means "start without
        # saved cookies", but the reason is printed instead of being hidden.
        print(f" Varování: nelze načíst cookies z {COOKIES_FILE}: {e}")
        return 0
|
|
|
|
|
|
def save_cookies(context) -> int:
    """Write the context's vzp.cz cookies to ``COOKIES_FILE`` as JSON.

    Only cookies whose ``domain`` contains ``"vzp.cz"`` are kept. Saving is
    best-effort: failures are reported on stdout and never raised.

    Args:
        context: Object exposing ``cookies()`` that returns a list of dicts.

    Returns:
        Number of cookies written, or 0 when saving failed.
    """
    try:
        vzp = [c for c in context.cookies() if "vzp.cz" in c.get("domain", "")]
        # Serialize before opening the file: opening with "w" truncates it,
        # so a serialization error must happen first to keep the old file.
        payload = json.dumps(vzp, indent=2, ensure_ascii=False)
        with open(COOKIES_FILE, "w", encoding="utf-8") as f:
            f.write(payload)
        return len(vzp)
    except Exception as e:
        # Deliberately broad: losing the cookie dump only costs a re-login.
        print(f" Varování: nelze uložit cookies do {COOKIES_FILE}: {e}")
        return 0
|
|
|
|
|
|
# Issuer Common Name used in the certificate filter of the Chrome policy below.
CERT_ISSUER_CN = "I.CA Public CA/RSA 06/2022"


def _set_chrome_cert_policy() -> None:
    """Write a Chrome ``AutoSelectCertificateForUrls`` policy entry.

    Stores, under HKEY_CURRENT_USER, value ``"1"`` of the policy key: a JSON
    rule pairing the URL pattern ``https://[*.]vzp.cz`` with a filter on the
    issuer CN ``CERT_ISSUER_CN``. Failure to write the registry is reported
    on stdout and otherwise ignored.
    """
    policy = json.dumps({
        "pattern": "https://[*.]vzp.cz",
        "filter": {"ISSUER": {"CN": CERT_ISSUER_CN}},
    })
    key_path = r"SOFTWARE\Policies\Google\Chrome\AutoSelectCertificateForUrls"
    try:
        # The handle is a context manager, so it is closed even when
        # SetValueEx raises.
        with winreg.CreateKey(winreg.HKEY_CURRENT_USER, key_path) as key:
            winreg.SetValueEx(key, "1", 0, winreg.REG_SZ, policy)
    except OSError as e:
        print(f" Varování: nelze nastavit Chrome politiku: {e}")
    else:
        print(f" Chrome politika nastavena (issuer: {CERT_ISSUER_CN})")
|
|
|
|
|
|
def extract_bearer_token(page) -> str | None:
|
|
"""Extrahuje Bearer token z inline <script> tagu vloženého do HTML stránky."""
|
|
scripts = page.evaluate(
|
|
"() => Array.from(document.querySelectorAll('script:not([src])')).map(s => s.textContent)"
|
|
)
|
|
for text in scripts:
|
|
m = re.search(r'"bearerToken"\s*:\s*"([^"]+)"', text)
|
|
if m:
|
|
return m.group(1)
|
|
return None
|
|
|
|
|
|
def fetch_all_forms(token: str) -> list[dict]:
    """Fetch every submission from the paged ``API_BASE`` endpoint.

    Pages are requested one after another, starting at page 1 with
    ``PAGE_SIZE`` items each, until the response no longer sets
    ``canLoadMore`` or a page comes back empty.

    Args:
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        All items from all pages, in the order received.

    Raises:
        requests.RequestException: On a network error or HTTP error status.
        ValueError: If a response body is not valid JSON.
    """
    headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
    all_items: list[dict] = []
    page_num = 1
    while True:
        r = req_lib.get(
            API_BASE,
            headers=headers,
            params={"pageNumber": page_num, "pageSize": PAGE_SIZE},
            timeout=30,
        )
        r.raise_for_status()
        data = r.json()
        # "items" may be absent or null; treat both as an empty page.
        items = data.get("items") or []
        all_items.extend(items)
        print(f" Stránka {page_num}: {len(items)} podání (celkem {len(all_items)})")
        # Stop on an empty page as well: a server that keeps reporting
        # canLoadMore without returning items would otherwise loop forever.
        if not items or not data.get("canLoadMore", False):
            break
        page_num += 1
    return all_items
|
|
|
|
|
|
def parse_date(iso: str) -> str:
    """Return the first ten characters of ``iso``.

    For an ISO-8601 timestamp that is its ``YYYY-MM-DD`` part. An empty or
    otherwise falsy value yields the placeholder ``"0000-00-00"``.
    """
    if not iso:
        return "0000-00-00"
    return iso[:10]
|
|
|
|
|
|
def download_file(token: str, form_id: int, file_id: str, dest: str) -> bool:
    """Download one result file to ``dest``.

    Step 1 asks the API for the file's ``publicUri``; step 2 streams the file
    from that URI without an auth header. The data is first written to
    ``dest + ".part"`` and renamed only after the download finished, so an
    interrupted transfer never leaves a truncated file under the final name.

    Args:
        token: Bearer token for the metadata request.
        form_id: Identifier of the submission.
        file_id: Identifier of the result file within the submission.
        dest: Final path of the downloaded file.

    Returns:
        True when the file was written to ``dest``, False on any failure
        (the reason is printed).
    """
    # Step 1: get publicUri from the API.
    meta_url = f"{API_BASE}/{form_id}/result/{file_id}"
    try:
        r = req_lib.get(meta_url, headers={"Authorization": f"Bearer {token}"}, timeout=30)
        r.raise_for_status()
        public_uri = r.json().get("publicUri")
    except Exception as e:
        print(f" Chyba načítání publicUri: {e}")
        return False
    if not public_uri:
        print(" Chyba: odpověď neobsahuje publicUri")
        return False

    # Step 2: download the file straight from publicUri (no auth header).
    partial = dest + ".part"
    try:
        # "with" releases the streamed connection even when writing fails.
        with req_lib.get(public_uri, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(partial, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial, dest)
        return True
    except Exception as e:
        print(f" Chyba stahování souboru: {e}")
        # Remove the incomplete file; it may not exist if the request failed.
        try:
            os.remove(partial)
        except OSError:
            pass
        return False
|
|
|
|
|
|
def _login_and_get_token(sync_playwright) -> str | None:
    """Open Chrome, sign in to VZP Point if needed and read the Bearer token.

    Cookies are saved and the browser context is closed on every exit path.

    Args:
        sync_playwright: The ``playwright.sync_api.sync_playwright`` factory.

    Returns:
        The token, or ``None`` when login failed or no token was found.
    """
    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            user_data_dir=CHROME_PROFILE,
            channel="chrome",
            headless=False,
            slow_mo=100,
            ignore_https_errors=True,
            accept_downloads=True,
            args=["--force-renderer-accessibility"],
        )
        try:
            loaded = load_cookies(context)
            print(f"Cookies načtené z JSON: {loaded}")

            page = context.new_page()

            print("Naviguji na VZP Point Odeslaná podání...")
            try:
                page.goto(DASHBOARD_URL, wait_until="domcontentloaded", timeout=30_000)
            except Exception as e:
                print(f"Navigace: {e}")

            if page.url.startswith("https://auth.vzp.cz/signin"):
                print("Přihlašovací stránka — klikám na 'Certifikát'...")
                cert_btn = page.locator("a, button").filter(has_text=re.compile(r"certifikát", re.I)).first
                cert_btn.wait_for(state="visible", timeout=10_000)
                cert_btn.click(no_wait_after=True)
                print("Pokud se zobrazí dialog výběru certifikátu, vyberte ho ručně (max 60 s)...")
                # Fixed wait that gives the user time to pick a certificate.
                time.sleep(60)
                # Open a fresh page and load the dashboard again after auth.
                page = context.new_page()
                try:
                    page.goto(DASHBOARD_URL, wait_until="domcontentloaded", timeout=30_000)
                except Exception as e:
                    print(f"Navigace po auth: {e}")

            # Checked on every path: without the dashboard there is no token.
            if not page.url.startswith("https://point.vzp.cz"):
                print(f"Přihlášení selhalo. URL: {page.url}")
                return None

            print("Přihlášení OK.")
            try:
                page.wait_for_load_state("networkidle", timeout=15_000)
            except Exception as e:
                # The token may already be in the page; try to read it anyway.
                print(f"Čekání na načtení stránky: {e}")

            token = extract_bearer_token(page)
            if token:
                print("Bearer token načten.")
            else:
                print("Nepodařilo se načíst Bearer token ze stránky.")
            return token
        finally:
            saved = save_cookies(context)
            print(f"Uloženo {saved} VZP cookies.")
            context.close()


def _download_missing(forms: list[dict], token: str, dry_run: bool) -> None:
    """Download result files not yet present in ``DOWNLOAD_DIR`` and print a summary.

    Args:
        forms: Submissions as returned by ``fetch_all_forms``.
        token: Bearer token passed to ``download_file``.
        dry_run: When True, only list what would be downloaded.
    """
    existing = set(os.listdir(DOWNLOAD_DIR))
    print(f"\nV archivu: {len(existing)} souborů.")
    print(f"Celkem podání v API: {len(forms)}\n")

    downloaded = 0
    skipped = 0
    no_file = 0
    failed = 0

    for form in forms:
        result_file = (form.get("result") or {}).get("resultFile") or {}
        file_id = result_file.get("fileId")
        # basename() keeps a server-supplied name from pointing outside
        # DOWNLOAD_DIR.
        orig_name = os.path.basename(result_file.get("name") or "")

        if not file_id or not orig_name:
            no_file += 1
            continue

        date_str = parse_date(form.get("created", ""))
        filename = f"{date_str} {orig_name}"
        state = form.get("state", "")

        if filename in existing:
            print(f" ✓ {filename}")
            skipped += 1
            continue

        # A null size would break the thousands-separator format below.
        size = result_file.get("size") or 0
        print(f" ↓ {filename} ({size:,} B) [{state}]")

        if dry_run:
            downloaded += 1
            continue

        form_id = form.get("id")
        if form_id is None:
            print(" Chyba: podání nemá id")
            failed += 1
            continue

        dest = os.path.join(DOWNLOAD_DIR, filename)
        if download_file(token, form_id, file_id, dest):
            existing.add(filename)
            downloaded += 1
        else:
            failed += 1

    print()
    if dry_run:
        print(f"[dry-run] Ke stažení: {downloaded}, přeskočeno: {skipped}, bez souboru: {no_file}")
    else:
        print(f"Staženo: {downloaded}, přeskočeno (již existovalo): {skipped}, bez souboru: {no_file}")
    if failed:
        print(f"Selhalo: {failed}")


def main() -> None:
    """Run the downloader: log in, list submissions, fetch missing result files.

    Dry-run mode is enabled by ``DRY_RUN`` or the ``--dry-run`` argument.
    Exits with status 1 when Playwright is missing, login fails, no token is
    found, or the submission list cannot be loaded.
    """
    dry_run = DRY_RUN or "--dry-run" in sys.argv
    if dry_run:
        print("[dry-run] Pouze zobrazuji co by se stáhlo, nic nestahuju.\n")

    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("Chybí playwright: pip install playwright && playwright install chrome")
        sys.exit(1)

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    _set_chrome_cert_policy()

    token = _login_and_get_token(sync_playwright)
    if not token:
        sys.exit(1)

    print("\nNačítám seznam podání...")
    try:
        forms = fetch_all_forms(token)
    except Exception as e:
        print(f"Chyba načítání podání: {e}")
        sys.exit(1)

    _download_missing(forms, token, dry_run)


if __name__ == "__main__":
    main()
|