notebookvb
This commit is contained in:
@@ -0,0 +1,265 @@
|
||||
"""
|
||||
Stáhni odeslaná podání z VZP Point (sekce „Odeslaná podání").
|
||||
Načte Bearer token ze stránky Desk/FormDashboard, pak volá REST API /api/desk/form.
|
||||
Stahuje podání s přiloženým výsledkovým souborem — přeskočí ty, co už existují.
|
||||
Použití: python stahovanipodani.py [--dry-run]
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import winreg
|
||||
|
||||
try:
|
||||
import requests as req_lib
|
||||
except ImportError:
|
||||
print("Chybí requests: pip install requests")
|
||||
sys.exit(1)
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", ".."))
|
||||
from Knihovny.najdi_dropbox import get_dropbox_root
|
||||
|
||||
# Page the browser is sent to; its inline scripts are searched for the
# bearer token used for the REST calls.
DASHBOARD_URL = "https://point.vzp.cz/Desk/FormDashboard"
# Base endpoint of the forms REST API (listing and result-file metadata).
API_BASE = "https://point.vzp.cz/api/desk/form"
# Value sent as the ``pageSize`` query parameter when listing forms.
PAGE_SIZE = 50

# Browser profile and cookie dump are kept next to this script.
_SCRIPT_DIR = os.path.dirname(__file__)
CHROME_PROFILE = os.path.abspath(os.path.join(_SCRIPT_DIR, "chrome_profile"))
COOKIES_FILE = os.path.abspath(os.path.join(_SCRIPT_DIR, "vzp_cookies.json"))

# Archive folder for downloaded result files, below the Dropbox root.
DOWNLOAD_DIR = os.path.join(
    get_dropbox_root(),
    "Ordinace",
    "Dokumentace_ke_zpracování",
    "Zúčtovací zprávy",
    "111 VZP Podání",
)

# Hard-wired dry-run switch; main() also honours ``--dry-run`` on the command line.
DRY_RUN = False
|
||||
|
||||
|
||||
def load_cookies(context) -> int:
    """Restore previously saved cookies into a browser context.

    Reads the JSON list stored in ``COOKIES_FILE`` and hands it to
    ``context.add_cookies``. Loading is best-effort: a missing, unreadable
    or malformed file never aborts the run, but any problem other than a
    missing file is reported on stdout instead of being swallowed silently.

    Args:
        context: Object exposing ``add_cookies(list_of_cookie_dicts)``.

    Returns:
        Number of cookies passed to the context, or 0 when nothing was loaded.
    """
    try:
        with open(COOKIES_FILE, "r", encoding="utf-8") as f:
            cookies = json.load(f)
    except FileNotFoundError:
        # First run: there is simply nothing to restore yet.
        return 0
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and bad encodings.
        print(f"Varování: nelze načíst cookies z {COOKIES_FILE}: {e}")
        return 0

    if not isinstance(cookies, list):
        print(f"Varování: neočekávaný obsah {COOKIES_FILE}, cookies ignorovány.")
        return 0
    if not cookies:
        return 0

    try:
        context.add_cookies(cookies)
    except Exception as e:
        # The context may reject stale or malformed entries; carry on without them.
        print(f"Varování: cookies nelze vložit do prohlížeče: {e}")
        return 0
    return len(cookies)
|
||||
|
||||
|
||||
def save_cookies(context) -> int:
    """Persist the context's vzp.cz cookies to ``COOKIES_FILE`` as JSON.

    Only cookies whose domain is ``vzp.cz`` or one of its subdomains are
    written. The match is done on the domain suffix rather than as a
    substring, so unrelated hosts that merely contain the text "vzp.cz"
    are not stored. Saving is best-effort: failures are reported on stdout
    and do not raise.

    Args:
        context: Object exposing ``cookies()`` that returns a list of cookie
            dicts with a ``domain`` key.

    Returns:
        Number of cookies written, or 0 when saving failed.
    """
    def _is_vzp(cookie: dict) -> bool:
        # Cookie domains may carry a leading dot (".vzp.cz").
        host = (cookie.get("domain") or "").lstrip(".").lower()
        return host == "vzp.cz" or host.endswith(".vzp.cz")

    try:
        vzp = [c for c in context.cookies() if _is_vzp(c)]
        with open(COOKIES_FILE, "w", encoding="utf-8") as f:
            json.dump(vzp, f, indent=2, ensure_ascii=False)
        return len(vzp)
    except Exception as e:
        print(f"Varování: nelze uložit cookies do {COOKIES_FILE}: {e}")
        return 0
|
||||
|
||||
|
||||
# Issuer common name used to pick the client certificate automatically.
CERT_ISSUER_CN = "I.CA Public CA/RSA 06/2022"


def _set_chrome_cert_policy() -> None:
    """Write Chrome's AutoSelectCertificateForUrls policy for vzp.cz.

    Stores, under HKEY_CURRENT_USER, a policy entry named ``"1"`` that maps
    the URL pattern ``https://[*.]vzp.cz`` to certificates issued by
    ``CERT_ISSUER_CN``. The call is best-effort: a registry error is only
    reported as a warning on stdout.
    """
    policy = json.dumps({
        "pattern": "https://[*.]vzp.cz",
        "filter": {"ISSUER": {"CN": CERT_ISSUER_CN}},
    })
    key_path = r"SOFTWARE\Policies\Google\Chrome\AutoSelectCertificateForUrls"
    try:
        # The handle is a context manager, so it is closed even when
        # SetValueEx raises.
        with winreg.CreateKey(winreg.HKEY_CURRENT_USER, key_path) as key:
            winreg.SetValueEx(key, "1", 0, winreg.REG_SZ, policy)
    except OSError as e:
        print(f" Varování: nelze nastavit Chrome politiku: {e}")
    else:
        print(f" Chrome politika nastavena (issuer: {CERT_ISSUER_CN})")
|
||||
|
||||
|
||||
def extract_bearer_token(page) -> str | None:
|
||||
"""Extrahuje Bearer token z inline <script> tagu vloženého do HTML stránky."""
|
||||
scripts = page.evaluate(
|
||||
"() => Array.from(document.querySelectorAll('script:not([src])')).map(s => s.textContent)"
|
||||
)
|
||||
for text in scripts:
|
||||
m = re.search(r'"bearerToken"\s*:\s*"([^"]+)"', text)
|
||||
if m:
|
||||
return m.group(1)
|
||||
return None
|
||||
|
||||
|
||||
def fetch_all_forms(token: str) -> list[dict]:
    """Download the complete list of forms from the REST API, page by page.

    Requests ``API_BASE`` with increasing ``pageNumber`` (starting at 1) and
    ``pageSize=PAGE_SIZE`` and concatenates the ``items`` of every response.
    Paging stops when the response no longer reports ``canLoadMore`` or when
    a page comes back empty, so a misbehaving server cannot keep the loop
    running forever.

    Args:
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        All form dicts in the order the API returned them.

    Raises:
        Whatever the HTTP client raises for connection problems or a
        non-success status (via ``raise_for_status``).
    """
    headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
    all_items: list[dict] = []
    page_num = 1
    while True:
        r = req_lib.get(
            API_BASE,
            headers=headers,
            params={"pageNumber": page_num, "pageSize": PAGE_SIZE},
            timeout=30,
        )
        r.raise_for_status()
        data = r.json()
        # Treat a missing or null "items" field as an empty page.
        items = data.get("items") or []
        all_items.extend(items)
        print(f" Stránka {page_num}: {len(items)} podání (celkem {len(all_items)})")
        if not items or not data.get("canLoadMore", False):
            break
        page_num += 1
    return all_items
|
||||
|
||||
|
||||
def parse_date(iso: str) -> str:
    """Return the first ten characters of *iso* (the ``YYYY-MM-DD`` part).

    A falsy value (empty string or ``None``) yields the placeholder
    ``"0000-00-00"``.
    """
    if not iso:
        return "0000-00-00"
    return iso[:10]
|
||||
|
||||
|
||||
def download_file(token: str, form_id: int, file_id: str, dest: str) -> bool:
    """Download one result file of a form to *dest*.

    Step 1 asks the API for the file's metadata and reads its ``publicUri``.
    Step 2 streams the content from that URI (no auth header) into a
    temporary ``<dest>.part`` file, which is renamed to *dest* only after
    the whole body has been written. An interrupted transfer therefore
    never leaves a truncated file under the final name, where a later run
    would mistake it for a finished download.

    Args:
        token: Bearer token for the metadata request.
        form_id: Identifier of the form the file belongs to.
        file_id: Identifier of the result file.
        dest: Final path of the downloaded file.

    Returns:
        ``True`` when the file was fully written to *dest*, ``False``
        otherwise (the reason is printed).
    """
    # Step 1: obtain publicUri from the API.
    meta_url = f"{API_BASE}/{form_id}/result/{file_id}"
    try:
        r = req_lib.get(meta_url, headers={"Authorization": f"Bearer {token}"}, timeout=30)
        r.raise_for_status()
        public_uri = r.json().get("publicUri")
    except Exception as e:
        print(f" Chyba načítání publicUri: {e}")
        return False
    if not public_uri:
        print(" Chyba: odpověď neobsahuje publicUri")
        return False

    # Step 2: stream the file straight from publicUri (no auth header).
    tmp_path = dest + ".part"
    try:
        # The context manager releases the streamed connection on any exit.
        with req_lib.get(public_uri, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(tmp_path, dest)
        return True
    except Exception as e:
        print(f" Chyba stahování souboru: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            # Nothing was written, or the partial file is already gone.
            pass
        return False
|
||||
|
||||
|
||||
def _safe_filename(name: str) -> str:
    """Replace path separators and characters invalid in Windows file names with ``_``."""
    return re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", name)


def _acquire_bearer_token(sync_playwright) -> str | None:
    """Open Chrome, sign in to VZP Point if needed and return the bearer token.

    Args:
        sync_playwright: The ``playwright.sync_api.sync_playwright`` factory.

    Returns:
        The token read from the dashboard page, or ``None`` when the login
        failed or the page carried no token. Cookies are saved and the
        browser context is closed in every case.
    """
    token = None
    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            user_data_dir=CHROME_PROFILE,
            channel="chrome",
            headless=False,
            slow_mo=100,
            # NOTE(review): this turns off HTTPS error checking for the whole
            # browser session — confirm it is really required.
            ignore_https_errors=True,
            accept_downloads=True,
            args=["--force-renderer-accessibility"],
        )
        try:
            loaded = load_cookies(context)
            print(f"Cookies načtené z JSON: {loaded}")

            page = context.new_page()

            print("Naviguji na VZP Point Odeslaná podání...")
            try:
                page.goto(DASHBOARD_URL, wait_until="domcontentloaded", timeout=30_000)
            except Exception as e:
                # A navigation error is not fatal yet; the URL checks below decide.
                print(f"Navigace: {e}")

            if page.url.startswith("https://auth.vzp.cz/signin"):
                print("Přihlašovací stránka — klikám na 'Certifikát'...")
                cert_btn = page.locator("a, button").filter(
                    has_text=re.compile(r"certifikát", re.I)
                ).first
                cert_btn.wait_for(state="visible", timeout=10_000)
                cert_btn.click(no_wait_after=True)
                print("Pokud se zobrazí dialog výběru certifikátu, vyberte ho ručně (max 60 s)...")
                # Fixed pause that gives the user time to pick the certificate.
                time.sleep(60)
                page = context.new_page()
                try:
                    page.goto(DASHBOARD_URL, wait_until="domcontentloaded", timeout=30_000)
                except Exception as e:
                    print(f"Navigace po auth: {e}")
                if not page.url.startswith("https://point.vzp.cz"):
                    print(f"Přihlášení selhalo. URL: {page.url}")
                    # The caller turns the missing token into exit code 1.
                    return None

            print("Přihlášení OK.")
            page.wait_for_load_state("networkidle", timeout=15_000)

            token = extract_bearer_token(page)
            if token:
                print("Bearer token načten.")
            else:
                print("Nepodařilo se načíst Bearer token ze stránky.")
        finally:
            saved = save_cookies(context)
            print(f"Uloženo {saved} VZP cookies.")
            context.close()
    return token


def main() -> None:
    """Download result files of sent submissions that are not archived yet.

    Steps: obtain a bearer token through a real Chrome session, list all
    forms via the REST API, then download every result file whose target
    name (``"<created date> <original name>"``) is not already present in
    ``DOWNLOAD_DIR``. With ``--dry-run`` (or ``DRY_RUN``) the files are only
    listed.

    Exits with status 1 when Playwright is missing, the login or token
    extraction fails, or the form list cannot be fetched. Individual
    download failures are counted and reported in the summary.
    """
    dry_run = DRY_RUN or "--dry-run" in sys.argv
    if dry_run:
        print("[dry-run] Pouze zobrazuji co by se stáhlo, nic nestahuju.\n")

    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("Chybí playwright: pip install playwright && playwright install chrome")
        sys.exit(1)

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    _set_chrome_cert_policy()

    token = _acquire_bearer_token(sync_playwright)
    if not token:
        sys.exit(1)

    print("\nNačítám seznam podání...")
    try:
        forms = fetch_all_forms(token)
    except Exception as e:
        print(f"Chyba načítání podání: {e}")
        sys.exit(1)

    existing = set(os.listdir(DOWNLOAD_DIR))
    print(f"\nV archivu: {len(existing)} souborů.")
    print(f"Celkem podání v API: {len(forms)}\n")

    downloaded = 0
    skipped = 0
    no_file = 0
    failed = 0

    for form in forms:
        result = form.get("result") or {}
        result_file = result.get("resultFile") or {}
        file_id = result_file.get("fileId")
        orig_name = result_file.get("name", "")

        if not file_id or not orig_name:
            no_file += 1
            continue

        date_str = parse_date(form.get("created", ""))
        # The name comes from the server; keep it from escaping DOWNLOAD_DIR
        # or containing characters the file system rejects.
        filename = _safe_filename(f"{date_str} {orig_name}")
        state = form.get("state", "")

        if filename in existing:
            print(f" ✓ {filename}")
            skipped += 1
            continue

        size = result_file.get("size") or 0
        print(f" ↓ {filename} ({size:,} B) [{state}]")

        if dry_run:
            downloaded += 1
            continue

        form_id = form.get("id")
        if form_id is None:
            print(" Chyba: podání nemá id")
            failed += 1
            continue

        dest = os.path.join(DOWNLOAD_DIR, filename)
        if download_file(token, form_id, file_id, dest):
            existing.add(filename)
            downloaded += 1
        else:
            failed += 1

    print()
    if dry_run:
        print(f"[dry-run] Ke stažení: {downloaded}, přeskočeno: {skipped}, bez souboru: {no_file}")
    else:
        print(f"Staženo: {downloaded}, přeskočeno (již existovalo): {skipped}, bez souboru: {no_file}")
    if failed:
        print(f"Selhalo: {failed}")
|
||||
|
||||
|
||||
# Run the downloader only when the file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user