Odstraň chrome_profile z gitu, přidej VoZP skripty
- git rm --cached chrome_profile (111 VZP) — omylem přidaný profil - .gitignore: **/chrome_profile/, **/cookies.json — generické vzory místo 1160 specifických řádků - Přidány skripty 01/02/03 pro VoZP (201 VoZP) - Přidány skripty 01/02/03 pro VZP (111 VZP) - Staženo/ zůstává v gitu Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,141 @@
|
||||
"""
|
||||
01 - Přihlášení na portál VoZP (prehled-zprav-ve-schrankach)
|
||||
Otevře Chrome, přihlásí se certifikátem přes Signer komponentu a naviguje na schránku zpráv.
|
||||
Okno zůstane otevřené — skript čeká na stisk Enter.
|
||||
Použití: python 01_prihlaseni.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import winreg
|
||||
|
||||
# Portal pages this script navigates to: the login page and the message inbox.
LOGIN_URL = "https://portal.vozp.cz/app/prihlaseni"
INBOX_URL = "https://portal.vozp.cz/app/prehled-zprav-ve-schrankach"
# Chrome user-data directory and cookie dump, both resolved next to this script.
CHROME_PROFILE = os.path.abspath(os.path.join(os.path.dirname(__file__), "chrome_profile"))
COOKIES_FILE = os.path.abspath(os.path.join(os.path.dirname(__file__), "vozp_cookies.json"))
|
||||
|
||||
|
||||
def load_cookies(context) -> int:
    """Restore previously saved cookies into the browser context.

    Returns the number of cookies loaded, or 0 when the cookie file does
    not exist or cannot be read/applied (the error is printed).
    """
    if os.path.exists(COOKIES_FILE):
        try:
            with open(COOKIES_FILE, "r", encoding="utf-8") as handle:
                stored = json.load(handle)
            context.add_cookies(stored)
            count = len(stored)
        except Exception as e:
            print(f" Chyba při načítání cookies: {e}")
            count = 0
        return count
    return 0
|
||||
|
||||
|
||||
def save_cookies(context) -> int:
    """Write the VoZP cookies of the context (session-only ones too) to JSON.

    Only cookies whose domain contains "vozp.cz" or "portalzp.cz" are kept.
    Returns the number of cookies written, or 0 on any failure (the error
    is printed).
    """
    wanted_domains = ["vozp.cz", "portalzp.cz"]
    try:
        kept = []
        for cookie in context.cookies():
            domain = cookie.get("domain", "")
            if any(part in domain for part in wanted_domains):
                kept.append(cookie)
        with open(COOKIES_FILE, "w", encoding="utf-8") as handle:
            json.dump(kept, handle, indent=2, ensure_ascii=False)
        return len(kept)
    except Exception as e:
        print(f" Chyba při ukládání cookies: {e}")
        return 0
|
||||
|
||||
|
||||
def _delete_chrome_cert_policy() -> None:
    """Delete value "1" of Chrome's AutoSelectCertificateForUrls policy key.

    The key is looked up under HKEY_CURRENT_USER. According to the author's
    note, removing the policy lets Chrome show its certificate dialog
    itself. The operation is best effort: a missing key/value or a denied
    access is ignored.
    """
    key_path = r"SOFTWARE\Policies\Google\Chrome\AutoSelectCertificateForUrls"
    try:
        # The handle works as a context manager, so it is closed even when
        # DeleteValue raises.
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, key_path, access=winreg.KEY_SET_VALUE) as key:
            winreg.DeleteValue(key, "1")
    except OSError:
        # winreg reports a missing key/value and permission problems as OSError.
        pass
|
||||
|
||||
|
||||
def _login_with_certificate(page, wait_s: int = 30) -> None:
    """Click the certificate login button, wait for the Signer, reopen the inbox."""
    try:
        page.goto(LOGIN_URL, wait_until="domcontentloaded", timeout=30_000)
    except Exception as e:
        print(f"Navigace na login: {e}")

    cert_btn = page.locator("button").filter(has_text="certifikátem").first
    cert_btn.wait_for(state="visible", timeout=10_000)
    cert_btn.click(no_wait_after=True)

    # The prompt states the time the script really waits, so the user is
    # not told about a longer window than the one that exists.
    print(f"Vyskočí Signer komponenta — klikněte ANO (max {wait_s} s)...")
    time.sleep(wait_s)

    # After the Signer step, go back to the inbox.
    print("Naviguji na schránku zpráv...")
    try:
        page.goto(INBOX_URL, wait_until="domcontentloaded", timeout=30_000)
    except Exception as e:
        print(f"Navigace po auth: {e}")


def _print_cookie_report(context) -> None:
    """Print every VoZP cookie of the context and whether it has an expiry."""
    vozp_cookies = [c for c in context.cookies() if any(
        d in c.get("domain", "") for d in ["vozp.cz", "portalzp.cz"]
    )]
    print(f"Cookies po auth: {len(vozp_cookies)}")
    for c in vozp_cookies:
        exp = c.get("expires", -1)
        # A positive expiry is reported as persistent, anything else as session-only.
        persistent = "PERSISTENT" if exp > 0 else "SESSION-ONLY"
        print(f" - {c['name'][:60]} ({c['domain']}) [{persistent}]")


def main() -> None:
    """Open Chrome, log in to the VoZP portal and keep the window open.

    Steps: remove the Chrome certificate policy, launch a persistent Chrome
    context, restore saved cookies, open the inbox and, if redirected to
    the login page, perform the certificate login. On success the cookies
    are listed and the script waits for Enter. Cookies are saved and the
    context is closed in every case.

    Exits with status 1 when Playwright is not installed.
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("Chybí playwright: pip install playwright && playwright install chrome")
        sys.exit(1)

    _delete_chrome_cert_policy()

    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            user_data_dir=CHROME_PROFILE,
            channel="chrome",
            headless=False,
            slow_mo=200,
            ignore_https_errors=True,
            args=["--force-renderer-accessibility"],
        )
        try:
            loaded = load_cookies(context)
            print(f"Profil: {CHROME_PROFILE}")
            print(f"Cookies z JSON: {loaded}")

            page = context.new_page()

            # Try the inbox directly; with a valid session we stay there.
            print("Naviguji na schránku zpráv...")
            try:
                page.goto(INBOX_URL, wait_until="domcontentloaded", timeout=30_000)
            except Exception as e:
                print(f"Navigace: {e}")

            # Redirected to the login page: authenticate with the certificate.
            if "prihlaseni" in page.url or "login" in page.url.lower():
                print("Přihlašovací stránka — klikám na 'Přihlásit se certifikátem'...")
                _login_with_certificate(page)

            if "prehled-zprav" not in page.url and "uvodni-stranka" not in page.url:
                print(f"Přihlášení selhalo. URL: {page.url}")
                return

            print(f"OK — přihlášení úspěšné. URL: {page.url}")

            # Cookie diagnostics.
            _print_cookie_report(context)

            print("Okno zůstane otevřené. Stiskněte Enter pro zavření...")
            input()

        finally:
            saved = save_cookies(context)
            print(f"Uloženo {saved} VoZP cookies do {COOKIES_FILE}")
            context.close()
|
||||
|
||||
|
||||
# Run the login flow only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
02 - Stažení VŠECH zpráv ze všech schránek VoZP portálu (jednorázová akce)
|
||||
Prochází stránkování, stahuje všechny soubory a pojmenuje je:
|
||||
YYYY-MM-DD Popis (původní_název).přípona
|
||||
Použití: python 02_stahuj_vse.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import winreg
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import requests as req
|
||||
|
||||
# Portal URLs: login page, site root, inbox overview and attachment download endpoint.
LOGIN_URL = "https://portal.vozp.cz/app/prihlaseni"
BASE_URL = "https://portal.vozp.cz"
INBOX_URL = f"{BASE_URL}/app/prehled-zprav-ve-schrankach"
DOWNLOAD_URL = f"{BASE_URL}/html/prehled-zprav-ve-schrankach/zobrazit-prilohu"

# Local paths, all located next to this script.
CHROME_PROFILE = os.path.abspath(os.path.join(os.path.dirname(__file__), "chrome_profile"))
COOKIES_FILE = os.path.abspath(os.path.join(os.path.dirname(__file__), "vozp_cookies.json"))
DOWNLOAD_DIR = os.path.join(os.path.dirname(__file__), "Staženo")

# All mailboxes — URL ID segment : display name
SCHRANKY = {
    "171-schranka-poskytovatele-zdravotnich-sluzeb": "Schránka PZS",
    "183-schranka-klientu-portalu": "Schránka klientů portálu",
    "185-schranka-pzs": "Schránka PZS2",
    "187-schranka-klienta": "Schránka klienta",
    "198-vypis-registrovanych-pacientu": "Výpis registrovaných pacientů",
    "200-zuctovaci-zpravy": "Zúčtovací zprávy",
    "205-vypis-osobnich-uctu-pojistencu": "Výpis osobních účtů pojištěnců",
}
|
||||
|
||||
|
||||
def load_cookies(context) -> int:
    """Restore previously saved cookies into the browser context.

    Returns the number of cookies loaded, or 0 when the cookie file does
    not exist or cannot be read/applied. A failure is reported on stdout
    instead of being dropped silently, so a corrupt cookie file is visible.
    """
    if not os.path.exists(COOKIES_FILE):
        return 0
    try:
        with open(COOKIES_FILE, "r", encoding="utf-8") as f:
            cookies = json.load(f)
        context.add_cookies(cookies)
        return len(cookies)
    except Exception as e:
        # Best effort: a broken cookie file must not stop the run.
        print(f" Chyba při načítání cookies: {e}")
        return 0
|
||||
|
||||
|
||||
def save_cookies(context) -> int:
    """Write the VoZP cookies of the context to the JSON cookie file.

    Only cookies whose domain contains "vozp.cz" or "portalzp.cz" are kept.
    Returns the number of cookies written, or 0 on failure. A failure is
    reported on stdout instead of being dropped silently.
    """
    try:
        all_cookies = context.cookies()
        vozp = [c for c in all_cookies if any(
            d in c.get("domain", "") for d in ["vozp.cz", "portalzp.cz"]
        )]
        with open(COOKIES_FILE, "w", encoding="utf-8") as f:
            json.dump(vozp, f, indent=2, ensure_ascii=False)
        return len(vozp)
    except Exception as e:
        # Best effort: saving cookies runs during cleanup and must not raise.
        print(f" Chyba při ukládání cookies: {e}")
        return 0
|
||||
|
||||
|
||||
def _delete_chrome_cert_policy() -> None:
    """Delete value "1" of Chrome's AutoSelectCertificateForUrls policy key.

    The key is looked up under HKEY_CURRENT_USER. The operation is best
    effort: a missing key/value or a denied access is ignored.
    """
    key_path = r"SOFTWARE\Policies\Google\Chrome\AutoSelectCertificateForUrls"
    try:
        # The handle works as a context manager, so it is closed even when
        # DeleteValue raises.
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, key_path, access=winreg.KEY_SET_VALUE) as key:
            winreg.DeleteValue(key, "1")
    except OSError:
        # winreg reports a missing key/value and permission problems as OSError.
        pass
|
||||
|
||||
|
||||
def parse_date(date_str: str) -> str:
    """Convert 'DD.MM.YYYY HH:MM:SS' (or just 'DD.MM.YYYY') to 'YYYY-MM-DD'.

    The full timestamp layout is tried first, then the date-only layout.
    Returns '0000-00-00' when neither matches.
    """
    layouts = (("%d.%m.%Y %H:%M:%S", 19), ("%d.%m.%Y", 10))
    for layout, width in layouts:
        try:
            parsed = datetime.strptime(date_str.strip()[:width], layout)
            return parsed.strftime("%Y-%m-%d")
        except Exception:
            continue
    return "0000-00-00"
|
||||
|
||||
|
||||
def safe_filename(name: str) -> str:
    """Replace characters that are not allowed in a file name with '_'.

    Surrounding whitespace is removed first. Then the reserved characters
    \\ / : * ? " < > | and the ASCII control characters 0-31 (tabs, line
    breaks, ...) are each replaced by an underscore; Windows rejects all
    of them in file names.
    """
    # Strip before substituting so leading/trailing whitespace is still
    # dropped rather than turned into underscores.
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", name.strip())
    return cleaned.strip()
|
||||
|
||||
|
||||
def parse_row(cells: list[str]) -> dict:
    """Turn the text cells of one table row into download metadata.

    Cell layout used here:
        cells[1] = date
        cells[2] = multi-line description
        cells[3] = 'file_name.ext (DD.MM.YYYY)'

    Returns a dict with the keys "date" (ISO date), "desc" (description,
    at most 80 characters), "original" (file name as shown in the row) and
    "filename" (target name 'YYYY-MM-DD description (stem).ext').
    """
    def cell(index: int) -> str:
        # Missing cells are treated as empty text.
        return cells[index].strip() if len(cells) > index else ""

    date_raw, desc_raw, fname_raw = cell(1), cell(2), cell(3)

    # Description lines are separated by \n; prefer the third line, then
    # the second, then the first.
    desc_lines = [line.strip() for line in desc_raw.split("\n") if line.strip()]
    description = desc_lines[min(len(desc_lines), 3) - 1] if desc_lines else ""

    # Keep overly long descriptions short.
    description = description[:80]

    # "Protokol-OK.html (15.04.2026)" -> stem "Protokol-OK", ext ".html"
    fname_match = re.match(r'^(.+?)\s*\(\d{2}\.\d{2}\.\d{4}\)\s*$', fname_raw)
    if fname_match:
        original = fname_match.group(1).strip()
    else:
        original = fname_raw.split("(")[0].strip()
    orig_path = Path(original)
    stem = orig_path.stem or "zprava"
    ext = orig_path.suffix

    date_iso = parse_date(date_raw)
    clean_stem = safe_filename(stem)
    name = f"{date_iso} {safe_filename(description)} ({clean_stem}){ext}"
    if len(name) > 240:
        # Too long: drop the description from the name.
        name = f"{date_iso} ({clean_stem}){ext}"

    return {
        "date": date_iso,
        "desc": description,
        "original": original,
        "filename": name,
    }
|
||||
|
||||
|
||||
def collect_rows(page) -> list[dict]:
    """Return the downloadable rows of the table on the current page.

    The in-page script keeps rows with at least four cells and a link whose
    onclick mentions SchrPolOpenFile; the first number in that onclick
    becomes "fileId". Rows without a file id are dropped here.
    """
    script = """() => {
        const rows = [];
        for (const tr of document.querySelectorAll('table tr')) {
            const cells = Array.from(tr.querySelectorAll('td')).map(td => td.innerText.trim());
            if (cells.length < 4) continue;
            const dlLink = tr.querySelector('a[onclick*="SchrPolOpenFile"]');
            if (!dlLink) continue;
            const m = dlLink.getAttribute('onclick').match(/\\d+/);
            rows.push({ cells, fileId: m ? m[0] : null });
        }
        return rows;
    }"""
    usable = []
    for entry in page.evaluate(script):
        if entry["fileId"]:
            usable.append(entry)
    return usable
|
||||
|
||||
|
||||
def make_requests_session(context) -> req.Session:
    """Build a requests.Session carrying the cookies of the Playwright context.

    Cookie domains are stored without a leading dot. The session also gets
    a browser-like User-Agent and the inbox URL as Referer.
    """
    session = req.Session()
    session.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Referer": INBOX_URL,
    })
    for cookie in context.cookies():
        bare_domain = cookie.get("domain", "").lstrip(".")
        session.cookies.set(cookie["name"], cookie["value"], domain=bare_domain)
    return session
|
||||
|
||||
|
||||
def download_file(session: req.Session, file_id: str, target: str) -> bool:
    """Download one attachment over HTTP and store it at ``target``.

    The data is streamed into a temporary ".part" file in the target
    directory and moved into place only after the whole transfer
    succeeded. A failed or interrupted download therefore never leaves a
    truncated file at ``target`` that a later run would mistake for an
    already downloaded message.

    Args:
        session: HTTP session carrying the portal cookies.
        file_id: Message id passed as the ``zprava_id`` query parameter.
        target: Final path of the downloaded file.

    Returns:
        True on success, False on any error (the error is printed).
    """
    url = f"{DOWNLOAD_URL}?zprava_id={file_id}"
    # Short temp name: avoids lengthening an already long target path.
    partial = os.path.join(os.path.dirname(target), f".{file_id}.part")
    try:
        # The context manager releases the streamed connection in every case.
        with session.get(url, timeout=30, stream=True) as r:
            r.raise_for_status()
            with open(partial, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(partial, target)
        return True
    except Exception as e:
        print(f" Chyba stahování (id={file_id}): {e}")
        try:
            os.remove(partial)
        except OSError:
            # Nothing was written yet, or the file is already gone.
            pass
        return False
|
||||
|
||||
|
||||
def process_schránka(page, session: req.Session, segment: str, name: str, already: set) -> tuple[int, int]:
    """Walk every page of one mailbox and download the files not yet archived.

    Args:
        page: Playwright page used to open the mailbox listing.
        session: HTTP session used for the file downloads.
        segment: URL segment identifying the mailbox.
        name: Display name of the mailbox (not used inside this function).
        already: File names known to be in the archive; every successful
            download is added to it in place.

    Returns:
        ``(downloaded, skipped)``. A failed download is counted in neither.
    """
    downloaded = 0
    skipped = 0
    page_num = 1

    while True:
        url = f"{INBOX_URL}/{segment}/stranka-{page_num}"
        print(f" Stránka {page_num}: {url}")
        try:
            page.goto(url, wait_until="domcontentloaded", timeout=30_000)
        except Exception as e:
            print(f" Navigace selhala: {e}")
            break

        try:
            page.wait_for_load_state("networkidle", timeout=15_000)
        except Exception as e:
            # A page that never becomes network-idle may still have its
            # table rendered; report it and try to read the rows anyway
            # instead of aborting the whole run.
            print(f" Čekání na načtení stránky selhalo: {e}")
        rows = collect_rows(page)

        if not rows:
            print(f" Stránka {page_num} — žádné řádky, končím schránku.")
            break

        print(f" Nalezeno {len(rows)} zpráv.")

        for row in rows:
            info = parse_row(row["cells"])
            target = os.path.join(DOWNLOAD_DIR, info["filename"])

            if info["filename"] in already or os.path.exists(target):
                skipped += 1
                continue

            print(f" Stahuji: {info['filename']}")
            if download_file(session, row["fileId"], target):
                already.add(info["filename"])
                downloaded += 1
            # Short pause between download attempts.
            time.sleep(0.3)

        # Check whether an enabled "next page" link exists.
        has_next = page.evaluate("""() => {
            return !!Array.from(document.querySelectorAll('a')).find(
                a => a.innerText.trim() === 'Další stránka' && !a.closest('[aria-disabled]')
            );
        }""")
        if not has_next:
            break
        page_num += 1

    return downloaded, skipped
|
||||
|
||||
|
||||
def ensure_logged_in(page, context, signer_wait_s: float = 40) -> bool:
    """Make sure the session is logged in, logging in by certificate if needed.

    Opens the inbox; when that lands on a login page, clicks the
    certificate login button, waits ``signer_wait_s`` seconds for the user
    to confirm in the Signer component and opens the inbox again.

    Args:
        page: Playwright page to drive.
        context: Browser context (not used inside this function).
        signer_wait_s: Seconds to wait after clicking the certificate button.

    Returns:
        True when the page is not on a login URL afterwards, False otherwise.
    """
    def on_login_page() -> bool:
        # One predicate for both checks, so a redirect to a "login" URL
        # after the attempt is recognised as a failure as well.
        current = page.url
        return "prihlaseni" in current or "login" in current.lower()

    try:
        page.goto(INBOX_URL, wait_until="domcontentloaded", timeout=30_000)
    except Exception as e:
        print(f"Navigace: {e}")

    if on_login_page():
        print("Nutné přihlášení — klikám na 'Přihlásit se certifikátem'...")
        try:
            page.goto(LOGIN_URL, wait_until="domcontentloaded", timeout=30_000)
        except Exception as e:
            print(f"Navigace na login: {e}")
        cert_btn = page.locator("button").filter(has_text="certifikátem").first
        cert_btn.wait_for(state="visible", timeout=10_000)
        cert_btn.click(no_wait_after=True)
        # The prompt states the time the script really waits.
        print(f"Vyskočí Signer komponenta — klikněte ANO (max {signer_wait_s:g} s)...")
        time.sleep(signer_wait_s)
        try:
            page.goto(INBOX_URL, wait_until="domcontentloaded", timeout=30_000)
        except Exception as e:
            print(f"Navigace po auth: {e}")
        if on_login_page():
            print("Přihlášení selhalo.")
            return False

    print(f"Přihlášení OK. URL: {page.url}")
    return True
|
||||
|
||||
|
||||
def main() -> None:
    """Download all messages from every mailbox in SCHRANKY.

    Launches a persistent Chrome context, restores cookies, ensures the
    session is logged in and processes the mailboxes one by one. Cookies
    are saved and the context is closed in every case. Exits with status 1
    when Playwright is not installed.
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("Chybí playwright: pip install playwright && playwright install chrome")
        sys.exit(1)

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    _delete_chrome_cert_policy()

    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            user_data_dir=CHROME_PROFILE,
            channel="chrome",
            headless=False,
            slow_mo=100,
            ignore_https_errors=True,
            args=["--force-renderer-accessibility"],
        )
        try:
            restored = load_cookies(context)
            print(f"Cookies načtené z JSON: {restored}")

            page = context.new_page()

            if ensure_logged_in(page, context):
                session = make_requests_session(context)
                already = set(os.listdir(DOWNLOAD_DIR))
                print(f"V archivu: {len(already)} souborů.\n")

                grand_downloaded = 0
                grand_skipped = 0
                for segment, name in SCHRANKY.items():
                    print(f"\n=== Schránka: {name} ===")
                    got, passed = process_schránka(page, session, segment, name, already)
                    print(f" Schránka {name}: staženo {got}, přeskočeno {passed}")
                    grand_downloaded += got
                    grand_skipped += passed

                print(f"\n{'='*50}")
                print(f"Hotovo. Celkem staženo: {grand_downloaded}, přeskočeno: {grand_skipped}")
        finally:
            saved = save_cookies(context)
            print(f"Uloženo {saved} VoZP cookies.")
            context.close()
|
||||
|
||||
|
||||
# Run the full download only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,310 @@
|
||||
"""
|
||||
03 - Rozdílové stažení nových zpráv ze schránek VoZP portálu
|
||||
Jde od nejnovější zprávy dolů. Jakmile narazí na zprávu, kterou už máme
|
||||
staženou (cílový soubor existuje), ukončí danou schránku. Prochází stránky
|
||||
pokud nenajde existující zprávu v aktuální stránce.
|
||||
Použití: python 03_stahuj_nove.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import winreg
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import requests as req
|
||||
|
||||
# Portal URLs: login page, site root, inbox overview and attachment download endpoint.
LOGIN_URL = "https://portal.vozp.cz/app/prihlaseni"
BASE_URL = "https://portal.vozp.cz"
INBOX_URL = f"{BASE_URL}/app/prehled-zprav-ve-schrankach"
DOWNLOAD_URL = f"{BASE_URL}/html/prehled-zprav-ve-schrankach/zobrazit-prilohu"

# Local paths, all located next to this script.
CHROME_PROFILE = os.path.abspath(os.path.join(os.path.dirname(__file__), "chrome_profile"))
COOKIES_FILE = os.path.abspath(os.path.join(os.path.dirname(__file__), "vozp_cookies.json"))
DOWNLOAD_DIR = os.path.join(os.path.dirname(__file__), "Staženo")

# Mailboxes to process — URL ID segment : display name
SCHRANKY = {
    "171-schranka-poskytovatele-zdravotnich-sluzeb": "Schránka PZS",
    "183-schranka-klientu-portalu": "Schránka klientů portálu",
    "185-schranka-pzs": "Schránka PZS2",
    "187-schranka-klienta": "Schránka klienta",
    "198-vypis-registrovanych-pacientu": "Výpis registrovaných pacientů",
    "200-zuctovaci-zpravy": "Zúčtovací zprávy",
    "205-vypis-osobnich-uctu-pojistencu": "Výpis osobních účtů pojištěnců",
}
|
||||
|
||||
|
||||
def load_cookies(context) -> int:
    """Restore previously saved cookies into the browser context.

    Returns the number of cookies loaded, or 0 when the cookie file does
    not exist or cannot be read/applied. A failure is reported on stdout
    instead of being dropped silently, so a corrupt cookie file is visible.
    """
    if not os.path.exists(COOKIES_FILE):
        return 0
    try:
        with open(COOKIES_FILE, "r", encoding="utf-8") as f:
            cookies = json.load(f)
        context.add_cookies(cookies)
        return len(cookies)
    except Exception as e:
        # Best effort: a broken cookie file must not stop the run.
        print(f" Chyba při načítání cookies: {e}")
        return 0
|
||||
|
||||
|
||||
def save_cookies(context) -> int:
    """Write the VoZP cookies of the context to the JSON cookie file.

    Only cookies whose domain contains "vozp.cz" or "portalzp.cz" are kept.
    Returns the number of cookies written, or 0 on failure. A failure is
    reported on stdout instead of being dropped silently.
    """
    try:
        all_cookies = context.cookies()
        vozp = [c for c in all_cookies if any(
            d in c.get("domain", "") for d in ["vozp.cz", "portalzp.cz"]
        )]
        with open(COOKIES_FILE, "w", encoding="utf-8") as f:
            json.dump(vozp, f, indent=2, ensure_ascii=False)
        return len(vozp)
    except Exception as e:
        # Best effort: saving cookies runs during cleanup and must not raise.
        print(f" Chyba při ukládání cookies: {e}")
        return 0
|
||||
|
||||
|
||||
def _delete_chrome_cert_policy() -> None:
    """Delete value "1" of Chrome's AutoSelectCertificateForUrls policy key.

    The key is looked up under HKEY_CURRENT_USER. The operation is best
    effort: a missing key/value or a denied access is ignored.
    """
    key_path = r"SOFTWARE\Policies\Google\Chrome\AutoSelectCertificateForUrls"
    try:
        # The handle works as a context manager, so it is closed even when
        # DeleteValue raises.
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, key_path, access=winreg.KEY_SET_VALUE) as key:
            winreg.DeleteValue(key, "1")
    except OSError:
        # winreg reports a missing key/value and permission problems as OSError.
        pass
|
||||
|
||||
|
||||
def parse_date(date_str: str) -> str:
    """Convert 'DD.MM.YYYY HH:MM:SS' (or just 'DD.MM.YYYY') to 'YYYY-MM-DD'.

    The full timestamp layout is tried first, then the date-only layout.
    Returns '0000-00-00' when neither matches.
    """
    layouts = (("%d.%m.%Y %H:%M:%S", 19), ("%d.%m.%Y", 10))
    for layout, width in layouts:
        try:
            parsed = datetime.strptime(date_str.strip()[:width], layout)
            return parsed.strftime("%Y-%m-%d")
        except Exception:
            continue
    return "0000-00-00"
|
||||
|
||||
|
||||
def safe_filename(name: str) -> str:
    """Replace characters that are not allowed in a file name with '_'.

    Surrounding whitespace is removed first. Then the reserved characters
    \\ / : * ? " < > | and the ASCII control characters 0-31 (tabs, line
    breaks, ...) are each replaced by an underscore; Windows rejects all
    of them in file names.
    """
    # Strip before substituting so leading/trailing whitespace is still
    # dropped rather than turned into underscores.
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", name.strip())
    return cleaned.strip()
|
||||
|
||||
|
||||
def parse_row(cells: list[str]) -> dict:
    """Turn the text cells of one table row into download metadata.

    Reads the date from cells[1], a multi-line description from cells[2]
    and the displayed file name from cells[3].

    Returns a dict with the keys "date" (ISO date), "desc" (description,
    at most 80 characters), "original" (file name as shown in the row) and
    "filename" (target name 'YYYY-MM-DD description (stem).ext').
    """
    def cell(index: int) -> str:
        # Missing cells are treated as empty text.
        return cells[index].strip() if len(cells) > index else ""

    date_raw, desc_raw, fname_raw = cell(1), cell(2), cell(3)

    # Prefer the third description line, then the second, then the first.
    desc_lines = [line.strip() for line in desc_raw.split("\n") if line.strip()]
    description = desc_lines[min(len(desc_lines), 3) - 1] if desc_lines else ""
    description = description[:80]

    # Strip a trailing "(DD.MM.YYYY)" from the displayed file name.
    fname_match = re.match(r'^(.+?)\s*\(\d{2}\.\d{2}\.\d{4}\)\s*$', fname_raw)
    if fname_match:
        original = fname_match.group(1).strip()
    else:
        original = fname_raw.split("(")[0].strip()
    orig_path = Path(original)
    stem = orig_path.stem or "zprava"
    ext = orig_path.suffix

    date_iso = parse_date(date_raw)
    clean_stem = safe_filename(stem)
    name = f"{date_iso} {safe_filename(description)} ({clean_stem}){ext}"
    if len(name) > 240:
        # Too long: drop the description from the name.
        name = f"{date_iso} ({clean_stem}){ext}"

    return {
        "date": date_iso,
        "desc": description,
        "original": original,
        "filename": name,
    }
|
||||
|
||||
|
||||
def collect_rows(page) -> list[dict]:
    """Return the downloadable rows of the table on the current page.

    The in-page script keeps rows with at least four cells and a link whose
    onclick mentions SchrPolOpenFile; the first number in that onclick
    becomes "fileId". Rows without a file id are dropped here.
    """
    script = """() => {
        const rows = [];
        for (const tr of document.querySelectorAll('table tr')) {
            const cells = Array.from(tr.querySelectorAll('td')).map(td => td.innerText.trim());
            if (cells.length < 4) continue;
            const dlLink = tr.querySelector('a[onclick*="SchrPolOpenFile"]');
            if (!dlLink) continue;
            const m = dlLink.getAttribute('onclick').match(/\\d+/);
            rows.push({ cells, fileId: m ? m[0] : null });
        }
        return rows;
    }"""
    usable = []
    for entry in page.evaluate(script):
        if entry["fileId"]:
            usable.append(entry)
    return usable
|
||||
|
||||
|
||||
def make_requests_session(context) -> req.Session:
    """Build a requests.Session carrying the cookies of the Playwright context.

    Cookie domains are stored without a leading dot. The session also gets
    a browser-like User-Agent and the inbox URL as Referer.
    """
    session = req.Session()
    session.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Referer": INBOX_URL,
    })
    for cookie in context.cookies():
        bare_domain = cookie.get("domain", "").lstrip(".")
        session.cookies.set(cookie["name"], cookie["value"], domain=bare_domain)
    return session
|
||||
|
||||
|
||||
def download_file(session: req.Session, file_id: str, target: str) -> bool:
    """Download one attachment over HTTP and store it at ``target``.

    The data is streamed into a temporary ".part" file in the target
    directory and moved into place only after the whole transfer
    succeeded. This matters for the incremental run, which treats an
    existing target file as "already downloaded": a truncated file left by
    a failed transfer would otherwise never be fetched again.

    Args:
        session: HTTP session carrying the portal cookies.
        file_id: Message id passed as the ``zprava_id`` query parameter.
        target: Final path of the downloaded file.

    Returns:
        True on success, False on any error (the error is printed).
    """
    url = f"{DOWNLOAD_URL}?zprava_id={file_id}"
    # Short temp name: avoids lengthening an already long target path.
    partial = os.path.join(os.path.dirname(target), f".{file_id}.part")
    try:
        # The context manager releases the streamed connection in every case.
        with session.get(url, timeout=30, stream=True) as r:
            r.raise_for_status()
            with open(partial, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(partial, target)
        return True
    except Exception as e:
        print(f" Chyba stahování (id={file_id}): {e}")
        try:
            os.remove(partial)
        except OSError:
            # Nothing was written yet, or the file is already gone.
            pass
        return False
|
||||
|
||||
|
||||
def process_schránka_nove(page, session: req.Session, segment: str, name: str, already: set) -> tuple[int, int]:
    """Download the new messages of one mailbox, newest first.

    Pages are walked in order; processing of the mailbox stops at the
    first message whose target file already exists in the archive.

    Args:
        page: Playwright page used to open the mailbox listing.
        session: HTTP session used for the file downloads.
        segment: URL segment identifying the mailbox.
        name: Display name of the mailbox (not used inside this function).
        already: File names known to be in the archive; every successful
            download is added to it in place.

    Returns:
        ``(downloaded, 0)`` — the second element is always 0.
    """
    # NOTE(review): a message whose download fails is not retried by later
    # runs once a newer message has been stored, because the walk stops at
    # the first existing file — confirm whether that is acceptable.
    downloaded = 0
    page_num = 1

    while True:
        url = f"{INBOX_URL}/{segment}/stranka-{page_num}"
        print(f" Stránka {page_num}: {url}")
        try:
            page.goto(url, wait_until="domcontentloaded", timeout=30_000)
        except Exception as e:
            print(f" Navigace selhala: {e}")
            break

        try:
            page.wait_for_load_state("networkidle", timeout=15_000)
        except Exception as e:
            # A page that never becomes network-idle may still have its
            # table rendered; report it and try to read the rows anyway
            # instead of aborting the whole run.
            print(f" Čekání na načtení stránky selhalo: {e}")
        rows = collect_rows(page)

        if not rows:
            print(f" Stránka {page_num} — žádné řádky, končím schránku.")
            break

        for row in rows:
            info = parse_row(row["cells"])
            target = os.path.join(DOWNLOAD_DIR, info["filename"])

            if info["filename"] in already or os.path.exists(target):
                print(f" Nalezena existující zpráva: {info['filename']}")
                print(" Končím — starší zprávy jsou již staženy.")
                # Everything older is assumed to be archived: mailbox done.
                return downloaded, 0

            print(f" [{downloaded + 1}] Nová zpráva: {info['filename']}")
            if download_file(session, row["fileId"], target):
                already.add(info["filename"])
                downloaded += 1
            # Short pause between download attempts.
            time.sleep(0.3)

        # The whole page was new: continue with the next page if there is one.
        has_next = page.evaluate("""() => {
            return !!Array.from(document.querySelectorAll('a')).find(
                a => a.innerText.trim() === 'Další stránka' && !a.closest('[aria-disabled]')
            );
        }""")
        if not has_next:
            print(" Další stránky nejsou — vše je nové nebo schránka vyčerpána.")
            break
        page_num += 1

    return downloaded, 0
|
||||
|
||||
|
||||
def ensure_logged_in(page, context, signer_wait_s: float = 40) -> bool:
    """Make sure the session is logged in, logging in by certificate if needed.

    Opens the inbox; when that lands on a login page, clicks the
    certificate login button, waits ``signer_wait_s`` seconds for the user
    to confirm in the Signer component and opens the inbox again.

    Args:
        page: Playwright page to drive.
        context: Browser context (not used inside this function).
        signer_wait_s: Seconds to wait after clicking the certificate button.

    Returns:
        True when the page is not on a login URL afterwards, False otherwise.
    """
    def on_login_page() -> bool:
        # One predicate for both checks, so a redirect to a "login" URL
        # after the attempt is recognised as a failure as well.
        current = page.url
        return "prihlaseni" in current or "login" in current.lower()

    try:
        page.goto(INBOX_URL, wait_until="domcontentloaded", timeout=30_000)
    except Exception as e:
        print(f"Navigace: {e}")

    if on_login_page():
        print("Nutné přihlášení — klikám na 'Přihlásit se certifikátem'...")
        try:
            page.goto(LOGIN_URL, wait_until="domcontentloaded", timeout=30_000)
        except Exception as e:
            print(f"Navigace na login: {e}")
        cert_btn = page.locator("button").filter(has_text="certifikátem").first
        cert_btn.wait_for(state="visible", timeout=10_000)
        cert_btn.click(no_wait_after=True)
        # The prompt states the time the script really waits.
        print(f"Vyskočí Signer komponenta — klikněte ANO (max {signer_wait_s:g} s)...")
        time.sleep(signer_wait_s)
        try:
            page.goto(INBOX_URL, wait_until="domcontentloaded", timeout=30_000)
        except Exception as e:
            print(f"Navigace po auth: {e}")
        if on_login_page():
            print("Přihlášení selhalo.")
            return False

    print(f"Přihlášení OK. URL: {page.url}")
    return True
|
||||
|
||||
|
||||
def main() -> None:
    """Download only the new messages from every mailbox in SCHRANKY.

    Launches a persistent Chrome context, restores cookies, ensures the
    session is logged in and runs the incremental download per mailbox.
    Cookies are saved and the context is closed in every case. Exits with
    status 1 when Playwright is not installed.
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("Chybí playwright: pip install playwright && playwright install chrome")
        sys.exit(1)

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    _delete_chrome_cert_policy()

    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            user_data_dir=CHROME_PROFILE,
            channel="chrome",
            headless=False,
            slow_mo=100,
            ignore_https_errors=True,
            args=["--force-renderer-accessibility"],
        )
        try:
            restored = load_cookies(context)
            print(f"Cookies načtené z JSON: {restored}")

            page = context.new_page()

            if ensure_logged_in(page, context):
                session = make_requests_session(context)
                already = set(os.listdir(DOWNLOAD_DIR))
                print(f"V archivu: {len(already)} souborů.\n")

                new_files = 0
                for segment, name in SCHRANKY.items():
                    print(f"\n=== Schránka: {name} ===")
                    got, _ = process_schránka_nove(page, session, segment, name, already)
                    print(f" Schránka {name}: nových staženo {got}")
                    new_files += got

                print(f"\n{'='*50}")
                print(f"Hotovo. Celkem nových souborů: {new_files}")
        finally:
            saved = save_cookies(context)
            print(f"Uloženo {saved} VoZP cookies.")
            context.close()
|
||||
|
||||
|
||||
# Run the incremental download only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user