notebookvb
This commit is contained in:
@@ -0,0 +1,237 @@
|
||||
"""
|
||||
Stahování NOVÝCH zpráv ze schránek ZPŠ — zastaví se při první již stažené zprávě.
|
||||
|
||||
Použij po 01_prihlaseni.py (ten uloží zps_cookies.json).
|
||||
|
||||
Co dělá:
|
||||
- Prochází každou schránku od nejnovějších zpráv
|
||||
- Jakmile narazí na zprávu, která už je v Staženo/, okamžitě zastaví danou schránku
|
||||
- Vhodné pro pravidelné spouštění — stáhne jen to nové
|
||||
|
||||
POUŽITÍ:
|
||||
python 03_stahuj_nove.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Directory of this script; local files (cookies, archive) live next to it.
_SCRIPT_DIR = os.path.dirname(__file__)

# Portal endpoints: mailbox listing, attachment download, delivery protocol.
BASE_URL = "https://portal.zpskoda.cz"
INBOX_URL = f"{BASE_URL}/app/prehled-zprav-ve-schrankach"
DOWNLOAD_URL = f"{BASE_URL}/html/prehled-zprav-ve-schrankach/zobrazit-prilohu"
PROTOKOL_URL = f"{BASE_URL}/html/prehled-zprav-ve-schrankach/zobrazit-protokol"

# Session cookies file (absolute path) and the archive directory for downloads.
COOKIES_FILE = os.path.abspath(os.path.join(_SCRIPT_DIR, "zps_cookies.json"))
DOWNLOAD_DIR = os.path.join(_SCRIPT_DIR, "Staženo")

# Mailbox URL segment -> human-readable mailbox name used in console output.
SCHRANKY = {
    "17-schranka-poskytovatele-zdravotnich-sluzeb": "Schránka poskytovatele ZS",
    "19-schranka-klienta": "Schránka klienta",
    "145-vypis-registrovanych-pojistencu": "Výpis registrovaných pojištěnců",
    "169-zpravy-od-klientu-pzp-": "Zprávy od klientů PZP",
    "181-schranka-klientu-portalu": "Schránka klientů portálu",
}
|
||||
|
||||
|
||||
def parse_date(date_str: str) -> str:
    """Convert a portal timestamp to an ISO ``YYYY-MM-DD`` date string.

    Two layouts are tried in order: ``DD.MM.YYYY HH:MM:SS`` (first 19
    characters of the stripped input) and ``DD.MM.YYYY`` (first 10
    characters). If neither parses, the placeholder ``"0000-00-00"`` is
    returned instead of raising.
    """
    layouts = (
        (19, "%d.%m.%Y %H:%M:%S"),
        (10, "%d.%m.%Y"),
    )
    for width, layout in layouts:
        try:
            return datetime.strptime(date_str.strip()[:width], layout).strftime("%Y-%m-%d")
        except Exception:
            # Any failure simply means "try the next layout".
            continue
    return "0000-00-00"
|
||||
|
||||
|
||||
def safe_filename(name: str) -> str:
    """Return *name* with ``\\ / : * ? " < > |`` replaced by ``_``.

    Leading and trailing whitespace is removed after the substitution.
    """
    sanitized = re.sub(r'[\\/:*?"<>|]', "_", name)
    return sanitized.strip()
|
||||
|
||||
|
||||
def parse_row(cells: list) -> dict:
    """Derive archive metadata from the text cells of one message row.

    ``cells[1]`` is read as the date, ``cells[2]`` as a multi-line
    description and ``cells[3]`` as the attachment name; missing cells are
    treated as empty strings.

    Returns a dict with the keys ``date`` (ISO date from ``parse_date``),
    ``desc`` (description, at most 80 characters), ``original`` (attachment
    name without a trailing ``(DD.MM.YYYY)`` suffix) and ``filename`` (the
    name under which the attachment is stored).
    """
    def cell_text(index: int) -> str:
        # Absent columns count as empty text.
        return cells[index].strip() if len(cells) > index else ""

    date_raw = cell_text(1)
    desc_raw = cell_text(2)
    fname_raw = cell_text(3)

    # Of the non-blank description lines, use the third when present,
    # otherwise the last available one (or nothing at all).
    desc_lines = [piece for piece in map(str.strip, desc_raw.split("\n")) if piece]
    description = desc_lines[min(len(desc_lines), 3) - 1] if desc_lines else ""
    description = description[:80]

    # Strip a trailing "(DD.MM.YYYY)" from the attachment name; when the
    # pattern does not match, fall back to everything before the first "(".
    fname_match = re.match(r'^(.+?)\s*\(\d{2}\.\d{2}\.\d{4}\)\s*$', fname_raw)
    if fname_match:
        original = fname_match.group(1).strip()
    else:
        original = fname_raw.split("(")[0].strip()

    orig_path = Path(original)
    stem = orig_path.stem or "zprava"
    ext = orig_path.suffix or ""
    date_iso = parse_date(date_raw)

    name = f"{date_iso} {safe_filename(description)} ({safe_filename(stem)}){ext}"
    if len(name) > 240:
        # Overlong names drop the description part.
        name = f"{date_iso} ({safe_filename(stem)}){ext}"

    return {"date": date_iso, "desc": description, "original": original, "filename": name}
|
||||
|
||||
|
||||
def _write_atomically(path: str, payload: bytes) -> None:
    """Write *payload* to *path* through a ``.part`` file and a rename.

    An interrupted write therefore never leaves a truncated file under the
    final name, which later runs would otherwise treat as already downloaded.
    """
    partial = f"{path}.part"
    with open(partial, "wb") as f:
        f.write(payload)
    os.replace(partial, path)


def process_schránka(page, context, segment: str, name: str, already: set) -> int:
    """Download new messages from one mailbox, newest first.

    Listing pages ``stranka-1``, ``stranka-2``, ... of the mailbox are
    visited in order. For every row the attachment (and the protocol, when
    the row links one) is saved into ``DOWNLOAD_DIR``. Processing of the
    mailbox ends when a row's attachment is already archived, when a page
    yields no rows, when a page repeats previously seen file ids, or when
    navigation fails.

    Args:
        page: Playwright page used for navigating and reading the listing.
        context: Playwright browser context whose ``request`` client
            performs the downloads.
        segment: Mailbox URL segment (a key of ``SCHRANKY``).
        name: Human-readable mailbox name; not used inside this function.
        already: Set of file names present in the archive; names of files
            saved here are added to it.

    Returns:
        Number of files (attachments plus protocols) saved.
    """
    downloaded = 0
    page_num = 1
    seen_ids: set = set()

    while True:
        url = f"{INBOX_URL}/{segment}/stranka-{page_num}"
        print(f" Stranka {page_num}: {url}")
        try:
            page.goto(url, wait_until="domcontentloaded", timeout=30_000)
        except Exception as e:
            print(f" Navigace selhala: {e}")
            break

        # The DOM is already loaded at this point, so a missed network-idle
        # state is not fatal: report it and read whatever the page shows.
        try:
            page.wait_for_load_state("networkidle", timeout=15_000)
        except Exception as e:
            print(f" Cekani na networkidle selhalo, pokracuji: {e}")

        # Collect, per table row with a download link, the cell texts and
        # the first number found in the download / protocol onclick handlers.
        data = page.evaluate("""() => {
            const rows = [];
            for (const tr of document.querySelectorAll('table tr')) {
                const cells = Array.from(tr.querySelectorAll('td')).map(td => td.innerText.trim());
                if (cells.length < 4) continue;
                const dlLink = tr.querySelector('a[onclick*="SchrPolOpenFile"]');
                if (!dlLink) continue;
                const mFile = dlLink.getAttribute('onclick').match(/\\d+/);
                const protLink = tr.querySelector('a[onclick*="SchrPolDBProtokol"]');
                const mProt = protLink ? protLink.getAttribute('onclick').match(/\\d+/) : null;
                rows.push({
                    cells,
                    fileId: mFile ? mFile[0] : null,
                    protokolId: mProt ? mProt[0] : null,
                });
            }
            return rows;
        }""")
        rows = [r for r in data if r["fileId"]]

        if not rows:
            print(f" Stranka {page_num} - zadne radky, koncim schranku.")
            break

        # A page sharing ids with earlier pages means the listing repeats,
        # i.e. the requested page number is past the end.
        current_ids = {r["fileId"] for r in rows}
        if current_ids & seen_ids:
            print(f" Stranka {page_num} - opakujici se obsah, koncim schranku.")
            break
        seen_ids.update(current_ids)
        print(f" Nalezeno {len(rows)} zprav.")

        stop = False
        for row in rows:
            info = parse_row(row["cells"])
            target = os.path.join(DOWNLOAD_DIR, info["filename"])

            # First already-archived message: everything older is assumed
            # to be archived as well, so the mailbox is finished.
            if info["filename"] in already or os.path.exists(target):
                print(f" [stop] Nalezena již stažená zpráva: {info['filename']}")
                stop = True
                break

            dl_url = f"{DOWNLOAD_URL}?zprava_id={row['fileId']}"
            try:
                r = context.request.get(dl_url, headers={"Referer": INBOX_URL}, timeout=30_000)
                if not r.ok:
                    print(f" HTTP {r.status} priloha (id={row['fileId']})")
                else:
                    _write_atomically(target, r.body())
                    print(f" OK: {info['filename']}")
                    already.add(info["filename"])
                    downloaded += 1
            except Exception as e:
                print(f" Chyba priloha (id={row['fileId']}): {e}")
            time.sleep(1.0)

            if row.get("protokolId"):
                prot_name = safe_filename(f"{info['date']} {info['desc']} (protokol-{row['protokolId']}).html")
                prot_target = os.path.join(DOWNLOAD_DIR, prot_name)
                if prot_name not in already and not os.path.exists(prot_target):
                    prot_url = f"{PROTOKOL_URL}?id={row['protokolId']}"
                    try:
                        r2 = context.request.get(prot_url, headers={"Referer": INBOX_URL}, timeout=30_000)
                        if r2.ok:
                            _write_atomically(prot_target, r2.body())
                            print(f" OK: {prot_name}")
                            already.add(prot_name)
                            downloaded += 1
                        else:
                            print(f" HTTP {r2.status} protokol (id={row['protokolId']})")
                    except Exception as e:
                        print(f" Chyba protokol (id={row['protokolId']}): {e}")
                    time.sleep(1.0)

        if stop:
            break

        page_num += 1

    return downloaded
|
||||
|
||||
|
||||
def main() -> None:
    """Restore the saved session and download new messages from all mailboxes.

    Loads cookies from ``COOKIES_FILE``, opens a persistent Chrome context,
    checks that the mailbox page does not redirect to a login page, and then
    runs ``process_schránka`` for every entry of ``SCHRANKY``.

    Exits with status 1 when Playwright is not installed, when the cookies
    file is missing, or when the session cookies are no longer accepted, so
    that a calling script can detect the failure from the return code.
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("Chybi playwright: pip install playwright && playwright install chrome")
        sys.exit(1)

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    if not os.path.exists(COOKIES_FILE):
        print(f"Soubor {COOKIES_FILE} nenalezen - spust 01_prihlaseni.py")
        sys.exit(1)

    with open(COOKIES_FILE, encoding="utf-8") as f:
        cookies = json.load(f)

    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            user_data_dir=os.path.join(os.path.dirname(__file__), "chrome_profile"),
            channel="chrome",
            headless=False,
            slow_mo=100,
            # NOTE(review): certificate errors are ignored here - confirm
            # this is intentional for the portal.
            ignore_https_errors=True,
        )
        try:
            context.add_cookies(cookies)
            page = context.new_page()
            page.goto(INBOX_URL, wait_until="domcontentloaded", timeout=30_000)

            # Landing on a login URL means the stored session is not valid.
            # This is a failure like the other early exits, so report it
            # with a non-zero status; the finally clause still closes the
            # browser context.
            if "prihlaseni" in page.url or "login" in page.url.lower():
                print("Cookies expirovala - spust 01_prihlaseni.py")
                sys.exit(1)
            print("Prihlaseni OK\n")

            # Names already present in the archive act as stop markers.
            already = set(os.listdir(DOWNLOAD_DIR))
            print(f"V archivu: {len(already)} souboru.\n")

            celkem = 0
            for segment, name in SCHRANKY.items():
                print(f"\n=== {name} ===")
                dl = process_schránka(page, context, segment, name, already)
                print(f" {name}: stazeno {dl}")
                celkem += dl

            print(f"\nHotovo. Celkem stazeno: {celkem}")

        finally:
            context.close()


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,35 @@
|
||||
"""
|
||||
Přihlásí se na portál ZPŠ a stáhne nové zprávy.
|
||||
|
||||
Kombinuje 01_prihlaseni.py + 03_stahuj_nove.py do jednoho spuštění.
|
||||
|
||||
POUŽITÍ:
|
||||
python 04_prihlaseni_a_stahuj_nove.py
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Absolute directory of this script; sibling scripts are resolved against it.
DIR = os.path.dirname(os.path.abspath(__file__))


def run(script: str) -> None:
    """Run a sibling script with the current Python interpreter.

    Args:
        script: File name of the script, relative to ``DIR``.

    Raises:
        SystemExit: If the script finishes with a non-zero return code; the
            message names the script and the code.
    """
    command = [sys.executable, os.path.join(DIR, script)]
    result = subprocess.run(command, check=False)
    if result.returncode == 0:
        return
    raise SystemExit(f"Skript {script} skončil s chybou (kód {result.returncode})")
|
||||
|
||||
|
||||
def main() -> None:
    """Run the login script and then the new-message download script.

    Each step prints its banner and is executed through ``run``, which
    aborts the whole sequence when a script fails.
    """
    steps = (
        ("=== Přihlášení ===", "01_prihlaseni.py"),
        ("\n=== Stahování nových zpráv ===", "03_stahuj_nove.py"),
    )
    for banner, script in steps:
        print(banner)
        run(script)


if __name__ == "__main__":
    main()
|
||||
@@ -1,7 +1,7 @@
|
||||
[
|
||||
{
|
||||
"name": "SID",
|
||||
"value": "9ed44d48a017f8915cdfb566d2e2e952",
|
||||
"value": "e85dcec9acf6345f151cd5996be23576",
|
||||
"domain": ".portal.zpskoda.cz",
|
||||
"path": "/",
|
||||
"expires": -1,
|
||||
@@ -14,7 +14,7 @@
|
||||
"value": "CERT",
|
||||
"domain": ".portal.zpskoda.cz",
|
||||
"path": "/",
|
||||
"expires": 1808246438,
|
||||
"expires": 1808281979,
|
||||
"secure": true,
|
||||
"httpOnly": false,
|
||||
"sameSite": "Lax"
|
||||
|
||||
Reference in New Issue
Block a user