notebookvb
This commit is contained in:
@@ -13,6 +13,7 @@ POUŽITÍ:
|
||||
python 02_stahuj_vse.py
|
||||
"""
|
||||
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
@@ -103,11 +104,17 @@ def stahni_zpravu(session: requests.Session, msg: dict) -> bool:
|
||||
"""Stáhne protokol/přílohu zprávy. Vrátí True pokud staženo, False pokud přeskočeno."""
|
||||
nazev_base = safe_name(f"{msg['datum']} {msg['druh']} (Ref. {msg['ref']})")
|
||||
|
||||
# Přeskoč pokud existuje v jakékoliv příponě
|
||||
for ext in (".html", ".pdf", ".xml", ".zip"):
|
||||
if os.path.exists(os.path.join(STAZENO_DIR, nazev_base + ext)):
|
||||
print(f" [přeskočeno] {nazev_base}{ext}")
|
||||
# Přeskoč pokud existuje soubor se stejným Ref. číslem (imunní vůči Unicode/mezera variantám)
|
||||
if msg["ref"]:
|
||||
existing = glob.glob(os.path.join(STAZENO_DIR, f"*(Ref. {msg['ref']}).*"))
|
||||
if existing:
|
||||
print(f" [přeskočeno] {os.path.basename(existing[0])}")
|
||||
return False
|
||||
else:
|
||||
for ext in (".html", ".pdf", ".xml", ".zip"):
|
||||
if os.path.exists(os.path.join(STAZENO_DIR, nazev_base + ext)):
|
||||
print(f" [přeskočeno] {nazev_base}{ext}")
|
||||
return False
|
||||
|
||||
# Detail zprávy → najdi download link
|
||||
r = session.get(f"{BASE_URL}/app/schranka/detail/{msg['id']}/", timeout=15)
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
"""
|
||||
Stahování NOVÝCH protokolů ze schránek ČPZP — zastaví se při první již stažené zprávě.
|
||||
|
||||
Použij po 01_prihlaseni.py (ten uloží cpzp_cookies.json).
|
||||
|
||||
Co dělá:
|
||||
- Prochází Schránku klienta a Schránku PZS od nejnovějších zpráv
|
||||
- Jakmile narazí na zprávu, která už je v Staženo/, okamžitě se zastaví
|
||||
- Vhodné pro pravidelné spouštění — stáhne jen to nové
|
||||
|
||||
POUŽITÍ:
|
||||
python 03_stahuj_nove.py
|
||||
"""
|
||||
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import time
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Root URL of the portal; request paths used in this script are appended to it.
BASE_URL = "https://portal.cpzp.cz"

# Cookie file and download folder both live next to this script.
_SCRIPT_DIR = os.path.dirname(__file__)
COOKIES_FILE = os.path.join(_SCRIPT_DIR, "cpzp_cookies.json")
STAZENO_DIR = os.path.join(_SCRIPT_DIR, "Staženo")

# (listing path, label printed in the console) for every mailbox to scan.
SCHRANKY = [
    ("/app/schranka/", "Schránka klienta"),
    ("/app/schranka-pzs/", "Schránka PZS"),
]

# Step added to the ``offset`` query parameter when moving to the next page.
PAGE_SIZE = 20
|
||||
|
||||
|
||||
def make_session() -> requests.Session:
    """Build an HTTP session from the cookies stored in ``COOKIES_FILE``.

    Each stored cookie is copied into a fresh ``requests.Session`` and
    ``{BASE_URL}/app/`` is fetched once to check that the login still works.

    Returns:
        A session carrying the stored cookies.

    Raises:
        SystemExit: if the cookie file is missing or is not valid JSON, or if
            the probe response contains the login-form marker.
        requests.HTTPError: if the probe request returns an error status.
    """
    try:
        with open(COOKIES_FILE, encoding="utf-8") as f:
            cookies = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as err:
        # Same remedy as for expired cookies: the login script must run first.
        raise SystemExit(
            f"Soubor s cookies nelze načíst ({err}) — nejdřív spusť 01_prihlaseni.py"
        ) from err

    s = requests.Session()
    s.headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    for c in cookies:
        # The leading dot of the stored domain is dropped before it is set.
        s.cookies.set(c["name"], c["value"], domain=c["domain"].lstrip("."))

    r = s.get(f"{BASE_URL}/app/", timeout=15)
    # Without this an HTTP error page would pass the marker check below and be
    # mistaken for a valid login.
    r.raise_for_status()
    # NOTE(review): "frmPrihlasCert" is presumably the id of the login form —
    # its presence is taken to mean the session is no longer authenticated.
    if "frmPrihlasCert" in r.text:
        raise SystemExit("Cookies expirovala — nejdřív spusť 01_prihlaseni.py")
    return s
|
||||
|
||||
|
||||
def parse_datum(datum_str: str) -> str:
    """Convert the leading Czech date of *datum_str* to ISO ``YYYY-MM-DD``.

    Accepts ``DD.MM.YYYY`` with one- or two-digit day and month and optional
    whitespace after the dots; anything following the year (e.g. a time) is
    ignored. Day and month are zero-padded in the result.

    Returns:
        The ISO date string, or ``"0000-00-00"`` when no date is found at the
        start of the (stripped) input.

    >>> parse_datum("01.04.2026 22:05:42")
    '2026-04-01'
    >>> parse_datum("1. 4. 2026")
    '2026-04-01'
    """
    m = re.match(r"(\d{1,2})\.\s*(\d{1,2})\.\s*(\d{4})", datum_str.strip())
    if m is None:
        return "0000-00-00"
    day, month, year = m.groups()
    return f"{year}-{month.zfill(2)}-{day.zfill(2)}"
|
||||
|
||||
|
||||
def safe_name(text: str) -> str:
    """Return *text* made safe for use as a file name.

    Surrounding whitespace is removed, then every character from the set
    ``\\ / : * ? " < > |`` and every ASCII control character (0x00-0x1F, e.g.
    a newline or tab left inside scraped text) is replaced with ``_``.
    """
    # Stripping first keeps leading/trailing newlines from turning into "_";
    # for text without control characters the result equals substitute-then-strip.
    return re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", text.strip())
|
||||
|
||||
|
||||
def uz_stazeno(ref: str, nazev_base: str) -> str | None:
|
||||
"""Vrátí název existujícího souboru, nebo None."""
|
||||
if ref:
|
||||
existing = glob.glob(os.path.join(STAZENO_DIR, f"*(Ref. {ref}).*"))
|
||||
if existing:
|
||||
return os.path.basename(existing[0])
|
||||
else:
|
||||
for ext in (".html", ".pdf", ".xml", ".zip"):
|
||||
if os.path.exists(os.path.join(STAZENO_DIR, nazev_base + ext)):
|
||||
return nazev_base + ext
|
||||
return None
|
||||
|
||||
|
||||
def stahni_zpravu(session: requests.Session, msg: dict) -> bool:
    """Download the protocol/attachment of one message into ``STAZENO_DIR``.

    Args:
        session: Session used for both HTTP requests.
        msg: Dict with the keys ``id``, ``datum``, ``druh`` and ``ref``.

    Returns:
        True when a file was saved; False when the detail page has no
        download link or the download timed out three times.

    Raises:
        requests.HTTPError: if either request returns an error status.
        OSError: if the file cannot be written.
    """
    # Local import so this block does not depend on the file's import section.
    from urllib.parse import urljoin

    nazev_base = safe_name(f"{msg['datum']} {msg['druh']} (Ref. {msg['ref']})")

    # NOTE(review): the detail path is always /app/schranka/detail/, also for
    # messages listed under /app/schranka-pzs/ — confirm this is intended.
    r = session.get(f"{BASE_URL}/app/schranka/detail/{msg['id']}/", timeout=15)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser", from_encoding="utf-8")
    dl_link = soup.find("a", href=re.compile(r"/app/schranka/protokol/"))
    if not dl_link:
        print(f" [bez přílohy] {msg['id']} — {msg['druh']}")
        return False

    # Resolve the href against the page it came from: identical to
    # BASE_URL + href for root-relative links, and also correct when the
    # portal emits an absolute URL.
    dl_url = urljoin(r.url, dl_link["href"])

    # Up to three attempts; only timeouts are retried.
    for pokus in range(3):
        try:
            r2 = session.get(dl_url, timeout=60)
            r2.raise_for_status()
            break
        except requests.exceptions.Timeout:
            if pokus == 2:
                print(f" [chyba] timeout po 3 pokusech — {msg['druh']} Ref. {msg['ref']}")
                return False
            time.sleep(3)

    # Pick the extension from the link text or the Content-Type header,
    # checking pdf, xml, zip in that order; default is .html.
    ct = r2.headers.get("Content-Type", "")
    link_text = dl_link.get_text(strip=True).lower()
    ext = ".html"
    for kind in ("pdf", "xml", "zip"):
        if link_text.endswith("." + kind) or kind in ct:
            ext = "." + kind
            break

    cil = os.path.join(STAZENO_DIR, nazev_base + ext)
    # Write to a temporary file and rename it, so an interrupted write never
    # leaves a truncated file under the final name (which later runs would
    # treat as already downloaded). The temporary name deliberately contains
    # no "(Ref. N)." part, so the duplicate check cannot match it.
    tmp = os.path.join(STAZENO_DIR, f"~stahuje-se-{os.getpid()}.part")
    try:
        with open(tmp, "wb") as f:
            f.write(r2.content)
        os.replace(tmp, cil)
    finally:
        # After a successful rename the temporary file no longer exists.
        if os.path.exists(tmp):
            os.remove(tmp)
    print(f" [OK] {os.path.basename(cil)}")
    return True
|
||||
|
||||
|
||||
def stahuj_schranku(session: requests.Session, mailbox_url: str, seen_ids: set) -> tuple[int, bool]:
    """Download messages of one mailbox page by page until a known one appears.

    Pages are requested with an increasing ``offset`` query parameter. The
    scan ends when a page has no message rows, when a page only repeats rows
    already listed by this scan, or when a message is found that already has
    a file in ``STAZENO_DIR``.

    Args:
        session: Session used for all requests.
        mailbox_url: Listing path of the mailbox, appended to ``BASE_URL``.
        seen_ids: Message ids already handled; updated in place, so a caller
            can share it between mailboxes to skip duplicates.

    Returns:
        ``(number of files downloaded, True)`` — the flag is True on every
        return path.

    Raises:
        requests.HTTPError: if a listing request returns an error status.
    """
    # NOTE(review): a message whose download fails is skipped, and later runs
    # stop at the newer files that did download — it is then never retried
    # by this script.
    druh_ref_re = re.compile(r"^(.+?)\s+Ref\.\s*č\.\s*(\d+)")
    stazeno = 0
    offset = 0
    # Ids listed by earlier pages of THIS scan (seen_ids may also hold ids
    # from other mailboxes, so it cannot be used to detect a repeated page).
    listed_ids: set = set()

    while True:
        r = session.get(f"{BASE_URL}{mailbox_url}?offset={offset}", timeout=15)
        r.raise_for_status()
        soup = BeautifulSoup(r.content, "html.parser", from_encoding="utf-8")
        rows = soup.select("tr[id^='message-']")
        if not rows:
            print(" Žádné další zprávy.")
            return stazeno, True

        # Guard against an endless loop: if the server keeps answering an
        # out-of-range offset with rows we have already listed, stop here.
        page_ids = {row["id"].replace("message-", "") for row in rows}
        if page_ids <= listed_ids:
            print(" Žádné další zprávy.")
            return stazeno, True
        listed_ids |= page_ids

        for row in rows:
            msg_id = row["id"].replace("message-", "")
            if msg_id in seen_ids:
                continue
            seen_ids.add(msg_id)

            # NOTE(review): cell 5 is presumably the timestamp and cell 4 the
            # "<type> Ref. č. <number>" text — confirm against the portal HTML.
            cells = row.find_all("td")
            datum_raw = cells[5].get_text(strip=True) if len(cells) > 5 else ""
            druh_ref = cells[4].get_text(separator=" ", strip=True) if len(cells) > 4 else ""
            druh_match = druh_ref_re.match(druh_ref)
            if druh_match:
                druh = druh_match.group(1).strip()
                ref_c = druh_match.group(2).strip()
            else:
                # No reference number in the cell: keep the whole text as type.
                druh = druh_ref
                ref_c = ""

            msg = {
                "id": msg_id,
                "datum": parse_datum(datum_raw),
                "druh": druh,
                "ref": ref_c,
            }

            nazev_base = safe_name(f"{msg['datum']} {msg['druh']} (Ref. {msg['ref']})")
            existing = uz_stazeno(ref_c, nazev_base)
            if existing:
                print(f" [stop] Nalezena již stažená zpráva: {existing}")
                return stazeno, True

            if stahni_zpravu(session, msg):
                stazeno += 1
            # Pause after every processed message; each one costs at least
            # one request to the portal.
            time.sleep(1.0)

        offset += PAGE_SIZE
        time.sleep(1.0)
|
||||
|
||||
|
||||
def main():
    """Scan every mailbox in ``SCHRANKY`` and print how many files were saved.

    Creates ``STAZENO_DIR`` if needed and opens one session that is reused
    for all mailboxes, together with one shared set of handled message ids.
    """
    os.makedirs(STAZENO_DIR, exist_ok=True)
    http = make_session()

    handled_ids: set = set()
    per_mailbox = []
    for path, label in SCHRANKY:
        print(f"\n=== {label} ({path}) ===")
        # Only the download count is used; the second tuple item is ignored.
        per_mailbox.append(stahuj_schranku(http, path, handled_ids)[0])

    print(f"\nHotovo: {sum(per_mailbox)} nových zpráv staženo.")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,35 @@
|
||||
"""
|
||||
Přihlásí se na portál ČPZP a stáhne nové zprávy.
|
||||
|
||||
Kombinuje 01_prihlaseni.py + 03_stahuj_nove.py do jednoho spuštění.
|
||||
|
||||
POUŽITÍ:
|
||||
python 04_prihlaseni_a_stahuj_nove.py
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Absolute directory of this script; sibling scripts are looked up here.
DIR = os.path.dirname(os.path.abspath(__file__))


def run(script: str) -> None:
    """Run *script* from ``DIR`` with the current interpreter and wait for it.

    Raises:
        SystemExit: if the script exits with a non-zero return code.
    """
    script_path = os.path.join(DIR, script)
    exit_code = subprocess.run([sys.executable, script_path], check=False).returncode
    if exit_code == 0:
        return
    raise SystemExit(f"Skript {script} skončil s chybou (kód {exit_code})")
|
||||
|
||||
|
||||
def main() -> None:
    """Run the login script, then the new-message download script.

    Each step prints its banner first; ``run`` aborts the sequence when a
    script fails, so the download step is not started after a failed login.
    """
    steps = (
        ("=== Přihlášení ===", "01_prihlaseni.py"),
        ("\n=== Stahování nových zpráv ===", "03_stahuj_nove.py"),
    )
    for banner, script in steps:
        print(banner)
        run(script)


if __name__ == "__main__":
    main()
|
||||
@@ -1,7 +1,7 @@
|
||||
[
|
||||
{
|
||||
"name": "PHPSESSID",
|
||||
"value": "7ps03755qsp9n4gpms64rqle77",
|
||||
"value": "5mbiobj1htd5joflu2fpm480a3",
|
||||
"domain": ".portal.cpzp.cz",
|
||||
"path": "/",
|
||||
"expires": -1,
|
||||
|
||||
Reference in New Issue
Block a user