Přesunout import_vzp_pracoviste do složky StahovánízVZPWithClaude
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,278 @@
|
||||
"""
|
||||
Import VZP číselníku pracovišť (soubory *.Lh7) do MySQL tabulky vzp_pracoviste.
Před importem automaticky stáhne nejnovější soubor z VZP Point (vyžaduje certifikát).
Použití: python import_vzp_pracoviste.py [--no-download] [soubor.Lh7]
|
||||
"""
|
||||
|
||||
import csv
|
||||
import glob
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import zipfile
|
||||
from datetime import date, datetime
|
||||
from html.parser import HTMLParser
|
||||
|
||||
# Windows console: make sure printed text (Czech diacritics) is emitted as UTF-8.
# sys.stdout may be None when no console is attached, so read the encoding
# defensively instead of dereferencing it directly.
_stdout_encoding = (getattr(sys.stdout, "encoding", None) or "").lower()
if sys.stdout is not None and _stdout_encoding.replace("-", "").replace("_", "") != "utf8":
    if hasattr(sys.stdout, "reconfigure"):
        # Switch the encoding in place; the stream object stays the same.
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    elif hasattr(sys.stdout, "buffer"):
        # Fallback for stream objects without reconfigure(): re-wrap the raw buffer.
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
||||
|
||||
import mysql.connector
|
||||
|
||||
# MySQL connection settings passed as keyword arguments to
# mysql.connector.connect(). Host, user, password and database can be
# overridden through environment variables; the defaults keep the previously
# hard-coded values so existing deployments behave the same.
# NOTE(review): a root password committed to the repository should be rotated
# and supplied only through VZP_DB_PASSWORD.
DB_CONFIG = {
    "host": os.environ.get("VZP_DB_HOST", "192.168.1.76"),
    "user": os.environ.get("VZP_DB_USER", "root"),
    "password": os.environ.get("VZP_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("VZP_DB_NAME", "medicus"),
    "charset": "utf8mb4",
}
|
||||
|
||||
# Absolute directory of this script; anchoring on it keeps the paths below
# valid even if the working directory changes after start-up.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Folder next to the script where the *.Lh7 files are stored and looked up.
IMPORT_DIR = os.path.join(_SCRIPT_DIR, "Import")

# VZP Point document listing page and the client certificate (PFX) used to
# authenticate against it.
VZP_POINT_DOC_URL = "https://point.vzp.cz/Cms/Document"
VZP_CERT_FILE = os.path.join(_SCRIPT_DIR, "MichalkaPublicCertProPython.pfx")
VZP_CERT_PASSWORD = ""  # set the PFX password here if one was given at export time
|
||||
|
||||
# DDL for the target table. Column widths here are the limits that
# import_file() truncates the free-text fields to (200 / 200 / 150 / 100 / 5).
CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS vzp_pracoviste (
    id               INT          NOT NULL AUTO_INCREMENT,
    ico              CHAR(8)      NOT NULL,
    icz              CHAR(8)      NOT NULL,
    icp              CHAR(8)      NOT NULL,
    odbornost        VARCHAR(4)   NOT NULL,
    platnost_od      DATE         NOT NULL,
    platnost_do      DATE         NOT NULL,
    nazev_zarizeni   VARCHAR(200),
    nazev_pracoviste VARCHAR(200),
    ulice            VARCHAR(150),
    mesto            VARCHAR(100),
    psc              CHAR(5),
    PRIMARY KEY (id),
    INDEX idx_icp (icp),
    INDEX idx_icz (icz),
    INDEX idx_odbornost (odbornost),
    INDEX idx_platnost (platnost_od, platnost_do)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
"""

# Number of rows buffered before each executemany() + commit in import_file().
BATCH_SIZE = 1000
|
||||
|
||||
|
||||
def parse_date(s: str) -> date | None:
|
||||
"""Převede DDMMYYYY na date. Rok 3000 → 9999-12-31."""
|
||||
s = s.strip()
|
||||
if len(s) != 8:
|
||||
return None
|
||||
try:
|
||||
d, m, y = int(s[0:2]), int(s[2:4]), int(s[4:8])
|
||||
if y >= 3000:
|
||||
return date(9999, 12, 31)
|
||||
return date(y, m, d)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def download_latest_file() -> str | None:
|
||||
"""
|
||||
Přihlásí se na VZP Point certifikátem, stáhne nejnovější ICP ZIP,
|
||||
rozbalí PLP111*.Lh7 do Import/ a vrátí cestu. Při chybě vrátí None.
|
||||
"""
|
||||
try:
|
||||
from requests_pkcs12 import Pkcs12Adapter
|
||||
import requests
|
||||
except ImportError:
|
||||
print("[stahování] Chybí knihovny: pip install requests requests-pkcs12")
|
||||
return None
|
||||
|
||||
password = VZP_CERT_PASSWORD.encode() if VZP_CERT_PASSWORD else None
|
||||
|
||||
session = requests.Session()
|
||||
session.mount("https://point.vzp.cz", Pkcs12Adapter(
|
||||
pkcs12_filename=VZP_CERT_FILE,
|
||||
pkcs12_password=password,
|
||||
))
|
||||
|
||||
# Načti stránku s dokumenty
|
||||
try:
|
||||
resp = session.get(VZP_POINT_DOC_URL, timeout=30)
|
||||
resp.raise_for_status()
|
||||
except Exception as e:
|
||||
print(f"[stahování] Chyba při načtení {VZP_POINT_DOC_URL}: {e}")
|
||||
return None
|
||||
|
||||
# Najdi odkaz na *-icp.zip
|
||||
class _LinkParser(HTMLParser):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.icp_links: list[str] = []
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if tag == "a":
|
||||
href = dict(attrs).get("href", "")
|
||||
if re.search(r"-icp\.zip", href, re.IGNORECASE):
|
||||
self.icp_links.append(href)
|
||||
|
||||
parser = _LinkParser()
|
||||
parser.feed(resp.text)
|
||||
|
||||
if not parser.icp_links:
|
||||
print("[stahování] Na stránce VZP Point nebyl nalezen odkaz na *-icp.zip")
|
||||
return None
|
||||
|
||||
# Vyber nejnovější dle data v názvu (YYMMDDHHMMSS-icp.zip)
|
||||
zip_href = sorted(parser.icp_links)[-1]
|
||||
if not zip_href.startswith("http"):
|
||||
zip_href = "https://point.vzp.cz" + zip_href
|
||||
|
||||
# Zkontroluj, jestli už máme aktuální soubor (podle data v názvu ZIP)
|
||||
date_match = re.search(r"(\d{6})\d{6}-icp\.zip", zip_href)
|
||||
zip_date = date_match.group(1) if date_match else "" # YYMMDD
|
||||
lh7_name = f"PLP111{zip_date[:2]}.Lh7" if zip_date else "PLP111??.Lh7"
|
||||
dest = os.path.join(IMPORT_DIR, lh7_name)
|
||||
|
||||
# Najdi případný existující soubor pro stejný rok
|
||||
year_suffix = zip_date[:2] if zip_date else ""
|
||||
existing = glob.glob(os.path.join(IMPORT_DIR, f"PLP111{year_suffix}.Lh7"))
|
||||
if existing:
|
||||
print(f"[stahování] {os.path.basename(existing[0])} již existuje — přeskočeno.")
|
||||
return existing[0]
|
||||
|
||||
# Stáhni ZIP
|
||||
print(f"[stahování] {zip_href}")
|
||||
try:
|
||||
zip_resp = session.get(zip_href, timeout=60)
|
||||
zip_resp.raise_for_status()
|
||||
except Exception as e:
|
||||
print(f"[stahování] Chyba při stahování ZIP: {e}")
|
||||
return None
|
||||
|
||||
# Rozbal Lh7 z archivu
|
||||
try:
|
||||
with zipfile.ZipFile(io.BytesIO(zip_resp.content)) as zf:
|
||||
lh7_names = [n for n in zf.namelist() if n.lower().endswith(".lh7")]
|
||||
if not lh7_names:
|
||||
print("[stahování] ZIP neobsahuje žádný .Lh7 soubor")
|
||||
return None
|
||||
lh7_entry = lh7_names[0]
|
||||
dest = os.path.join(IMPORT_DIR, os.path.basename(lh7_entry))
|
||||
os.makedirs(IMPORT_DIR, exist_ok=True)
|
||||
with zf.open(lh7_entry) as src, open(dest, "wb") as out:
|
||||
out.write(src.read())
|
||||
except Exception as e:
|
||||
print(f"[stahování] Chyba při rozbalování ZIP: {e}")
|
||||
return None
|
||||
|
||||
print(f"[stahování] Rozbaleno: {os.path.basename(dest)} ({os.path.getsize(dest):,} B)")
|
||||
return dest
|
||||
|
||||
|
||||
def find_latest_file() -> str:
    """Return the most recently modified ``*.Lh7`` file in ``IMPORT_DIR``.

    Raises:
        FileNotFoundError: If ``IMPORT_DIR`` contains no ``*.Lh7`` file.
    """
    candidates = glob.glob(os.path.join(IMPORT_DIR, "*.Lh7"))
    if not candidates:
        raise FileNotFoundError(f"Žádný *.Lh7 soubor nenalezen v {IMPORT_DIR}")
    # "Latest" is decided by file modification time, not by the file name.
    return max(candidates, key=os.path.getmtime)
|
||||
|
||||
|
||||
def import_file(filepath: str, conn: mysql.connector.MySQLConnection) -> tuple[int, int]:
    """Rebuild table ``vzp_pracoviste`` from the CSV-formatted file *filepath*.

    The table is dropped and recreated first, then rows are inserted in
    batches of ``BATCH_SIZE`` with a commit after every batch.

    Args:
        filepath: Path to the ``.Lh7`` file (comma-separated, cp1250 encoded).
        conn: Open MySQL connection; it is committed but not closed here.

    Returns:
        ``(total, skipped)``: the number of rows inserted and the number of
        rows skipped because they had fewer than 10 columns or an unparsable
        validity date.
    """
    insert_sql = """
        INSERT INTO vzp_pracoviste
            (ico, icz, icp, odbornost, platnost_od, platnost_do,
             nazev_zarizeni, nazev_pracoviste, ulice, mesto, psc)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """

    batch = []
    total = 0
    skipped = 0

    cursor = conn.cursor()
    try:
        # NOTE: the old table is dropped before the new data is read, so a
        # failure part-way through leaves the table partially filled.
        cursor.execute("DROP TABLE IF EXISTS vzp_pracoviste")
        cursor.execute(CREATE_TABLE_SQL)
        conn.commit()

        with open(filepath, encoding="cp1250", errors="replace", newline="") as f:
            reader = csv.reader(f, quotechar='"', skipinitialspace=True)
            for row in reader:
                # Columns 0-9 are read unconditionally below.
                if len(row) < 10:
                    skipped += 1
                    continue

                platnost_od = parse_date(row[4])
                platnost_do = parse_date(row[5])
                if platnost_od is None or platnost_do is None:
                    skipped += 1
                    continue

                # Street = street name + house number + orientation number
                # (columns 11, 12, 13); fall back to column 8 when those
                # columns are missing or all empty.
                ulice_parts = [row[11].strip(), row[12].strip(), row[13].strip()] if len(row) > 13 else []
                ulice = " ".join(p for p in ulice_parts if p) or row[8].strip()

                # Postal code is optional; cut it to the CHAR(5) column width.
                psc = row[14].strip()[:5] if len(row) > 14 else ""

                batch.append((
                    row[0].strip(),        # ico
                    row[1].strip(),        # icz
                    row[2].strip(),        # icp
                    row[3].strip(),        # odbornost
                    platnost_od,
                    platnost_do,
                    row[6].strip()[:200],  # nazev_zarizeni
                    row[7].strip()[:200],  # nazev_pracoviste
                    ulice[:150],
                    row[9].strip()[:100],  # mesto
                    psc,
                ))

                if len(batch) >= BATCH_SIZE:
                    cursor.executemany(insert_sql, batch)
                    conn.commit()
                    total += len(batch)
                    batch.clear()

        # Flush the final, partially filled batch.
        if batch:
            cursor.executemany(insert_sql, batch)
            conn.commit()
            total += len(batch)
    finally:
        # Release the cursor even when reading or inserting fails.
        cursor.close()

    return total, skipped
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point.

    Usage: ``python import_vzp_pracoviste.py [--no-download] [file.Lh7]``

    With an explicit file argument that file is imported as is. Otherwise the
    newest file is downloaded (unless ``--no-download`` is given) and, if the
    download is skipped or fails, the most recently modified local file is
    used instead.
    """
    args = sys.argv[1:]
    no_download = "--no-download" in args
    positional = [a for a in args if a != "--no-download"]

    if positional:
        filepath = positional[0]
    else:
        downloaded = None
        if not no_download:
            downloaded = download_latest_file()
            if downloaded is None:
                print("[stahování] Pokračuji s lokálním souborem...")
        # Import exactly the file the downloader reported; re-selecting by
        # modification time could pick a different local file.
        filepath = downloaded or find_latest_file()

    filename = os.path.basename(filepath)

    print(f"Soubor: {filename}")
    print(f"Databáze: {DB_CONFIG['host']}/{DB_CONFIG['database']}")
    print(f"Začátek: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    conn = mysql.connector.connect(**DB_CONFIG)
    try:
        total, skipped = import_file(filepath, conn)
    finally:
        conn.close()

    print(f"Importováno: {total} záznamů")
    if skipped:
        print(f"Přeskočeno: {skipped} řádků (neúplná data)")
    print(f"Hotovo: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
||||
|
||||
|
||||
# Run the importer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user