diff --git a/ECG/30 ECG test3.py b/ECG/30 ECG test3.py index 70812ae..fee7038 100644 --- a/ECG/30 ECG test3.py +++ b/ECG/30 ECG test3.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- - - import fitz import os import re @@ -35,6 +33,102 @@ def ocr_page(page): return text +# ─── Medicus ověření ────────────────────────────────────────────────────────── + +def _medicus_connect(): + try: + import fdb + return fdb.connect( + dsn=r"localhost:c:\medicus 3\data\medicus.fdb", + user="SYSDBA", password="masterkey", charset="win1250" + ) + except Exception as e: + print(f" [Medicus] Nepřipojeno: {e}") + return None + +def _lookup_by_rc(cur, rc_digits: str) -> dict | None: + cur.execute( + "SELECT IDPAC, PRIJMENI, JMENO, RODCIS FROM KAR " + "WHERE REPLACE(RODCIS, '/', '') = ?", + (rc_digits,) + ) + row = cur.fetchone() + if row: + return {"idpac": row[0], "prijmeni": row[1].strip(), "jmeno": row[2].strip(), "rodcis": row[3].strip()} + return None + +def _rc_candidates(rc: str) -> list[str]: + similar = {"0": "8", "8": "0", "1": "7", "7": "1", "5": "6", "6": "5", "3": "8"} + candidates = set() + for i in range(len(rc)): + candidates.add(rc[:i] + rc[i+1:]) + for i, ch in enumerate(rc): + if ch in similar: + candidates.add(rc[:i] + similar[ch] + rc[i+1:]) + candidates.discard(rc) + return sorted(candidates) + +def _rc_checksum_ok(rc: str) -> bool: + digits = re.sub(r"\D", "", rc) + if len(digits) == 10: + return int(digits) % 11 == 0 + return True + +def verify_patient(rc_raw: str) -> dict: + """ + Ověří pacienta v Medicus. 
+ status: "ok" | "fuzzy" | "not_found" | "offline" + """ + rc = re.sub(r"\D", "", rc_raw or "") + if not rc: + return {"status": "not_found", "patient": None, "rc_corrected": None} + + con = _medicus_connect() + if con is None: + return {"status": "offline", "patient": None, "rc_corrected": None} + + try: + cur = con.cursor() + patient = _lookup_by_rc(cur, rc) + if patient: + return {"status": "ok", "patient": patient, "rc_corrected": None} + + candidates = _rc_candidates(rc) + matches = [] + for cand in candidates: + p = _lookup_by_rc(cur, cand) + if p: + matches.append((cand, p)) + + if not matches: + return {"status": "not_found", "patient": None, "rc_corrected": None} + + matches.sort(key=lambda x: (0 if _rc_checksum_ok(x[0]) else 1)) + best_rc, best_patient = matches[0] + return {"status": "fuzzy", "patient": best_patient, "rc_corrected": best_rc, "all_matches": matches} + finally: + con.close() + +def print_verification(verif: dict, rc_from_ocr: str): + status = verif["status"] + patient = verif.get("patient") + if status == "ok": + print(f" ✓ Medicus: {patient['prijmeni']} {patient['jmeno']} | RČ {patient['rodcis']}") + elif status == "fuzzy": + rc_corr = verif["rc_corrected"] + print(f" ⚠ Medicus: RČ z OCR '{rc_from_ocr}' nenalezeno") + print(f" → Nalezen podobný pacient: {patient['prijmeni']} {patient['jmeno']} | RČ {patient['rodcis']}") + print(f" → Pravděpodobná oprava RČ: {rc_from_ocr} → {rc_corr} (OCR chyba)") + if len(verif.get("all_matches", [])) > 1: + print(f" → Další shody: {[m[0] for m in verif['all_matches'][1:]]}") + elif status == "not_found": + print(f" ✗ Medicus: RČ '{rc_from_ocr}' nenalezeno ani při fuzzy hledání") + elif status == "offline": + print(f" — Medicus: nedostupný (offline), ověření přeskočeno") + + +# ─── OCR extrakce ───────────────────────────────────────────────────────────── + def extract_rodne_cislo(text): """Extracts RC (6 digits + optional slash/spaces + 3-4 digits).""" m = re.search(r"(\d{6})\s*/?\s*(\d{3,4})", text) @@ 
def extract_date(text):
    """Extract a date from OCR text, tolerating common OCR spacing errors.

    Two forms are recognised, in this order:

    1. Day, month and year separated by dots or commas, with optional
       whitespace after each separator (``D.M.YYYY``, ``DD. MM. YYYY``,
       ``D,M,YYYY``).
    2. A compact run of digits with the separators dropped: ``DDMMYYYY``
       (eight digits) first, then ``DDMYYYY`` (seven digits).

    Every hit must be a plausible calendar date (day 1-31, month 1-12,
    year 1900-2100); implausible hits are skipped and the search continues
    with later ones.

    Args:
        text: OCR output to search.

    Returns:
        A ``(day, month, year)`` tuple of digit strings exactly as they
        appear in the text, or ``None`` when no plausible date is found.
    """
    def _plausible(day, month, year):
        # Range check only; it does not validate the day against the month.
        return (
            1 <= int(day) <= 31
            and 1 <= int(month) <= 12
            and 1900 <= int(year) <= 2100
        )

    patterns = (
        # Primary: separated by dots (or commas misread by OCR).
        r"(\d{1,2})[\.,]\s*(\d{1,2})[\.,]\s*(\d{4})",
        # Fallback: OCR dropped the separators entirely.
        r"\b(\d{2})(\d{2})(\d{4})\b",
        r"\b(\d{2})(\d{1})(\d{4})\b",
    )
    for pattern in patterns:
        for match in re.finditer(pattern, text):
            if _plausible(*match.groups()):
                return match.groups()

    return None