z230
This commit is contained in:
@@ -1062,5 +1062,9 @@
|
||||
{
|
||||
"original": "3568c410-090f-49d3-b898-8fef12f05397.pdf",
|
||||
"corrected": "8459699809 2026-04-20 Bysmak, Mariia [sono žil DK] [žíly volně průchodné, varikózní dilatace VSM bilat se známkami chlopenní insuficience].pdf"
|
||||
},
|
||||
{
|
||||
"original": "6604011073 2025-12-08 Kramule, Petr [Laboratoř] [stěr/výtěr krk, fyziologická mikrobiota HCD].pdf",
|
||||
"corrected": "6604011073 2025-12-08 Kramule, Petr [Laboratoř] [stěrvýtěr krk, fyziologická mikrobiota HCD].pdf"
|
||||
}
|
||||
]
|
||||
@@ -197,6 +197,119 @@ def check_duplicates(rc: str, datum: str) -> list[str]:
|
||||
return [name for name in _dokumentace_index if name.startswith(prefix)]
|
||||
|
||||
|
||||
# ─── EKG zpracování ──────────────────────────────────────────────────────────
|
||||
|
||||
# Marker appended to the PDF "keywords" metadata after the EKG page has been
# rotated; its presence makes _ekg_rotate_if_needed() return without changes.
_EKG_FLAG = "rotated-by-script"
|
||||
|
||||
|
||||
def _is_ekg(pdf_path: Path) -> bool:
|
||||
"""Detekuje EKG PDF podle metadat — PDFCreator 2.4.x je specifický pro EKG přístroj."""
|
||||
if pdf_path.suffix.lower() != ".pdf":
|
||||
return False
|
||||
try:
|
||||
import fitz
|
||||
doc = fitz.open(str(pdf_path))
|
||||
meta = doc.metadata
|
||||
doc.close()
|
||||
haystack = " ".join(filter(None, [
|
||||
meta.get("creator", ""), meta.get("producer", "")
|
||||
])).lower()
|
||||
return "pdfcreator" in haystack
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _ekg_rotate_if_needed(pdf_path: Path):
    """Rotate the first page by a further 90° and drop the page at index 1.

    The file is rewritten in place: the result is saved to a sibling
    ``*.tmp.pdf`` file which then replaces the original. After a successful
    run the ``keywords`` metadata carries ``_EKG_FLAG``; a file that already
    carries the flag is left untouched, so calling this twice is harmless.

    Args:
        pdf_path: PDF to modify in place.
    """
    import fitz

    tmp = pdf_path.with_suffix(".tmp.pdf")
    doc = fitz.open(str(pdf_path))
    try:
        meta = doc.metadata
        keywords = meta.get("keywords", "") or ""
        if _EKG_FLAG in keywords:
            # Already processed by an earlier run.
            return
        page = doc[0]
        page.set_rotation((page.rotation + 90) % 360)
        if doc.page_count > 1:
            doc.delete_page(1)
        meta["keywords"] = (keywords + " " + _EKG_FLAG).strip()
        doc.set_metadata(meta)
        doc.save(tmp, deflate=True)
    finally:
        # Close on every path so the source file is never left locked,
        # including when rotation or saving raises.
        doc.close()
    # The document must be closed before the original can be replaced.
    os.replace(tmp, pdf_path)
    print(" [EKG] Stránka otočena o 90°.")
|
||||
|
||||
|
||||
def _ekg_ocr(pdf_path: Path) -> str:
    """Render the first page at 300 dpi and return the Tesseract OCR text.

    OCR runs with the Czech language model (``ces``) and page segmentation
    mode 6. The Tesseract executable path is set to a fixed Windows install
    location on every call.

    Args:
        pdf_path: PDF whose first page is recognised.

    Returns:
        The raw text produced by Tesseract.
    """
    import fitz
    import pytesseract
    from PIL import Image as _PILImage

    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    doc = fitz.open(str(pdf_path))
    try:
        pix = doc[0].get_pixmap(dpi=300)
        img = _PILImage.frombytes("RGB", [pix.width, pix.height], pix.samples)
    finally:
        # The image holds its own copy of the pixels, so the document can be
        # released before OCR starts, and also when rendering fails.
        doc.close()
    return pytesseract.image_to_string(img, lang="ces", config="--psm 6")
|
||||
|
||||
|
||||
def _ekg_extract_rc(text: str) -> str | None:
|
||||
m = re.search(r"(\d{6})\s*/?\s*(\d{3,4})", text)
|
||||
if not m:
|
||||
return None
|
||||
return m.group(1) + m.group(2).zfill(4)
|
||||
|
||||
|
||||
def _ekg_extract_date(text: str) -> str | None:
|
||||
"""Vrátí datum ve formátu YYYY-MM-DD nebo None."""
|
||||
m = re.search(r"(\d{1,2})[\.,]\s*(\d{1,2})[\.,]\s*(\d{4})", text)
|
||||
if m:
|
||||
d, mo, y = m.groups()
|
||||
return f"{y}-{mo.zfill(2)}-{d.zfill(2)}"
|
||||
for pat in [r"\b(\d{2})(\d{2})(\d{4})\b", r"\b(\d{2})(\d{1})(\d{4})\b"]:
|
||||
for m in re.finditer(pat, text):
|
||||
d, mo, y = m.groups()
|
||||
if 1 <= int(d) <= 31 and 1 <= int(mo) <= 12 and 1900 <= int(y) <= 2100:
|
||||
return f"{y}-{mo.zfill(2)}-{d.zfill(2)}"
|
||||
return None
|
||||
|
||||
|
||||
def extract_info_ekg(pdf_path: Path) -> dict:
    """Process an EKG PDF: rotate in place, OCR it, and verify the patient.

    Steps, in order: rotate the file if it has not been rotated yet, run OCR
    on it, pull the birth number and date out of the OCR text, and look the
    birth number up through ``verify_patient``. When the lookup reports a
    "fuzzy" match with a corrected number, the corrected number is used.

    Args:
        pdf_path: EKG file; it may be rewritten by the rotation step.

    Returns:
        Dict with ``rodne_cislo``, ``datum_zpravy``, ``nazev_souboru`` (None
        unless both number and date are known), ``_verif`` (the raw lookup
        result) and ``_rc_ocr`` (the number as read by OCR, "" if none).
    """
    _ekg_rotate_if_needed(pdf_path)

    print(" [EKG] OCR přes Tesseract...")
    raw_text = _ekg_ocr(pdf_path)
    print(f"\n--- EKG OCR TEXT ---\n{raw_text}\n--- KONEC ---\n")

    rc_ocr = _ekg_extract_rc(raw_text)
    date_iso = _ekg_extract_date(raw_text)
    print(f" [EKG] RČ: {rc_ocr or 'NENALEZENO'} | Datum: {date_iso or 'NENALEZENO'}")

    print(f" [EKG] Ověřuji v Medicus (RČ: {rc_ocr or '?'})...")
    verif = verify_patient(rc_ocr or "")

    # Prefer the number corrected by the lookup over the raw OCR reading.
    corrected = verif.get("rc_corrected") if verif["status"] == "fuzzy" else None
    if corrected:
        rc_final = corrected
        print(f" [EKG] RČ opraveno: {rc_ocr} → {rc_final}")
    else:
        rc_final = rc_ocr

    patient = verif.get("patient")
    if patient:
        name_part = f"{patient['prijmeni']}, {patient['jmeno']}"
    else:
        name_part = ""

    nazev = None
    if rc_final and date_iso:
        name_suffix = (" " + name_part) if name_part else ""
        nazev = f"{rc_final} {date_iso}{name_suffix} [EKG] [bez hodnocení].pdf"

    result = {
        "rodne_cislo": rc_final,
        "datum_zpravy": date_iso,
        "nazev_souboru": nazev,
        "_verif": verif,
        "_rc_ocr": rc_ocr or "",
    }
    return result
|
||||
|
||||
|
||||
# ─── Korekce (few-shot příklady) ─────────────────────────────────────────────
|
||||
|
||||
def load_corrections() -> list[dict]:
|
||||
@@ -414,50 +527,64 @@ def _parse_split_filename(name: str) -> tuple[str, str] | None:
|
||||
def process_file(pdf_path: Path):
|
||||
print(f"\nSoubor: {pdf_path.name}")
|
||||
|
||||
# Spusť načítání indexu dokumentace na pozadí — hotovo za dobu volání Claude
|
||||
# Spusť načítání indexu dokumentace na pozadí — hotovo za dobu volání Claude/OCR
|
||||
start_dokumentace_index()
|
||||
|
||||
# 1. Otevři preview originálu
|
||||
is_ekg = _is_ekg(pdf_path)
|
||||
split = None
|
||||
|
||||
if is_ekg:
|
||||
# EKG větev: rotace in-place PŘED preview, pak Tesseract OCR + Medicus
|
||||
print(" [EKG] Detekován EKG soubor (PDFCreator).")
|
||||
info = extract_info_ekg(pdf_path)
|
||||
nazev = info.get("nazev_souboru") or pdf_path.name
|
||||
rc_from_scan = re.sub(r"\D", "", info.get("rodne_cislo") or "")
|
||||
verif = info["_verif"]
|
||||
rc_ocr = info["_rc_ocr"]
|
||||
|
||||
# 1. Otevři preview (pro EKG: soubor je již otočen)
|
||||
preview, geom_file = open_preview(pdf_path)
|
||||
below_y = read_preview_bottom(geom_file)
|
||||
|
||||
# 2. Zjisti RČ a jméno — buď z názvu (split soubor) nebo přes Claude Vision API
|
||||
split = _parse_split_filename(pdf_path.name)
|
||||
if split:
|
||||
rc_from_scan, name_from_filename = split
|
||||
print(f" Split soubor — RČ z názvu: {rc_from_scan}, jméno: {name_from_filename}")
|
||||
# Claude stále voláme, ale předáme mu identitu pacienta — ať se soustředí na obsah
|
||||
info = extract_info(pdf_path, known_patient=name_from_filename, known_rc=rc_from_scan)
|
||||
# RC a jméno bereme z názvu souboru, ne z Claudovy odpovědi
|
||||
nazev = info.get("nazev_souboru") or pdf_path.name
|
||||
nazev = re.sub(r"^\d{9,10}\s+", f"{rc_from_scan} ", nazev) # přepiš RC v názvu naším
|
||||
else:
|
||||
info = extract_info(pdf_path)
|
||||
nazev = info.get("nazev_souboru") or pdf_path.name
|
||||
rc_from_scan = re.sub(r"\D", "", info.get("rodne_cislo") or "")
|
||||
if not is_ekg:
|
||||
# 2. Zjisti RČ a jméno — buď z názvu (split soubor) nebo přes Claude Vision API
|
||||
split = _parse_split_filename(pdf_path.name)
|
||||
if split:
|
||||
rc_from_scan, name_from_filename = split
|
||||
print(f" Split soubor — RČ z názvu: {rc_from_scan}, jméno: {name_from_filename}")
|
||||
info = extract_info(pdf_path, known_patient=name_from_filename, known_rc=rc_from_scan)
|
||||
nazev = info.get("nazev_souboru") or pdf_path.name
|
||||
nazev = re.sub(r"^\d{9,10}\s+", f"{rc_from_scan} ", nazev)
|
||||
else:
|
||||
info = extract_info(pdf_path)
|
||||
nazev = info.get("nazev_souboru") or pdf_path.name
|
||||
rc_from_scan = re.sub(r"\D", "", info.get("rodne_cislo") or "")
|
||||
|
||||
# 3. Medicus ověření + fuzzy matching RČ
|
||||
print(f" Ověřuji v Medicus (RČ: {rc_from_scan})...")
|
||||
verif = verify_patient(rc_from_scan)
|
||||
# 3. Medicus ověření + fuzzy matching RČ
|
||||
print(f" Ověřuji v Medicus (RČ: {rc_from_scan})...")
|
||||
verif = verify_patient(rc_from_scan)
|
||||
rc_ocr = rc_from_scan
|
||||
|
||||
# Oprava RČ při fuzzy matchi (jen pro nesplit soubory — u split máme RC spolehlivé)
|
||||
if not split and verif["status"] == "fuzzy" and verif.get("rc_corrected") and nazev:
|
||||
nazev = nazev.replace(rc_from_scan, verif["rc_corrected"], 1)
|
||||
print(f" → RČ opraveno: {rc_from_scan} → {verif['rc_corrected']}")
|
||||
# Oprava RČ při fuzzy matchi (jen pro nesplit soubory — u split máme RC spolehlivé)
|
||||
if not split and verif["status"] == "fuzzy" and verif.get("rc_corrected") and nazev:
|
||||
nazev = nazev.replace(rc_from_scan, verif["rc_corrected"], 1)
|
||||
print(f" → RČ opraveno: {rc_from_scan} → {verif['rc_corrected']}")
|
||||
|
||||
# Info řádky pro dialog
|
||||
status = verif["status"]
|
||||
patient = verif.get("patient")
|
||||
info_lines = []
|
||||
if split:
|
||||
if is_ekg:
|
||||
info_lines.append("⚡ EKG soubor — Tesseract OCR")
|
||||
elif split:
|
||||
info_lines.append(f"⚡ Split soubor — identita z názvu: {name_from_filename} | RČ {rc_from_scan}")
|
||||
if status == "ok":
|
||||
info_lines.append(f"✓ Medicus: {patient['prijmeni']} {patient['jmeno']} | RČ {patient['rodcis']}")
|
||||
elif status == "fuzzy":
|
||||
info_lines.append(f"⚠ RČ ze skenu '{rc_from_scan}' → opraveno na {verif['rc_corrected']}")
|
||||
info_lines.append(f"⚠ RČ ze skenu '{rc_ocr}' → opraveno na {verif['rc_corrected']}")
|
||||
info_lines.append(f" Pacient: {patient['prijmeni']} {patient['jmeno']} | RČ {patient['rodcis']}")
|
||||
elif status == "not_found":
|
||||
info_lines.append(f"✗ RČ '{rc_from_scan}' nenalezeno v Medicus")
|
||||
info_lines.append(f"✗ RČ '{rc_ocr}' nenalezeno v Medicus")
|
||||
else:
|
||||
info_lines.append("— Medicus nedostupný (offline)")
|
||||
|
||||
@@ -468,7 +595,7 @@ def process_file(pdf_path: Path):
|
||||
info_lines.append(f"⚠ DUPLICITA: {', '.join(duplicity)}")
|
||||
|
||||
if not info_lines:
|
||||
info_lines = ["[Claude nevrátil název — uprav ručně]"]
|
||||
info_lines = ["[uprav ručně]"]
|
||||
print(" Otevírám dialog pro schválení názvu...")
|
||||
final_name = run_rename_dialog(nazev, info_lines, below_y=below_y)
|
||||
|
||||
|
||||
@@ -0,0 +1,177 @@
|
||||
"""
|
||||
Vygeneruje A4 cheat sheet pro Calcudoku 4x4 — kombinace sčítání, násobení, dělení.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from itertools import combinations
|
||||
from math import prod
|
||||
from pathlib import Path
|
||||
|
||||
sys.stdout.reconfigure(encoding="utf-8")
|
||||
|
||||
from reportlab.lib import colors
|
||||
from reportlab.lib.pagesizes import A4
|
||||
from reportlab.lib.units import cm
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from reportlab.pdfbase.ttfonts import TTFont
|
||||
from reportlab.pdfgen.canvas import Canvas
|
||||
|
||||
_fonts_dir = os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts")
|
||||
pdfmetrics.registerFont(TTFont("Arial", os.path.join(_fonts_dir, "arial.ttf")))
|
||||
pdfmetrics.registerFont(TTFont("ArialBold", os.path.join(_fonts_dir, "arialbd.ttf")))
|
||||
|
||||
OUTPUT = Path(__file__).parent / "cheatsheet_4x4.pdf"
|
||||
|
||||
DIGITS = range(1, 5)
|
||||
PAGE_W, PAGE_H = A4
|
||||
MARGIN = 1.8 * cm
|
||||
|
||||
# Barvy sekcí
|
||||
COL_HEADER_BG = colors.Color(0.20, 0.35, 0.55)
|
||||
COL_HEADER_FG = colors.white
|
||||
ROW_HEADER_BG = colors.Color(0.88, 0.92, 0.97)
|
||||
ROW_ALT_BG = colors.Color(0.96, 0.97, 1.00)
|
||||
ROW_WHITE_BG = colors.white
|
||||
SECTION_COLORS = {
|
||||
"+": colors.Color(0.18, 0.48, 0.30), # zelená
|
||||
"*": colors.Color(0.55, 0.20, 0.20), # červená
|
||||
"/": colors.Color(0.55, 0.40, 0.10), # oranžová
|
||||
}
|
||||
SECTION_LABELS = {"+": "SČÍTÁNÍ +", "*": "NÁSOBENÍ ×", "/": "DĚLENÍ ÷"}
|
||||
|
||||
|
||||
def build_table(op: str, digits=None) -> dict:
    """Group every digit combination by the value it produces under ``op``.

    Combinations are drawn without repetition, from size 2 up to the full
    digit set. For division the digits are taken largest first and divided
    in turn; a combination is kept only when every step divides exactly.

    Args:
        op: One of "+", "*" or "/".
        digits: Digits to combine. Defaults to the module-level ``DIGITS``.

    Returns:
        ``{result: {cell_count: [combo, ...]}}`` where each combo is a tuple
        in the order produced by ``itertools.combinations``.

    Raises:
        ValueError: If ``op`` is not a supported operator.
    """
    if op not in ("+", "*", "/"):
        raise ValueError(f"unsupported operator: {op!r}")
    pool = tuple(DIGITS if digits is None else digits)

    results: dict = {}
    for n in range(2, len(pool) + 1):
        for combo in combinations(pool, n):
            if op == "+":
                val = sum(combo)
            elif op == "*":
                val = prod(combo)
            else:
                # Division: largest digit divided by the rest in descending
                # order; give up as soon as one step leaves a remainder.
                ordered = sorted(combo, reverse=True)
                val = ordered[0]
                for divisor in ordered[1:]:
                    if val % divisor:
                        val = None
                        break
                    val //= divisor
                if val is None:
                    continue
            results.setdefault(val, {}).setdefault(n, []).append(combo)
    return results
|
||||
|
||||
|
||||
def combo_str(combo: tuple, op: str) -> str:
    """Format a digit combination as an expression using the operator's symbol.

    Addition and multiplication keep the order of ``combo``. Division is
    written largest digit first, so the text reads as an expression that
    actually evaluates to the listed result (e.g. "4÷2", not "2÷4").

    Args:
        combo: Digits of one combination.
        op: One of "+", "*" or "/"; any other value raises KeyError.

    Returns:
        The digits joined by "+", "×" or "÷".
    """
    sym = {"+": "+", "*": "×", "/": "÷"}[op]
    ordered = sorted(combo, reverse=True) if op == "/" else combo
    return sym.join(str(d) for d in ordered)
|
||||
|
||||
|
||||
def draw_section(c: Canvas, op: str, x: float, y: float, width: float) -> float:
    """Draw the table for one operator and return the y coordinate of its bottom.

    Layout, top to bottom: a coloured title bar, a header row with one column
    per cell count, then one row per result value. Each cell lists the
    combinations producing that result, or a grey dash when there are none.

    Args:
        c: Canvas to draw on.
        op: Operator key ("+", "*" or "/").
        x: Left edge of the section.
        y: Top edge of the section.
        width: Total width of the section.

    Returns:
        The y coordinate just below the last row.
    """
    table = build_table(op)
    all_ns = sorted({n for sub in table.values() for n in sub})

    col_label_w = 1.1 * cm
    # max(..., 1) keeps an empty table from dividing by zero.
    col_w = (width - col_label_w) / max(len(all_ns), 1)
    row_h = 0.72 * cm
    header_h = 0.8 * cm
    title_h = 0.75 * cm

    section_color = SECTION_COLORS[op]
    text_color = colors.Color(0.1, 0.1, 0.1)

    # Section title bar
    c.setFillColor(section_color)
    c.rect(x, y - title_h, width, title_h, fill=1, stroke=0)
    c.setFillColor(colors.white)
    c.setFont("ArialBold", 13)
    c.drawString(x + 0.3 * cm, y - 0.55 * cm, SECTION_LABELS[op])
    y -= title_h

    # Column headers (number of cells in the cage)
    c.setFillColor(COL_HEADER_BG)
    c.rect(x, y - header_h, width, header_h, fill=1, stroke=0)
    c.setFillColor(COL_HEADER_FG)
    c.setFont("ArialBold", 9)
    c.drawCentredString(x + col_label_w / 2, y - header_h + 0.22 * cm, "výsledek")
    for i, n in enumerate(all_ns):
        cx = x + col_label_w + i * col_w + col_w / 2
        # Czech plural: 2-4 take "buňky", 5 and more take "buněk".
        label = f"{n} buňky" if n < 5 else f"{n} buněk"
        c.drawCentredString(cx, y - header_h + 0.22 * cm, label)
    y -= header_h

    # Rows, one per result value
    for ridx, result in enumerate(sorted(table)):
        bg = ROW_ALT_BG if ridx % 2 == 0 else ROW_WHITE_BG
        c.setFillColor(bg)
        c.rect(x, y - row_h, width, row_h, fill=1, stroke=0)

        # Result label cell
        c.setFillColor(section_color)
        c.rect(x, y - row_h, col_label_w, row_h, fill=1, stroke=0)
        c.setFillColor(colors.white)
        c.setFont("ArialBold", 11)
        c.drawCentredString(x + col_label_w / 2, y - row_h + 0.18 * cm, str(result))

        # Combination cells
        c.setFillColor(text_color)
        for i, n in enumerate(all_ns):
            cx = x + col_label_w + i * col_w
            combos = table[result].get(n, [])
            if not combos:
                # Grey dash marks "no combination of this size".
                c.setFillColor(colors.Color(0.75, 0.75, 0.75))
                c.setFont("Arial", 9)
                c.drawCentredString(cx + col_w / 2, y - row_h + 0.18 * cm, "—")
                c.setFillColor(text_color)
                continue

            # Several combinations are stacked inside the row; a single one
            # is emphasised with a bold, slightly larger font.
            single = len(combos) == 1
            c.setFont("ArialBold" if single else "Arial", 10 if single else 9)
            line_h = row_h / len(combos)
            for ci, combo in enumerate(combos):
                ty = y - ci * line_h - line_h + 0.16 * cm
                c.drawCentredString(cx + col_w / 2, ty, combo_str(combo, op))

        y -= row_h

    # Closing rule under the section
    c.setStrokeColor(section_color)
    c.setLineWidth(1.2)
    c.line(x, y, x + width, y)
    c.setLineWidth(0.5)

    return y
|
||||
|
||||
|
||||
def main():
    """Render the cheat sheet PDF to ``OUTPUT`` and print where it was saved.

    The page gets a title and subtitle, followed by one table per operator
    (addition, multiplication, division) stacked with a small gap between.
    """
    usable_width = PAGE_W - 2 * MARGIN
    page_center = PAGE_W / 2
    top = PAGE_H - 1.4 * cm

    pdf = Canvas(str(OUTPUT), pagesize=A4)

    # Main title and subtitle
    pdf.setFillColor(colors.Color(0.12, 0.12, 0.30))
    pdf.setFont("ArialBold", 18)
    pdf.drawCentredString(page_center, top - 0.6 * cm, "Calcudoku 4×4 — Cheat Sheet")
    pdf.setFont("Arial", 9)
    pdf.setFillColor(colors.Color(0.4, 0.4, 0.4))
    pdf.drawCentredString(page_center, top - 1.05 * cm, "Číslice 1–4, v každé kleci bez opakování")

    # Sections are drawn top-down; each returns where it ended.
    cursor = top - 1.5 * cm
    section_gap = 0.45 * cm
    for operator in ("+", "*", "/"):
        cursor = draw_section(pdf, operator, MARGIN, cursor, usable_width)
        cursor -= section_gap

    pdf.save()
    print(f"PDF uloženo: {OUTPUT}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user