This commit is contained in:
2026-05-04 12:10:10 +02:00
parent 804dce8794
commit eed6e192f1
3 changed files with 848 additions and 0 deletions
Binary file not shown.

After

Width:  |  Height:  |  Size: 235 KiB

@@ -0,0 +1,840 @@
"""
rozdelit_pdf.py — Dělení vícestránkového PDF na skupiny pacientů.
Spuštění:
python rozdelit_pdf.py soubor.pdf
Numerická klávesnice:
4 / Left kurzor ←
6 / Right kurzor →
7 / PgUp skok ← o 4 stránky
9 / PgDn skok → o 4 stránky
5 / Space přepni hranici pacienta před touto stránkou
8 / Up přesuň stránku doleva (swap)
2 / Down přesuň stránku doprava (swap)
Enter exportuj všechny skupiny do Split/
Esc konec
"""
import sys
import os
import io
import re
import json
import threading
from pathlib import Path
from typing import Optional
import tkinter as tk
from tkinter import messagebox
from PIL import Image, ImageTk
import fitz # PyMuPDF
# ── Paths ─────────────────────────────────────────────────────────────────────
# Project root: two directory levels above this file.
ROOT = Path(__file__).resolve().parent.parent # .../Medevio/
# Put the project root first on sys.path; presumably this is what makes the
# project-local `Knihovny` package importable below.
sys.path.insert(0, str(ROOT))
from Knihovny.najdi_medicus import get_medicus_config
from Knihovny.najdi_dropbox import get_dropbox_root
# Dropbox root as reported by the project helper.
_DROPBOX = Path(get_dropbox_root())
# Scanner folder; also used as the initial directory of the file-open dialog in main().
_RICOH = _DROPBOX / r"Ordinace\Dokumentace_ke_zpracování\Ricoh Fi-8040"
# Output directory that the export step writes the split PDFs into.
SPLIT_DIR = _RICOH / "Split"
# ── Env ───────────────────────────────────────────────────────────────────────
def _load_env():
    """Copy KEY=VALUE pairs from ``ROOT/.env`` into ``os.environ``.

    Does nothing when the file is missing. Lines that start with ``#`` or
    contain no ``=`` are skipped. Keys and values are whitespace-stripped,
    and an existing environment variable of the same name is overwritten.
    """
    dotenv = ROOT / ".env"
    if not dotenv.exists():
        return
    content = dotenv.read_text(encoding="utf-8")
    for raw_line in content.splitlines():
        entry = raw_line.strip()
        if entry.startswith("#") or "=" not in entry:
            continue
        # Split on the first "=" only, so values may themselves contain "=".
        name, _, value = entry.partition("=")
        os.environ[name.strip()] = value.strip()
# Load .env at import time so later os.environ lookups (e.g. the API key) see it.
_load_env()
# ── Regex ─────────────────────────────────────────────────────────────────────
# When True and no file is given on the command line, main() opens PATH_TO_TESTFILE.
TESTOVANI = False
PATH_TO_TESTFILE = r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\Ricoh Fi-8040\2026-05-04-07-50-17 - Copy.pdf"
# Location of the Tesseract executable handed to pytesseract by the OCR worker.
TESSERACT_PATH = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Birth number (rodné číslo, "RČ") written with a slash: 710920/3893
RC_RE_SPLIT = re.compile(r"\b(\d{6})\s*/\s*(\d{3,4})\b")
# Birth number following a keyword (Tesseract misreads diacritics, so ASCII variants are tolerated)
RC_RE_KEYWORD = re.compile(
r"(?:C\.?P\.?|R\.?C\.?|RC|ID|NAR)\s*[:\.]?\s*(\d{9,10})\b",
re.IGNORECASE,
)
# Fallback: any standalone block of 9-10 digits
RC_RE_PLAIN = re.compile(r"\b(\d{9,10})\b")
def _rc_valid(digits: str) -> bool:
if len(digits) not in (9, 10):
return False
month = int(digits[2:4])
day = int(digits[4:6])
return (month in range(1, 13) or month in range(51, 63)) and 1 <= day <= 31
def _extract_rc(text: str) -> Optional[str]:
    """Pull the first birth number out of OCR text.

    Three strategies are tried in order of decreasing reliability:

    1. ``NNNNNN/NNN[N]`` written with a slash. The first match wins and is
       returned without a date plausibility check.
    2. A 9-10 digit number that follows a keyword (RC, ID, ...).
    3. Any standalone 9-10 digit number.

    Matches from steps 2 and 3 must pass ``_rc_valid``.

    Returns:
        The digits without the slash, or None when nothing was found.
    """
    slashed = RC_RE_SPLIT.search(text)
    if slashed is not None:
        return "".join(slashed.groups())

    for pattern in (RC_RE_KEYWORD, RC_RE_PLAIN):
        hit = next(
            (
                found.group(1)
                for found in pattern.finditer(text)
                if _rc_valid(found.group(1))
            ),
            None,
        )
        if hit is not None:
            return hit
    return None
def _rc_candidates(rc: str) -> list[str]:
# Vizuálně podobné číslice při OCR — každá číslice může být zaměněna za více variant
similar: dict[str, list[str]] = {
"0": ["8", "6", "5"],
"1": ["7", "6"],
"2": [],
"3": ["8"],
"4": [],
"5": ["6", "0"],
"6": ["5", "0", "1"],
"7": ["1"],
"8": ["0", "3"],
"9": [],
}
candidates = set()
for i in range(len(rc)):
candidates.add(rc[:i] + rc[i+1:])
for i in range(len(rc) + 1):
candidates.add(rc[:i] + "0" + rc[i:])
for i, ch in enumerate(rc):
for alt in similar.get(ch, []):
candidates.add(rc[:i] + alt + rc[i+1:])
candidates.discard(rc)
return sorted(c for c in candidates if len(c) in (9, 10))
def _rc_checksum_ok(rc: str) -> bool:
digits = re.sub(r"\D", "", rc)
return len(digits) == 10 and int(digits) % 11 == 0
def _rc_candidates_level2(rc: str) -> list[str]:
    """List birth numbers that are exactly two OCR mistakes away from *rc*.

    ``_rc_candidates`` is applied to each of its own results. Anything
    already reachable with a single mistake, and *rc* itself, is removed.
    Because the two-mistake space is large, only candidates that pass
    ``_rc_checksum_ok`` are kept.

    Returns:
        A sorted list of unique candidates.
    """
    first_order = set(_rc_candidates(rc))
    second_order = {
        variant
        for base in first_order
        for variant in _rc_candidates(base)
    }
    second_order.difference_update(first_order, {rc})
    return sorted(
        variant
        for variant in second_order
        if 9 <= len(variant) <= 10 and _rc_checksum_ok(variant)
    )
# ── Medicus ───────────────────────────────────────────────────────────────────
def _verify_medicus(rc_digits: str) -> dict:
try:
import fdb
cfg = get_medicus_config()
con = fdb.connect(dsn=cfg.dsn, user="SYSDBA", password="masterkey", charset="win1250")
try:
cur = con.cursor()
def _lookup(rc: str) -> Optional[dict]:
cur.execute(
"SELECT IDPAC, PRIJMENI, JMENO, RODCIS FROM KAR "
"WHERE REPLACE(RODCIS, '/', '') = ?", (rc,)
)
row = cur.fetchone()
if row:
return {
"idpac": row[0],
"prijmeni": row[1].strip(),
"jmeno": row[2].strip(),
"rodcis": row[3].strip(),
}
return None
p = _lookup(rc_digits)
if p:
return {"status": "ok", "patient": p}
for c in _rc_candidates(rc_digits):
p = _lookup(c)
if p:
return {"status": "fuzzy", "rc_corrected": c, "patient": p}
for c in _rc_candidates_level2(rc_digits):
p = _lookup(c)
if p:
return {"status": "fuzzy", "rc_corrected": c, "patient": p}
return {"status": "not_found", "patient": None}
finally:
con.close()
except Exception as e:
return {"status": "offline", "patient": None, "error": str(e)}
# ── Jméno výstupního souboru ──────────────────────────────────────────────────
def _format_filename(group_idx: int, medicus: Optional[dict]) -> str:
p = medicus.get("patient") if medicus else None
if p:
rc = re.sub(r"\D", "", p["rodcis"])
return f"{rc} {p['prijmeni']}, {p['jmeno']} split_{group_idx:03d}.pdf"
return f"split_{group_idx:03d}.pdf"
# ── OCR worker (pozadí) ───────────────────────────────────────────────────────
class OcrWorker:
    """Background worker that finds the patient birth number on every page.

    Per page the pipeline is: render -> Tesseract -> Medicus lookup ->
    Claude Vision (only when Tesseract found nothing or Medicus does not
    know the number) -> Medicus lookup of Claude's answer.

    Results are kept in ``self.results`` (page index -> result dict with the
    keys ``rc``, ``medicus``, ``tesseract_text`` and ``claude_raw``) and are
    persisted as JSON in ``cache_path`` after every page.

    NOTE(review): this worker and ThumbnailWorker render from the same
    ``fitz.Document`` on different threads; PyMuPDF's documentation warns
    against multi-threaded use - confirm and serialise access if needed.
    """

    def __init__(self, doc: fitz.Document, cache_path: Path, on_page_done):
        """Store the collaborators and load previously cached results.

        Args:
            doc: Open document whose pages are OCR-ed.
            cache_path: JSON file used to persist results between runs.
            on_page_done: ``callback(page_idx: int)`` invoked from the
                worker thread after each processed page.
        """
        self.doc = doc
        self.cache_path = cache_path
        self.on_page_done = on_page_done  # callback(page_idx: int)
        self.results: dict[int, dict] = {}
        self._stop = threading.Event()
        self._lock = threading.Lock()
        self._load_cache()

    def _load_cache(self):
        """Populate ``self.results`` from the cache file; unreadable caches are ignored."""
        if self.cache_path.exists():
            try:
                data = json.loads(self.cache_path.read_text(encoding="utf-8"))
                # JSON object keys are always strings; convert back to page indices.
                self.results = {int(k): v for k, v in data.items()}
                print(f"[OCR cache] načteno {len(self.results)} stránek z {self.cache_path.name}")
            except Exception as e:
                print(f"[OCR cache] chyba čtení: {e}")

    def _save_cache(self):
        """Write all results to the cache file."""
        with self._lock:
            self.cache_path.write_text(
                json.dumps(self.results, ensure_ascii=False, indent=2),
                encoding="utf-8",
            )

    @staticmethod
    def _needs_retry(result: dict) -> bool:
        """Return True when a cached result only records a Medicus outage.

        Such entries are transient failures: treating them as cache hits
        would keep the page unverified forever, even after Medicus is
        reachable again.
        """
        medicus = result.get("medicus") or {}
        return medicus.get("status") == "offline"

    def start(self):
        """Start processing on a daemon thread."""
        t = threading.Thread(target=self._run, daemon=True)
        t.start()

    def stop(self):
        """Ask the worker to stop before it begins the next page."""
        self._stop.set()

    def _run(self):
        """Process all pages in order, skipping those with a usable cached result."""
        import pytesseract

        pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
        n = len(self.doc)
        for i in range(n):
            if self._stop.is_set():
                break
            cached = self.results.get(i)
            if cached is not None and not self._needs_retry(cached):
                continue  # cache hit
            page = self.doc[i]
            mat = fitz.Matrix(2.0, 2.0)  # 2x zoom of the 72 DPI base = 144 DPI, enough for OCR
            pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

            # 1. Tesseract
            rc = None
            tess_text = None
            try:
                tess_text = pytesseract.image_to_string(img, lang="ces")
                rc = _extract_rc(tess_text)
            except Exception as e:
                print(f"[OCR str.{i+1}] Tesseract: {e}")

            # 2. Medicus - first attempt
            medicus = _verify_medicus(rc) if rc else None

            # 3. Claude Vision - when Tesseract found no number, or found one Medicus does not know
            claude_raw = None
            if not rc or (medicus and medicus.get("status") == "not_found"):
                try:
                    rc_claude, claude_raw = self._claude_rc(img)
                    if rc_claude:
                        medicus_claude = _verify_medicus(rc_claude)
                        if medicus_claude.get("status") in ("ok", "fuzzy"):
                            # Claude's number is known to Medicus: prefer it.
                            print(f"[OCR str.{i+1}] Claude opravil RČ: {rc} -> {rc_claude}")
                            rc = rc_claude
                            medicus = medicus_claude
                        elif not rc:
                            # Unverified, but still better than nothing.
                            rc = rc_claude
                            medicus = medicus_claude
                except Exception as e:
                    print(f"[OCR str.{i+1}] Claude: {e}")

            result = {
                "rc": rc,
                "medicus": medicus,
                "tesseract_text": tess_text,
                "claude_raw": claude_raw,
            }
            self.results[i] = result
            self._save_cache()
            self.on_page_done(i)

    def _claude_rc(self, img: Image.Image) -> tuple[Optional[str], Optional[str]]:
        """Ask Claude Vision for the birth number on a page image.

        Args:
            img: Rendered page.

        Returns:
            ``(digits or None, raw model reply)``. The digits are None when
            the reply is not the expected JSON or holds no number.

        Raises:
            Whatever the ``anthropic`` client raises (missing package,
            missing API key, network errors); the caller logs and continues.
        """
        import anthropic, base64

        buf = io.BytesIO()
        img.save(buf, format="JPEG", quality=80)
        b64 = base64.standard_b64encode(buf.getvalue()).decode()
        client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
        resp = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=100,
            messages=[{"role": "user", "content": [
                {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": b64}},
                {"type": "text", "text": (
                    "Najdi rodné číslo na tomto naskenovaném dokumentu. "
                    "Vrať JSON: {\"rodne_cislo\": \"XXXXXXXXXX\"} nebo {\"rodne_cislo\": null}. "
                    "Jen JSON, nic jiného."
                )},
            ]}],
        )
        raw = resp.content[0].text.strip()
        # Strip an optional Markdown code fence around the JSON.
        raw = re.sub(r"^```\w*\n?", "", raw).rstrip("`").strip()
        try:
            rc_raw = json.loads(raw).get("rodne_cislo") or ""
            # str(): the model may answer with a JSON number instead of a string.
            return re.sub(r"\D", "", str(rc_raw)) or None, raw
        except Exception:
            return None, raw
# ── Thumbnail worker (pozadí) ─────────────────────────────────────────────────
class ThumbnailWorker:
    """Render every page of a PDF into a fixed-size PIL thumbnail on a background thread.

    Rendering starts as soon as the object is constructed. Finished
    thumbnails are fetched with ``get``; ``on_thumb_done(page_idx)`` is
    called from the worker thread after each page.
    """

    def __init__(self, doc: fitz.Document, thumb_w: int, thumb_h: int, on_thumb_done):
        self.doc = doc
        self.thumb_w = thumb_w
        self.thumb_h = thumb_h
        self.on_thumb_done = on_thumb_done  # callback(page_idx: int)
        self._cache: dict[int, Image.Image] = {}
        self._lock = threading.Lock()
        threading.Thread(target=self._run, daemon=True).start()

    def get(self, page_idx: int) -> Optional[Image.Image]:
        """Return the thumbnail of *page_idx*, or None when it is not rendered yet."""
        with self._lock:
            return self._cache.get(page_idx)

    def _run(self):
        """Render the pages in order and publish each one through the callback."""
        total = len(self.doc)
        for index in range(total):
            framed = self._render(index)
            with self._lock:
                self._cache[index] = framed
            self.on_thumb_done(index)

    def _render(self, index: int) -> Image.Image:
        """Rasterise one page and centre it on a dark canvas of the thumbnail size."""
        page = self.doc[index]
        bounds = page.rect
        # Largest uniform zoom at which the page still fits into the thumbnail box.
        zoom = min(self.thumb_w / bounds.width, self.thumb_h / bounds.height)
        pixmap = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), colorspace=fitz.csRGB)
        rendered = Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples)
        framed = Image.new("RGB", (self.thumb_w, self.thumb_h), (38, 38, 38))
        offset = (
            (self.thumb_w - rendered.width) // 2,
            (self.thumb_h - rendered.height) // 2,
        )
        framed.paste(rendered, offset)
        return framed
# ── Colours and dimensions ────────────────────────────────────────────────────
# Number of page slots shown side by side; also the size of a cursor jump.
COLS = 4
BORDER_W = 16 # width of the separator between slots
PAD = 8 # inset of the thumbnail from the slot edge
INFO_H = 108 # height of the info strip below the thumbnail
TOP_H = 44 # height of the status bar at the top
BOT_H = 44 # height of the key hints at the bottom
# The remaining dimensions are computed in SplitterUI.__init__ from the screen resolution
BG = "#1e1e1e"
BG_SLOT = "#262626"
BG_INFO = "#181818"
# Cursor frame, and separator colours for "new patient starts here" vs. "same group".
C_CURSOR = "#4da6ff"
C_BOUNDARY = "#cc3333"
C_SAME = "#3a3a3a"
# Text colours of the info strip, chosen by OCR / Medicus status.
C_OK = "#4caf50"
C_FUZZY = "#ff9800"
C_NONE = "#f44336"
C_OFFLINE = "#888888"
C_LOADING = "#555555"
C_TEXT = "#dddddd"
C_DIM = "#666666"
# Slot background colours, cycled by group index.
GROUP_COLORS = [
"#1b2a3a", "#2a1b3a", "#1b3a2a", "#3a2a1b",
"#2a3a1b", "#1b2a2a", "#3a1b2a", "#2a2a1b",
]
# ── Hlavní UI ─────────────────────────────────────────────────────────────────
class SplitterUI:
    """Keyboard-driven Tk window for splitting a scanned PDF into patient groups.

    State model:

    * ``page_order`` - original page indices in their current order;
      deleted pages are simply removed from this list.
    * ``boundaries`` - positions in ``page_order`` at which a new group
      starts; position 0 is always a boundary.
    * ``cursor`` / ``scroll`` - selected position and leftmost visible one.
    * ``rotations`` - original page index -> rotation in degrees,
      counter-clockwise, as applied to the preview.
    * ``ocr_results`` - original page index -> result dict from OcrWorker.
    """

    # Windows virtual-key codes of the numeric keypad (NumLock on).
    _KEYCODE_ACTIONS = {
        100: "num4", 101: "num5", 102: "num6",
        103: "num7", 105: "num9",
        97: "num1", 99: "num3", 110: "numdot",
        111: "numslash", 106: "numstar",
    }
    # Keysyms sent with NumLock off, plus equivalents on the main keyboard.
    _KEYSYM_ACTIONS = {
        "Left": "num4", "Right": "num6",
        "Home": "num7", "Prior": "num9",
        "Clear": "num5", "End": "num1",
        "Next": "num3", "Delete": "numdot",
        "space": "num5",
        "KP_Divide": "numslash", "KP_Multiply": "numstar",
        "slash": "numslash", "asterisk": "numstar",
    }

    def __init__(self, root: tk.Tk, pdf_path: Path):
        """Open *pdf_path*, size the window to the screen and start the workers."""
        self.root = root
        self.pdf_path = pdf_path
        self.doc = fitz.open(str(pdf_path))
        n = len(self.doc)

        # State
        self.page_order: list[int] = list(range(n))
        self.boundaries: set[int] = {0}  # positions (in page_order) that start a new group
        self.cursor: int = 0
        self.scroll: int = 0  # index of the leftmost visible slot

        # Caches
        self.ocr_results: dict[int, dict] = {}
        self._photo_cache: dict[tuple, ImageTk.PhotoImage] = {}  # (page_idx, rot) -> photo
        self.rotations: dict[int, int] = {}  # page_idx -> degrees (0/90/180/270)

        # Dimensions
        sw = root.winfo_screenwidth()
        sh = root.winfo_screenheight()
        self.SLOT_W = (sw - (COLS - 1) * BORDER_W) // COLS
        self.THUMB_W = self.SLOT_W - 2 * PAD
        self.THUMB_H = int(self.THUMB_W * 842 / 595)  # A4 aspect ratio
        self.CANVAS_W = COLS * self.SLOT_W + (COLS - 1) * BORDER_W
        self.CANVAS_H = PAD + self.THUMB_H + PAD + INFO_H
        win_h = min(TOP_H + self.CANVAS_H + BOT_H, sh - 60)

        root.title(f"PDF Dělení — {pdf_path.name}")
        root.configure(bg=BG)
        root.geometry(f"{self.CANVAS_W}x{win_h}+0+0")
        self._build_ui()
        self._start_workers()

    # ── Building the UI ───────────────────────────────────────────────────────
    def _build_ui(self):
        """Create the status bar, the page canvas and the key-hint bar."""
        self.top_label = tk.Label(
            self.root, bg=BG, fg=C_TEXT,
            font=("Consolas", 13), anchor="w", padx=12
        )
        self.top_label.pack(fill="x", side="top", ipady=4)

        self.canvas = tk.Canvas(
            self.root, width=self.CANVAS_W, height=self.CANVAS_H,
            bg=BG, highlightthickness=0
        )
        self.canvas.pack(fill="both", expand=True)

        hints = (
            "4/6: navigace ←/→ 7/9: skok ×4 "
            "5/Space: hranice pacienta "
            "1/3: přesuň stránku "
            "/: otočit ↺CCW *: otočit ↻CW "
            "Del/.: smaž stránku "
            "Enter: exportuj Esc: konec"
        )
        self.bot_label = tk.Label(
            self.root, text=hints, bg=BG, fg=C_DIM,
            font=("Consolas", 11), anchor="center"
        )
        self.bot_label.pack(fill="x", side="bottom", ipady=6)

        self.root.bind("<KeyPress>", self._on_key)
        self.root.focus_set()
        self._redraw()

    # ── Starting the workers ──────────────────────────────────────────────────
    def _start_workers(self):
        """Start OCR and thumbnail rendering; seed the UI from the OCR cache.

        Worker callbacks are marshalled through ``root.after`` so that
        ``_on_ocr_done`` / ``_on_thumb_done`` run in the Tk event loop.

        NOTE(review): both workers (and the export) read the same
        ``fitz.Document`` from different threads, and ``root.after`` is
        itself called from the worker threads - confirm both are safe.
        """
        cache_path = self.pdf_path.parent / (self.pdf_path.stem + "_ocr_cache.json")
        self.ocr_worker = OcrWorker(
            self.doc, cache_path,
            on_page_done=lambda idx: self.root.after(0, self._on_ocr_done, idx),
        )
        # Take over the results that were loaded from the cache.
        self.ocr_results.update(self.ocr_worker.results)
        self._auto_detect_boundaries()
        self.ocr_worker.start()
        self.thumb_worker = ThumbnailWorker(
            self.doc, self.THUMB_W, self.THUMB_H,
            on_thumb_done=lambda idx: self.root.after(0, self._on_thumb_done, idx),
        )

    @staticmethod
    def _patient_key(result: dict):
        """Return a hashable identity of the patient behind an OCR result.

        The Medicus patient id is preferred. The raw OCR number is only a
        fallback, because two pages of one patient can carry differently
        misread numbers that Medicus resolved to the same person.
        """
        patient = (result.get("medicus") or {}).get("patient") or {}
        idpac = patient.get("idpac")
        if idpac is not None:
            return ("idpac", idpac)
        return ("rc", result.get("rc"))

    def _auto_detect_boundaries(self):
        """Add a boundary wherever two Medicus-confirmed pages belong to different patients.

        Pages without a confirmed patient are skipped. They neither create
        a boundary nor reset the "previous patient".
        """
        prev_key = None
        for pos, page_idx in enumerate(self.page_order):
            r = self.ocr_results.get(page_idx)
            if not r:
                continue
            status = (r.get("medicus") or {}).get("status")
            if not r.get("rc") or status not in ("ok", "fuzzy"):
                continue
            key = self._patient_key(r)
            if prev_key is not None and key != prev_key:
                self.boundaries.add(pos)
            prev_key = key

    # ── Worker callbacks ──────────────────────────────────────────────────────
    def _on_ocr_done(self, page_idx: int):
        """Record a finished OCR result and auto-detect boundaries once all pages are done."""
        self.ocr_results[page_idx] = self.ocr_worker.results[page_idx]
        # Count only pages that are still part of the document: ocr_results
        # is keyed by original page index and keeps entries of deleted pages.
        all_done = all(p in self.ocr_results for p in self.page_order)
        # boundaries == {0} is used as "the user has not set anything yet".
        if all_done and self.boundaries == {0}:
            self._auto_detect_boundaries()
        self._redraw()

    def _on_thumb_done(self, page_idx: int):
        """Convert a freshly rendered thumbnail into a Tk image and repaint."""
        self._rebuild_photo(page_idx)
        self._redraw()

    def _rebuild_photo(self, page_idx: int):
        """Ensure a Tk image exists for *page_idx* in its current rotation."""
        pil = self.thumb_worker.get(page_idx)
        if pil is None:
            return
        rot = self.rotations.get(page_idx, 0)
        key = (page_idx, rot)
        if key in self._photo_cache:
            return
        size = (self.THUMB_W, self.THUMB_H)
        img = pil.rotate(rot, expand=True)
        if img.size != size:
            # A quarter turn swaps width and height. Scale the image down
            # to fit and letterbox it; stretching it to the portrait slot
            # would distort the page.
            img.thumbnail(size, Image.LANCZOS)
            framed = Image.new("RGB", size, (38, 38, 38))
            framed.paste(
                img,
                ((size[0] - img.width) // 2, (size[1] - img.height) // 2),
            )
            img = framed
        self._photo_cache[key] = ImageTk.PhotoImage(img)

    # ── Keyboard ──────────────────────────────────────────────────────────────
    def _on_key(self, event):
        """Translate a key press into an action.

        Numpad keycodes (Windows): 96=KP0 97=KP1 ... 105=KP9 110=KP.
        NumLock ON  -> keysym='1'..'9', keycode=97..105
        NumLock OFF -> keysym=Left/Clear/Right/Home/Up/Prior/Down
        The keycode table is consulted first, then the keysym table.
        """
        ks = event.keysym
        kc = event.keycode
        action = self._KEYCODE_ACTIONS.get(kc) or self._KEYSYM_ACTIONS.get(ks)
        if action == "num4":
            self._move_cursor(-1)
        elif action == "num6":
            self._move_cursor(1)
        elif action == "num7":
            self._move_cursor(-COLS)
        elif action == "num9":
            self._move_cursor(COLS)
        elif action == "num5":
            self._toggle_boundary()
        elif action == "num1":
            self._move_page(-1)
        elif action == "num3":
            self._move_page(1)
        elif action == "numslash":
            self._rotate_page(90)  # CCW
        elif action == "numstar":
            self._rotate_page(-90)  # CW
        elif action == "numdot":
            self._delete_page()
        elif ks in ("Return", "KP_Enter"):
            self._export()
        elif ks == "Escape":
            self.root.quit()

    # ── Movement and manipulation ─────────────────────────────────────────────
    def _scroll_to_cursor(self):
        """Shift the visible window just enough to contain the cursor."""
        if self.cursor < self.scroll:
            self.scroll = self.cursor
        elif self.cursor >= self.scroll + COLS:
            self.scroll = self.cursor - COLS + 1

    def _move_cursor(self, delta: int):
        """Move the cursor by *delta* positions, clamped to the document."""
        n = len(self.page_order)
        self.cursor = max(0, min(n - 1, self.cursor + delta))
        self._scroll_to_cursor()
        self._redraw()

    def _toggle_boundary(self):
        """Toggle the group boundary before the cursor page (never before the first page)."""
        pos = self.cursor
        if pos == 0:
            return
        if pos in self.boundaries:
            self.boundaries.discard(pos)
        else:
            self.boundaries.add(pos)
        self._redraw()

    def _rotate_page(self, delta: int):
        """Rotate the cursor page by *delta* degrees (positive = counter-clockwise)."""
        page_idx = self.page_order[self.cursor]
        rot = (self.rotations.get(page_idx, 0) + delta) % 360
        self.rotations[page_idx] = rot
        self._rebuild_photo(page_idx)
        self._redraw()

    @staticmethod
    def _boundaries_after_delete(boundaries: set, pos: int, remaining: int) -> set:
        """Return the boundary set after the page at *pos* was removed.

        Boundaries behind *pos* move one position to the left. A boundary
        at *pos* itself is kept, because the page that slides into *pos*
        becomes the new first page of that group. Dropping it would merge
        the rest of the group into the previous one. Positions past the new
        end are discarded, and position 0 is always a boundary.

        Args:
            boundaries: Boundary positions before the deletion.
            pos: Position of the removed page.
            remaining: Number of pages left after the deletion.
        """
        shifted = {b - 1 if b > pos else b for b in boundaries}
        kept = {b for b in shifted if b < remaining}
        kept.add(0)  # the first page always starts a group
        return kept

    def _delete_page(self):
        """Remove the cursor page from the document; the last page cannot be removed."""
        n = len(self.page_order)
        if n == 1:
            return
        pos = self.cursor
        self.page_order.pop(pos)
        self.boundaries = self._boundaries_after_delete(
            self.boundaries, pos, len(self.page_order)
        )
        self.cursor = min(pos, len(self.page_order) - 1)
        if self.cursor < self.scroll:
            self.scroll = self.cursor
        self._redraw()

    def _move_page(self, delta: int):
        """Swap the cursor page with its neighbour *delta* positions away.

        Boundaries are positional and stay where they are, so moving a page
        across a boundary moves it into the neighbouring group.
        """
        n = len(self.page_order)
        pos = self.cursor
        new_pos = pos + delta
        if new_pos < 0 or new_pos >= n:
            return
        self.page_order[pos], self.page_order[new_pos] = (
            self.page_order[new_pos], self.page_order[pos]
        )
        self.cursor = new_pos
        self._scroll_to_cursor()
        self._redraw()

    # ── Groups ────────────────────────────────────────────────────────────────
    def _group_of_pos(self) -> list[int]:
        """Return the group index of every position in ``page_order``."""
        result = []
        gi = 0
        for pos in range(len(self.page_order)):
            if pos in self.boundaries and pos > 0:
                gi += 1
            result.append(gi)
        return result

    def _get_groups(self) -> list[list[int]]:
        """Return the groups, each a list of original page indices in display order."""
        groups: list[list[int]] = []
        current: list[int] = []
        for pos, page_idx in enumerate(self.page_order):
            if pos in self.boundaries and current:
                groups.append(current)
                current = []
            current.append(page_idx)
        if current:
            groups.append(current)
        return groups

    def _best_medicus(self, pages: list[int]) -> Optional[dict]:
        """Return the best Medicus result among *pages*: an exact match first, then a fuzzy one."""
        for status in ("ok", "fuzzy"):
            for p in pages:
                r = self.ocr_results.get(p)
                if r and r.get("medicus") and r["medicus"].get("status") == status:
                    return r["medicus"]
        return None

    # ── Export ────────────────────────────────────────────────────────────────
    def _export(self):
        """Write every group as its own PDF into ``SPLIT_DIR`` and report the result.

        NOTE(review): an existing file with the same name is overwritten
        without asking; unidentified groups are always named split_NNN.pdf,
        so exporting a second scan can replace files of an earlier one.
        """
        groups = self._get_groups()
        SPLIT_DIR.mkdir(parents=True, exist_ok=True)
        names = []
        for i, pages in enumerate(groups, 1):
            med = self._best_medicus(pages)
            name = _format_filename(i, med)
            out_path = SPLIT_DIR / name
            out_doc = fitz.open()
            try:
                for page_idx in pages:
                    out_doc.insert_pdf(self.doc, from_page=page_idx, to_page=page_idx)
                    rot = self.rotations.get(page_idx, 0)
                    if rot:
                        # The preview rotation is counter-clockwise while the
                        # PDF page rotation is clockwise, hence the subtraction.
                        page = out_doc[-1]
                        page.set_rotation((page.rotation - rot) % 360)
                out_doc.save(str(out_path))
            finally:
                # Release the document even when inserting or saving fails.
                out_doc.close()
            names.append(f"{name} ({len(pages)} str.)")
            print(f" Exportováno: {name}")
        messagebox.showinfo(
            "Export hotov",
            f"Exportováno {len(groups)} skupin do:\n{SPLIT_DIR}\n\n" + "\n".join(names),
        )

    # ── Drawing ───────────────────────────────────────────────────────────────
    @staticmethod
    def _info_lines(result: Optional[dict]) -> tuple:
        """Return ``(rc_line, patient_line, colour)`` for the info strip of one page."""
        if result is None:
            return "⏳ OCR probíhá…", "", C_LOADING
        rc = result.get("rc")
        rc_line = f"RČ: {rc}" if rc else "RČ: nenalezeno"
        med = result.get("medicus")
        if med:
            s = med["status"]
            p = med.get("patient")
            if s == "ok" and p:
                return rc_line, f"{p['prijmeni']} {p['jmeno']}", C_OK
            if s == "fuzzy" and p:
                return rc_line, f"~ {p['prijmeni']} {p['jmeno']}", C_FUZZY
            if s == "not_found":
                return rc_line, "Nenalezen v Medicus", C_NONE
            return rc_line, "Medicus offline", C_OFFLINE
        if rc:
            return rc_line, "Ověřuji…", C_LOADING
        return rc_line, "", C_NONE

    def _redraw(self):
        """Repaint the visible slots, the separators and the status bar."""
        c = self.canvas
        c.delete("all")
        n = len(self.page_order)
        group_of = self._group_of_pos()
        # Keyed by original page index, so count via page_order: range(n)
        # no longer matches the remaining pages once something was deleted.
        ocr_done = sum(1 for p in self.page_order if p in self.ocr_results)
        slot_h = PAD + self.THUMB_H + PAD

        # Background
        c.create_rectangle(0, 0, self.CANVAS_W, self.CANVAS_H, fill=BG, outline="")

        for col in range(COLS):
            pos = self.scroll + col
            if pos >= n:
                break
            page_idx = self.page_order[pos]
            gi = group_of[pos]
            x0 = col * (self.SLOT_W + BORDER_W)

            # ── Slot background ──────────────────────────────────────────────
            slot_color = GROUP_COLORS[gi % len(GROUP_COLORS)]
            c.create_rectangle(
                x0, 0, x0 + self.SLOT_W, slot_h,
                fill=slot_color, outline=""
            )

            # ── Thumbnail ────────────────────────────────────────────────────
            rot = self.rotations.get(page_idx, 0)
            photo = self._photo_cache.get((page_idx, rot))
            if photo:
                c.create_image(x0 + PAD, PAD, anchor="nw", image=photo)
            else:
                c.create_text(
                    x0 + self.SLOT_W // 2, PAD + self.THUMB_H // 2,
                    text=f"\nstr. {pos + 1}",
                    fill=C_LOADING, font=("Consolas", 18), justify="center"
                )

            # ── Cursor ───────────────────────────────────────────────────────
            if pos == self.cursor:
                c.create_rectangle(
                    x0 + 2, 2, x0 + self.SLOT_W - 2, slot_h - 2,
                    outline=C_CURSOR, width=5
                )

            # ── Info strip ───────────────────────────────────────────────────
            y_info = slot_h
            c.create_rectangle(
                x0, y_info, x0 + self.SLOT_W, y_info + INFO_H,
                fill=BG_INFO, outline=""
            )
            rc_line, pat_line, stat_color = self._info_lines(
                self.ocr_results.get(page_idx)
            )
            c.create_text(
                x0 + 8, y_info + 6,
                text=f"str. {pos + 1}/{n} (orig: {page_idx + 1})",
                anchor="nw", fill=C_DIM, font=("Consolas", 10)
            )
            c.create_text(
                x0 + 8, y_info + 26,
                text=rc_line,
                anchor="nw", fill=stat_color, font=("Consolas", 13, "bold")
            )
            c.create_text(
                x0 + 8, y_info + 52,
                text=pat_line,
                anchor="nw", fill=stat_color, font=("Consolas", 14, "bold")
            )

            # ── Separator to the right of this slot ──────────────────────────
            if col < COLS - 1:
                next_pos = pos + 1
                is_new = next_pos in self.boundaries
                x_sep = x0 + self.SLOT_W
                c.create_rectangle(
                    x_sep, 0, x_sep + BORDER_W, self.CANVAS_H,
                    fill=C_BOUNDARY if is_new else C_SAME, outline=""
                )
                if is_new:
                    c.create_text(
                        x_sep + BORDER_W // 2, self.CANVAS_H // 2,
                        text="\nNOVÝ",
                        fill="white", font=("Consolas", 7, "bold"), justify="center"
                    )

        # ── Status bar at the top ────────────────────────────────────────────
        groups = self._get_groups()
        self.top_label.config(
            text=(
                f" str. {self.cursor + 1}/{n}"
                f"  |  skupiny: {len(groups)}"
                f"  |  OCR: {ocr_done}/{n}"
                f"  |  {self.pdf_path.name}"
            )
        )
# ── Vstup ─────────────────────────────────────────────────────────────────────
def main():
    """Pick the input PDF and run the splitter window.

    The file comes from the first command-line argument, from
    ``PATH_TO_TESTFILE`` when ``TESTOVANI`` is set, or from a file-open
    dialog. The process exits with status 0 when the dialog is cancelled
    and with status 1 when the chosen file does not exist.
    """
    root = tk.Tk()
    root.withdraw()  # keep the main window hidden while the file is chosen

    def bail_out(exit_code: int):
        root.destroy()
        sys.exit(exit_code)

    cli_args = sys.argv[1:]
    if cli_args:
        pdf_path = Path(cli_args[0])
    elif TESTOVANI:
        pdf_path = Path(PATH_TO_TESTFILE)
    else:
        from tkinter import filedialog

        selection = filedialog.askopenfilename(
            title="Vyber vstupní PDF",
            initialdir=str(_RICOH),
            filetypes=[("PDF soubory", "*.pdf")],
        )
        if not selection:
            bail_out(0)
        pdf_path = Path(selection)

    if not pdf_path.exists():
        print(f"Soubor nenalezen: {pdf_path}")
        bail_out(1)

    root.deiconify()
    app = SplitterUI(root, pdf_path)  # bound to a name for the lifetime of the main loop
    root.mainloop()


if __name__ == "__main__":
    main()