""" rozdelit_pdf.py — Dělení vícestránkového PDF na skupiny pacientů. Spuštění: python rozdelit_pdf.py soubor.pdf Numerická klávesnice: 4 / Left kurzor ← 6 / Right kurzor → 7 / PgUp skok ← o 4 stránky 9 / PgDn skok → o 4 stránky 5 / Space přepni hranici pacienta před touto stránkou 8 / Up přesuň stránku doleva (swap) 2 / Down přesuň stránku doprava (swap) Enter exportuj všechny skupiny do Split/ Esc konec """ import sys import os import io import re import json import threading from pathlib import Path from typing import Optional import tkinter as tk from tkinter import messagebox from PIL import Image, ImageTk import fitz # PyMuPDF # ── Cesty ───────────────────────────────────────────────────────────────────── ROOT = Path(__file__).resolve().parent.parent # .../Medevio/ sys.path.insert(0, str(ROOT)) from Knihovny.najdi_medicus import get_medicus_config from Knihovny.najdi_dropbox import get_dropbox_root _DROPBOX = Path(get_dropbox_root()) _RICOH = _DROPBOX / r"Ordinace\Dokumentace_ke_zpracování\Ricoh Fi-8040" SPLIT_DIR = _RICOH / "Split" # ── Env ─────────────────────────────────────────────────────────────────────── def _load_env(): env_path = ROOT / ".env" if env_path.exists(): for line in env_path.read_text(encoding="utf-8").splitlines(): line = line.strip() if "=" in line and not line.startswith("#"): k, v = line.split("=", 1) os.environ[k.strip()] = v.strip() _load_env() # ── Regex ───────────────────────────────────────────────────────────────────── TESTOVANI = False PATH_TO_TESTFILE = r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\Ricoh Fi-8040\2026-05-04-07-50-17 - Copy.pdf" TESSERACT_PATH = r"C:\Program Files\Tesseract-OCR\tesseract.exe" # RČ s lomítkem: 710920/3893 RC_RE_SPLIT = re.compile(r"\b(\d{6})\s*/\s*(\d{3,4})\b") # RČ za klíčovým slovem (Tesseract špatně přečte diakritiku → tolerujeme ASCII varianty) RC_RE_KEYWORD = re.compile( r"(?:C\.?P\.?|R\.?C\.?|RC|ID|NAR)\s*[:\.]?\s*(\d{9,10})\b", re.IGNORECASE, ) # Fallback: jakýkoli 9-10místný blok 
RC_RE_PLAIN = re.compile(r"\b(\d{9,10})\b")

# Characters that must not appear in a Windows file name; patient names coming
# from the database are sanitised with this before they become part of a path.
_FILENAME_FORBIDDEN = re.compile(r'[<>:"/\\|?*\x00-\x1f]')

# PyMuPDF is documented as not thread-safe.  The OCR worker, the thumbnail
# worker and the export all touch the same fitz.Document, so every access from
# those paths is serialised through this lock.
_FITZ_LOCK = threading.Lock()


def _rc_valid(digits: str) -> bool:
    """Return True when *digits* is plausible as a birth number.

    Requires a length of 9 or 10, a month field (characters 2-3) in 1-12 or
    51-62, and a day field (characters 4-5) in 1-31.
    """
    if len(digits) not in (9, 10):
        return False
    month = int(digits[2:4])
    day = int(digits[4:6])
    return (month in range(1, 13) or month in range(51, 63)) and 1 <= day <= 31


def _extract_rc(text: str) -> Optional[str]:
    """Find a birth number in OCR *text* and return it as bare digits.

    Tried in order: the slash form, a number following a keyword, any plain
    9-10 digit block.  The latter two must pass ``_rc_valid``.  Returns None
    when nothing matches.
    """
    # 1. slash form
    m = RC_RE_SPLIT.search(text)
    if m:
        return m.group(1) + m.group(2)
    # 2. keyword + number
    for m in RC_RE_KEYWORD.finditer(text):
        if _rc_valid(m.group(1)):
            return m.group(1)
    # 3. plain fallback
    for m in RC_RE_PLAIN.finditer(text):
        if _rc_valid(m.group(1)):
            return m.group(1)
    return None


def _rc_candidates(rc: str) -> list[str]:
    """Return the single-error OCR variants of *rc*.

    Variants are produced by dropping one character, inserting a "0" at any
    position, or replacing one digit with a visually similar one.  The result
    is sorted, excludes *rc* itself and keeps only strings of length 9 or 10.
    """
    # Visually similar digits in OCR — each digit may be confused with several
    similar: dict[str, list[str]] = {
        "0": ["8", "6", "5"],
        "1": ["7", "6"],
        "2": [],
        "3": ["8"],
        "4": [],
        "5": ["6", "0"],
        "6": ["5", "0", "1"],
        "7": ["1"],
        "8": ["0", "3"],
        "9": [],
    }
    candidates = set()
    for i in range(len(rc)):
        candidates.add(rc[:i] + rc[i+1:])
    for i in range(len(rc) + 1):
        candidates.add(rc[:i] + "0" + rc[i:])
    for i, ch in enumerate(rc):
        for alt in similar.get(ch, []):
            candidates.add(rc[:i] + alt + rc[i+1:])
    candidates.discard(rc)
    return sorted(c for c in candidates if len(c) in (9, 10))


def _rc_checksum_ok(rc: str) -> bool:
    """Return True when *rc* has exactly 10 digits and is divisible by 11."""
    digits = re.sub(r"\D", "", rc)
    return len(digits) == 10 and int(digits) % 11 == 0


def _rc_candidates_level2(rc: str) -> list[str]:
    """Return two-error variants of *rc* that pass the checksum.

    Built by applying ``_rc_candidates`` to every single-error variant; the
    single-error variants and *rc* itself are excluded.
    """
    level1 = set(_rc_candidates(rc))
    level2 = set()
    for c in level1:
        level2.update(_rc_candidates(c))
    level2 -= level1
    level2.discard(rc)
    # Without the checksum there would be too many candidates — filter
    return sorted(c for c in level2 if len(c) in (9, 10) and _rc_checksum_ok(c))


# ── Medicus ───────────────────────────────────────────────────────────────────
def _verify_medicus(rc_digits: str) -> dict:
    """Look *rc_digits* up in the Medicus database.

    Returns a dict whose ``status`` is one of:
      * ``"ok"``        — exact match, ``patient`` holds the record
      * ``"fuzzy"``     — a one- or two-error variant matched; the variant is
                          in ``rc_corrected``
      * ``"not_found"`` — nothing matched
      * ``"offline"``   — any exception occurred (including a missing ``fdb``
                          module); the message is in ``error``
    """
    try:
        import fdb
        cfg = get_medicus_config()
        # NOTE(review): the database credentials are hard-coded here.
        con = fdb.connect(dsn=cfg.dsn, user="SYSDBA", password="masterkey",
                          charset="win1250")
        try:
            cur = con.cursor()

            def _lookup(rc: str) -> Optional[dict]:
                cur.execute(
                    "SELECT IDPAC, PRIJMENI, JMENO, RODCIS FROM KAR "
                    "WHERE REPLACE(RODCIS, '/', '') = ?", (rc,)
                )
                row = cur.fetchone()
                if row:
                    # A NULL column must not turn a successful lookup into an
                    # exception (which would be reported as "offline").
                    return {
                        "idpac": row[0],
                        "prijmeni": (row[1] or "").strip(),
                        "jmeno": (row[2] or "").strip(),
                        "rodcis": (row[3] or "").strip(),
                    }
                return None

            p = _lookup(rc_digits)
            if p:
                return {"status": "ok", "patient": p}

            for c in _rc_candidates(rc_digits):
                p = _lookup(c)
                if p:
                    return {"status": "fuzzy", "rc_corrected": c, "patient": p}

            for c in _rc_candidates_level2(rc_digits):
                p = _lookup(c)
                if p:
                    return {"status": "fuzzy", "rc_corrected": c, "patient": p}

            return {"status": "not_found", "patient": None}
        finally:
            con.close()
    except Exception as e:
        return {"status": "offline", "patient": None, "error": str(e)}


# ── Output file name ──────────────────────────────────────────────────────────
def _format_filename(group_idx: int, medicus: Optional[dict]) -> str:
    """Build the output file name for group *group_idx*.

    With a patient record the name is ``"<rc> <surname>, <name> split_NNN.pdf"``
    (characters not allowed in Windows file names are replaced with ``_``);
    without one it is just ``"split_NNN.pdf"``.
    """
    p = medicus.get("patient") if medicus else None
    if p:
        rc = re.sub(r"\D", "", p["rodcis"])
        surname = _FILENAME_FORBIDDEN.sub("_", p["prijmeni"])
        first_name = _FILENAME_FORBIDDEN.sub("_", p["jmeno"])
        return f"{rc} {surname}, {first_name} split_{group_idx:03d}.pdf"
    return f"split_{group_idx:03d}.pdf"


# ── OCR worker (background) ───────────────────────────────────────────────────
class OcrWorker:
    """
    OCRs pages in a background thread: Tesseract → Claude Vision (fallback)
    → Medicus.  Results are cached in the JSON file given as *cache_path*.
    """

    def __init__(self, doc: fitz.Document, cache_path: Path, on_page_done):
        self.doc = doc
        self.cache_path = cache_path
        self.on_page_done = on_page_done  # callback(page_idx: int)
        self.results: dict[int, dict] = {}
        self._stop = threading.Event()
        self._lock = threading.Lock()
        self._load_cache()

    def _load_cache(self):
        """Populate ``self.results`` from the cache file, if it is readable."""
        if self.cache_path.exists():
            try:
                data = json.loads(self.cache_path.read_text(encoding="utf-8"))
                # JSON object keys are strings; page indices are ints.
                self.results = {int(k): v for k, v in data.items()}
                print(f"[OCR cache] načteno {len(self.results)} stránek z {self.cache_path.name}")
            except Exception as e:
                print(f"[OCR cache] chyba čtení: {e}")

    def _save_cache(self):
        """Write all results collected so far to the cache file."""
        with self._lock:
            self.cache_path.write_text(
                json.dumps(self.results, ensure_ascii=False, indent=2),
                encoding="utf-8",
            )

    def start(self):
        """Start processing in a daemon thread."""
        t = threading.Thread(target=self._run, daemon=True)
        t.start()

    def stop(self):
        """Ask the worker to stop before it starts the next page."""
        self._stop.set()

    def _render_page(self, page_idx: int) -> Image.Image:
        """Render one page to an RGB PIL image while holding the fitz lock."""
        with _FITZ_LOCK:
            page = self.doc[page_idx]
            mat = fitz.Matrix(2.0, 2.0)  # 144 DPI — sufficient for OCR
            pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
            return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

    def _run(self):
        """Thread body: process every page that is not already cached."""
        import pytesseract
        pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH

        with _FITZ_LOCK:
            n = len(self.doc)
        for i in range(n):
            if self._stop.is_set():
                break
            if i in self.results:
                continue  # cache hit

            img = self._render_page(i)

            # 1. Tesseract
            rc = None
            tess_text = None
            try:
                tess_text = pytesseract.image_to_string(img, lang="ces")
                rc = _extract_rc(tess_text)
            except Exception as e:
                print(f"[OCR str.{i+1}] Tesseract: {e}")

            # 2. Medicus — first attempt
            medicus = _verify_medicus(rc) if rc else None

            # 3. Claude Vision — when Tesseract found no birth number, or
            #    found one that Medicus does not know
            claude_raw = None
            if not rc or (medicus and medicus.get("status") == "not_found"):
                try:
                    rc_claude, claude_raw = self._claude_rc(img)
                    if rc_claude:
                        medicus_claude = _verify_medicus(rc_claude)
                        if medicus_claude.get("status") in ("ok", "fuzzy"):
                            print(f"[OCR str.{i+1}] Claude opravil RČ: {rc} → {rc_claude}")
                            rc = rc_claude
                            medicus = medicus_claude
                        elif not rc:
                            # Nothing from Tesseract: keep Claude's reading
                            # even though it is unconfirmed.
                            rc = rc_claude
                            medicus = medicus_claude
                except Exception as e:
                    print(f"[OCR str.{i+1}] Claude: {e}")

            result = {
                "rc": rc,
                "medicus": medicus,
                "tesseract_text": tess_text,
                "claude_raw": claude_raw,
            }
            self.results[i] = result
            self._save_cache()
            self.on_page_done(i)

    def _claude_rc(self, img: Image.Image) -> tuple[Optional[str], Optional[str]]:
        """Ask Claude Vision for the birth number on *img*.

        Returns ``(digits_or_None, raw_response_text)``.  The digits are None
        when the response is not parseable JSON or carries no number.
        """
        import anthropic
        import base64

        buf = io.BytesIO()
        img.save(buf, format="JPEG", quality=80)
        b64 = base64.standard_b64encode(buf.getvalue()).decode()

        client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
        resp = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=100,
            messages=[{"role": "user", "content": [
                {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": b64}},
                {"type": "text", "text": (
                    "Najdi rodné číslo na tomto naskenovaném dokumentu. "
                    "Vrať JSON: {\"rodne_cislo\": \"XXXXXXXXXX\"} nebo {\"rodne_cislo\": null}. "
                    "Jen JSON, nic jiného."
                )},
            ]}],
        )
        raw = resp.content[0].text.strip()
        # Drop a Markdown code fence if the model wrapped its answer in one.
        raw = re.sub(r"^```\w*\n?", "", raw).rstrip("`").strip()
        try:
            rc_raw = json.loads(raw).get("rodne_cislo") or ""
            return re.sub(r"\D", "", rc_raw) or None, raw
        except Exception:
            return None, raw


# ── Thumbnail worker (background) ─────────────────────────────────────────────
class ThumbnailWorker:
    """Renders PDF pages to PIL images in a background daemon thread.

    The thread is started by the constructor.
    """

    def __init__(self, doc: fitz.Document, thumb_w: int, thumb_h: int, on_thumb_done):
        self.doc = doc
        self.thumb_w = thumb_w
        self.thumb_h = thumb_h
        self.on_thumb_done = on_thumb_done  # callback(page_idx: int)
        self._cache: dict[int, Image.Image] = {}
        self._lock = threading.Lock()
        t = threading.Thread(target=self._run, daemon=True)
        t.start()

    def get(self, page_idx: int) -> Optional[Image.Image]:
        """Return the rendered thumbnail for *page_idx*, or None if not ready."""
        with self._lock:
            return self._cache.get(page_idx)

    def _run(self):
        """Thread body: render every page, scaled to fit the thumbnail box."""
        with _FITZ_LOCK:
            n = len(self.doc)
        for i in range(n):
            with _FITZ_LOCK:
                page = self.doc[i]
                rect = page.rect
                scale = min(self.thumb_w / rect.width, self.thumb_h / rect.height)
                mat = fitz.Matrix(scale, scale)
                pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            # Centre the page on a fixed-size canvas
            canvas = Image.new("RGB", (self.thumb_w, self.thumb_h), (38, 38, 38))
            x = (self.thumb_w - img.width) // 2
            y = (self.thumb_h - img.height) // 2
            canvas.paste(img, (x, y))
            with self._lock:
                self._cache[i] = canvas
            self.on_thumb_done(i)


# ── Colours and dimensions ────────────────────────────────────────────────────
COLS = 4
BORDER_W = 16   # width of the separator between slots
PAD = 8         # thumbnail inset from the slot edge
INFO_H = 108    # height of the info strip below the thumbnail
TOP_H = 44      # height of the status bar at the top
BOT_H = 44      # height of the hint bar at the bottom
# The remaining sizes are computed in SplitterUI.__init__ from the screen size

BG = "#1e1e1e"
BG_SLOT = "#262626"
BG_INFO = "#181818"
C_CURSOR = "#4da6ff"
C_BOUNDARY = "#cc3333"
C_SAME = "#3a3a3a"
C_OK = "#4caf50"
C_FUZZY = "#ff9800"
C_NONE = "#f44336"
C_OFFLINE = "#888888"
C_LOADING = "#555555"
C_TEXT = "#dddddd"
C_DIM = "#666666"

GROUP_COLORS = [
    "#1b2a3a", "#2a1b3a", "#1b3a2a", "#3a2a1b",
    "#2a3a1b", "#1b2a2a", "#3a1b2a", "#2a2a1b",
]


# ── Main UI ───────────────────────────────────────────────────────────────────
class SplitterUI:
    """Keyboard-driven window for grouping, reordering and exporting pages."""

    def __init__(self, root: tk.Tk, pdf_path: Path):
        self.root = root
        self.pdf_path = pdf_path
        self.doc = fitz.open(str(pdf_path))
        n = len(self.doc)

        # State
        self.page_order: list[int] = list(range(n))
        self.boundaries: set[int] = {0}  # positions (in page_order) that start a new group
        self.cursor: int = 0
        self.scroll: int = 0  # index of the leftmost visible slot

        # Caches
        self.ocr_results: dict[int, dict] = {}
        self._photo_cache: dict[tuple, ImageTk.PhotoImage] = {}  # (page_idx, rot) → photo
        self.rotations: dict[int, int] = {}  # page_idx → degrees (0/90/180/270)

        # Dimensions
        sw = root.winfo_screenwidth()
        sh = root.winfo_screenheight()
        self.SLOT_W = (sw - (COLS - 1) * BORDER_W) // COLS
        self.THUMB_W = self.SLOT_W - 2 * PAD
        self.THUMB_H = int(self.THUMB_W * 842 / 595)  # A4 aspect ratio
        self.CANVAS_W = COLS * self.SLOT_W + (COLS - 1) * BORDER_W
        self.CANVAS_H = PAD + self.THUMB_H + PAD + INFO_H
        win_h = min(TOP_H + self.CANVAS_H + BOT_H, sh - 60)

        root.title(f"PDF Dělení — {pdf_path.name}")
        root.configure(bg=BG)
        root.geometry(f"{self.CANVAS_W}x{win_h}+0+0")

        self._build_ui()
        self._start_workers()

    # ── Building the UI ───────────────────────────────────────────────────────
    def _build_ui(self):
        """Create the status label, the canvas and the hint label; bind keys."""
        self.top_label = tk.Label(
            self.root, bg=BG, fg=C_TEXT,
            font=("Consolas", 13), anchor="w", padx=12
        )
        self.top_label.pack(fill="x", side="top", ipady=4)

        self.canvas = tk.Canvas(
            self.root,
            width=self.CANVAS_W, height=self.CANVAS_H,
            bg=BG, highlightthickness=0
        )
        self.canvas.pack(fill="both", expand=True)

        hints = (
            "4/6: navigace ←/→ 7/9: skok ×4 "
            "5/Space: hranice pacienta "
            "1/3: přesuň stránku "
            "/: otočit ↺CCW *: otočit ↻CW "
            "Del/.: smaž stránku "
            "Enter: exportuj Esc: konec"
        )
        self.bot_label = tk.Label(
            self.root, text=hints, bg=BG, fg=C_DIM,
            font=("Consolas", 11), anchor="center"
        )
        self.bot_label.pack(fill="x", side="bottom", ipady=6)

        # An empty event sequence is rejected by Tk; "<Key>" delivers every
        # key press to _on_key.
        self.root.bind("<Key>", self._on_key)
        self.root.focus_set()
        self._redraw()

    # ── Starting the workers ──────────────────────────────────────────────────
    def _start_workers(self):
        """Create both background workers and seed the UI from the OCR cache."""
        cache_path = self.pdf_path.parent / (self.pdf_path.stem + "_ocr_cache.json")
        # Worker callbacks run off the main thread, so they only schedule the
        # real handler through root.after.
        self.ocr_worker = OcrWorker(
            self.doc, cache_path,
            on_page_done=lambda idx: self.root.after(0, self._on_ocr_done, idx),
        )
        # Take over whatever the cache already contains
        self.ocr_results.update(self.ocr_worker.results)
        self._auto_detect_boundaries()
        self.ocr_worker.start()

        self.thumb_worker = ThumbnailWorker(
            self.doc, self.THUMB_W, self.THUMB_H,
            on_thumb_done=lambda idx: self.root.after(0, self._on_thumb_done, idx),
        )

    def _auto_detect_boundaries(self):
        """Add a boundary wherever two Medicus-confirmed birth numbers differ.

        Pages without a confirmed birth number are skipped; they neither
        create a boundary nor change the "previous" patient.
        """
        prev_rc = None
        prev_confirmed = False
        for pos, page_idx in enumerate(self.page_order):
            r = self.ocr_results.get(page_idx)
            if not r:
                continue
            rc = r.get("rc")
            status = (r.get("medicus") or {}).get("status")
            confirmed = status in ("ok", "fuzzy")
            if rc and confirmed and prev_rc and prev_confirmed and rc != prev_rc:
                self.boundaries.add(pos)
            if rc and confirmed:
                prev_rc = rc
                prev_confirmed = True

    # ── Worker callbacks ──────────────────────────────────────────────────────
    def _on_ocr_done(self, page_idx: int):
        """Store the finished OCR result for *page_idx* and repaint."""
        self.ocr_results[page_idx] = self.ocr_worker.results[page_idx]
        # Auto-detection runs once all OCR is done and the user has not
        # changed the boundaries yet
        if (len(self.ocr_results) == len(self.page_order)
                and self.boundaries == {0}):
            self._auto_detect_boundaries()
        self._redraw()

    def _on_thumb_done(self, page_idx: int):
        """Build the Tk image for a freshly rendered thumbnail and repaint."""
        self._rebuild_photo(page_idx)
        self._redraw()

    def _rebuild_photo(self, page_idx: int):
        """Make sure a PhotoImage exists for *page_idx* at its current rotation."""
        pil = self.thumb_worker.get(page_idx)
        if pil is None:
            return
        rot = self.rotations.get(page_idx, 0)
        key = (page_idx, rot)
        if key not in self._photo_cache:
            img = pil.rotate(rot, expand=True).resize(
                (self.THUMB_W, self.THUMB_H), Image.LANCZOS
            )
            self._photo_cache[key] = ImageTk.PhotoImage(img)

    # ── Keyboard ──────────────────────────────────────────────────────────────
    def _on_key(self, event):
        """Translate a key press into an action and run it."""
        ks = event.keysym
        kc = event.keycode

        # Numpad keycodes (Windows): 96=KP0 97=KP1 ... 105=KP9 110=KP.
        # NumLock ON  → keysym='1'..'9', keycode=97..105
        # NumLock OFF → keysym=Left/Clear/Right/Home/Up/Prior/Down
        numpad = {
            100: "num4", 101: "num5", 102: "num6",
            103: "num7", 105: "num9",
            97: "num1", 99: "num3",
            110: "numdot",
            111: "numslash", 106: "numstar",
        }
        action = numpad.get(kc) or {
            "Left": "num4", "Right": "num6",
            "Home": "num7", "Prior": "num9",
            "Clear": "num5",
            "End": "num1", "Next": "num3",
            "Delete": "numdot",
            "space": "num5",
            "KP_Divide": "numslash", "KP_Multiply": "numstar",
            "slash": "numslash", "asterisk": "numstar",
        }.get(ks)

        if action == "num4":
            self._move_cursor(-1)
        elif action == "num6":
            self._move_cursor(1)
        elif action == "num7":
            self._move_cursor(-COLS)
        elif action == "num9":
            self._move_cursor(COLS)
        elif action == "num5":
            self._toggle_boundary()
        elif action == "num1":
            self._move_page(-1)
        elif action == "num3":
            self._move_page(1)
        elif action == "numslash":
            self._rotate_page(90)   # CCW
        elif action == "numstar":
            self._rotate_page(-90)  # CW
        elif action == "numdot":
            self._delete_page()
        elif ks in ("Return", "KP_Enter"):
            self._export()
        elif ks == "Escape":
            self.root.quit()

    # ── Movement and manipulation ─────────────────────────────────────────────
    def _move_cursor(self, delta: int):
        """Move the cursor by *delta* positions, clamped, and keep it visible."""
        n = len(self.page_order)
        self.cursor = max(0, min(n - 1, self.cursor + delta))
        if self.cursor < self.scroll:
            self.scroll = self.cursor
        elif self.cursor >= self.scroll + COLS:
            self.scroll = self.cursor - COLS + 1
        self._redraw()

    def _toggle_boundary(self):
        """Toggle the group boundary before the cursor (never at position 0)."""
        pos = self.cursor
        if pos == 0:
            return
        if pos in self.boundaries:
            self.boundaries.discard(pos)
        else:
            self.boundaries.add(pos)
        self._redraw()

    def _rotate_page(self, delta: int):
        """Rotate the page under the cursor by *delta* degrees (positive = CCW)."""
        page_idx = self.page_order[self.cursor]
        rot = (self.rotations.get(page_idx, 0) + delta) % 360
        self.rotations[page_idx] = rot
        self._rebuild_photo(page_idx)
        self._redraw()

    def _delete_page(self):
        """Remove the page under the cursor from the output order.

        The last remaining page cannot be removed.  Group boundaries follow
        the pages: when the first page of a group is deleted, the next page
        becomes the start of that group instead of being merged into the
        previous one.
        """
        n = len(self.page_order)
        if n == 1:
            return
        pos = self.cursor
        self.page_order.pop(pos)
        remaining = len(self.page_order)
        # Boundaries after the deleted slot move one to the left; a boundary
        # on the deleted slot stays and now marks the page that slid into it.
        shifted = {b - 1 if b > pos else b for b in self.boundaries}
        # A boundary that pointed at the (deleted) last page has no page left.
        self.boundaries = {b for b in shifted if b < remaining}
        self.boundaries.add(0)  # the first page always starts a group
        self.cursor = min(pos, remaining - 1)
        if self.cursor < self.scroll:
            self.scroll = self.cursor
        self._redraw()

    def _move_page(self, delta: int):
        """Swap the page under the cursor with its neighbour at ``pos + delta``.

        The cursor follows the moved page; boundaries stay at their positions.
        """
        n = len(self.page_order)
        pos = self.cursor
        new_pos = pos + delta
        if new_pos < 0 or new_pos >= n:
            return
        self.page_order[pos], self.page_order[new_pos] = (
            self.page_order[new_pos], self.page_order[pos]
        )
        self.cursor = new_pos
        if self.cursor < self.scroll:
            self.scroll = self.cursor
        elif self.cursor >= self.scroll + COLS:
            self.scroll = self.cursor - COLS + 1
        self._redraw()

    # ── Groups ────────────────────────────────────────────────────────────────
    def _group_of_pos(self) -> list[int]:
        """Return the group index for every position in ``page_order``."""
        result = []
        gi = 0
        for pos in range(len(self.page_order)):
            if pos in self.boundaries and pos > 0:
                gi += 1
            result.append(gi)
        return result

    def _get_groups(self) -> list[list[int]]:
        """Return the groups as lists of page indices, in ``page_order`` order."""
        groups: list[list[int]] = []
        current: list[int] = []
        for pos, page_idx in enumerate(self.page_order):
            if pos in self.boundaries and current:
                groups.append(current)
                current = []
            current.append(page_idx)
        if current:
            groups.append(current)
        return groups

    def _best_medicus(self, pages: list[int]) -> Optional[dict]:
        """Return the first "ok" Medicus result among *pages*, else the first
        "fuzzy" one, else None."""
        for status in ("ok", "fuzzy"):
            for p in pages:
                r = self.ocr_results.get(p)
                if r and r.get("medicus") and r["medicus"].get("status") == status:
                    return r["medicus"]
        return None

    # ── Export ────────────────────────────────────────────────────────────────
    def _export(self):
        """Write every group as its own PDF into ``SPLIT_DIR`` and report it."""
        groups = self._get_groups()
        SPLIT_DIR.mkdir(parents=True, exist_ok=True)
        names = []
        for i, pages in enumerate(groups, 1):
            med = self._best_medicus(pages)
            name = _format_filename(i, med)
            out_path = SPLIT_DIR / name
            # The workers may still be rendering from self.doc.
            with _FITZ_LOCK:
                out_doc = fitz.open()
                try:
                    for page_idx in pages:
                        out_doc.insert_pdf(self.doc, from_page=page_idx, to_page=page_idx)
                        rot = self.rotations.get(page_idx, 0)
                        if rot:
                            # The on-screen rotation is counter-clockwise
                            # positive, hence the subtraction.
                            out_doc[-1].set_rotation((out_doc[-1].rotation - rot) % 360)
                    out_doc.save(str(out_path))
                finally:
                    out_doc.close()
            names.append(f"{name} ({len(pages)} str.)")
            print(f" Exportováno: {name}")

        messagebox.showinfo(
            "Export hotov",
            f"Exportováno {len(groups)} skupin do:\n{SPLIT_DIR}\n\n" + "\n".join(names),
        )

    # ── Drawing ───────────────────────────────────────────────────────────────
    def _redraw(self):
        """Repaint the visible slots and refresh the status bar."""
        c = self.canvas
        c.delete("all")
        n = len(self.page_order)
        group_of = self._group_of_pos()
        # ocr_results is keyed by original page index, not by position
        ocr_done = sum(1 for p in self.page_order if p in self.ocr_results)

        # Background
        c.create_rectangle(0, 0, self.CANVAS_W, self.CANVAS_H, fill=BG, outline="")

        for col in range(COLS):
            pos = self.scroll + col
            if pos >= n:
                break

            page_idx = self.page_order[pos]
            gi = group_of[pos]
            x0 = col * (self.SLOT_W + BORDER_W)

            # ── Slot background ───────────────────────────────────────────────
            slot_color = GROUP_COLORS[gi % len(GROUP_COLORS)]
            c.create_rectangle(
                x0, 0, x0 + self.SLOT_W, PAD + self.THUMB_H + PAD,
                fill=slot_color, outline=""
            )

            # ── Thumbnail ─────────────────────────────────────────────────────
            rot = self.rotations.get(page_idx, 0)
            photo = self._photo_cache.get((page_idx, rot))
            if photo:
                c.create_image(x0 + PAD, PAD, anchor="nw", image=photo)
            else:
                c.create_text(
                    x0 + self.SLOT_W // 2, PAD + self.THUMB_H // 2,
                    text=f"⏳\nstr. {pos + 1}",
                    fill=C_LOADING, font=("Consolas", 18), justify="center"
                )

            # ── Cursor ────────────────────────────────────────────────────────
            if pos == self.cursor:
                c.create_rectangle(
                    x0 + 2, 2,
                    x0 + self.SLOT_W - 2, PAD + self.THUMB_H + PAD - 2,
                    outline=C_CURSOR, width=5
                )

            # ── Info strip ────────────────────────────────────────────────────
            y_info = PAD + self.THUMB_H + PAD
            c.create_rectangle(
                x0, y_info, x0 + self.SLOT_W, y_info + INFO_H,
                fill=BG_INFO, outline=""
            )

            result = self.ocr_results.get(page_idx)
            if result is None:
                rc_line = "⏳ OCR probíhá…"
                pat_line = ""
                stat_color = C_LOADING
            else:
                rc = result.get("rc")
                rc_line = f"RČ: {rc}" if rc else "RČ: nenalezeno"
                med = result.get("medicus")
                if med:
                    s = med["status"]
                    p = med.get("patient")
                    if s == "ok" and p:
                        pat_line = f"{p['prijmeni']} {p['jmeno']}"
                        stat_color = C_OK
                    elif s == "fuzzy" and p:
                        pat_line = f"~ {p['prijmeni']} {p['jmeno']}"
                        stat_color = C_FUZZY
                    elif s == "not_found":
                        pat_line = "Nenalezen v Medicus"
                        stat_color = C_NONE
                    else:
                        pat_line = "Medicus offline"
                        stat_color = C_OFFLINE
                elif rc:
                    pat_line = "Ověřuji…"
                    stat_color = C_LOADING
                else:
                    pat_line = ""
                    stat_color = C_NONE

            c.create_text(
                x0 + 8, y_info + 6,
                text=f"str. {pos + 1}/{n} (orig: {page_idx + 1})",
                anchor="nw", fill=C_DIM, font=("Consolas", 10)
            )
            c.create_text(
                x0 + 8, y_info + 26,
                text=rc_line,
                anchor="nw", fill=stat_color, font=("Consolas", 13, "bold")
            )
            c.create_text(
                x0 + 8, y_info + 52,
                text=pat_line,
                anchor="nw", fill=stat_color, font=("Consolas", 14, "bold")
            )

            # ── Separator to the right of this slot ───────────────────────────
            if col < COLS - 1:
                next_pos = pos + 1
                is_new = next_pos in self.boundaries
                x_sep = x0 + self.SLOT_W
                c.create_rectangle(
                    x_sep, 0, x_sep + BORDER_W, self.CANVAS_H,
                    fill=C_BOUNDARY if is_new else C_SAME, outline=""
                )
                if is_new:
                    c.create_text(
                        x_sep + BORDER_W // 2, self.CANVAS_H // 2,
                        text="▼\nNOVÝ", fill="white",
                        font=("Consolas", 7, "bold"), justify="center"
                    )

        # ── Status bar at the top ─────────────────────────────────────────────
        groups = self._get_groups()
        self.top_label.config(
            text=(
                f" str. {self.cursor + 1}/{n} │ "
                f"skupiny: {len(groups)} │ "
                f"OCR: {ocr_done}/{n} │ "
                f"{self.pdf_path.name}"
            )
        )


# ── Entry point ───────────────────────────────────────────────────────────────
def main():
    """Pick the input PDF (argv, test file or file dialog) and run the UI."""
    root = tk.Tk()
    root.withdraw()

    if len(sys.argv) >= 2:
        pdf_path = Path(sys.argv[1])
    elif TESTOVANI:
        pdf_path = Path(PATH_TO_TESTFILE)
    else:
        from tkinter import filedialog
        chosen = filedialog.askopenfilename(
            title="Vyber vstupní PDF",
            initialdir=str(_RICOH),
            filetypes=[("PDF soubory", "*.pdf")],
        )
        if not chosen:
            root.destroy()
            sys.exit(0)
        pdf_path = Path(chosen)

    if not pdf_path.exists():
        print(f"Soubor nenalezen: {pdf_path}")
        root.destroy()
        sys.exit(1)

    root.deiconify()
    app = SplitterUI(root, pdf_path)
    root.mainloop()


if __name__ == "__main__":
    main()