z230
This commit is contained in:
@@ -29,6 +29,7 @@ Pro vývoj: `TESTOVANI = True` + `PATH_TO_TESTFILE` na začátku skriptu.
|
||||
| `/` | otočit CCW (counterclockwise) |
|
||||
| `*` | otočit CW (clockwise) |
|
||||
| `Del` / `.` | smaž stránku (vynech z exportu) |
|
||||
| `0` | znovu spustí OCR na aktuální stránce (smaže cache pro tuto stránku) |
|
||||
| `Enter` | exportuj všechny skupiny |
|
||||
| `Esc` | konec |
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ Numerická klávesnice:
|
||||
5 / Space přepni hranici pacienta před touto stránkou
|
||||
8 / Up přesuň stránku doleva (swap)
|
||||
2 / Down přesuň stránku doprava (swap)
|
||||
0 znovu spustí OCR na aktuální stránce (smaže cache pro tuto stránku)
|
||||
- výběr pacienta ručně z Medicusu
|
||||
Enter exportuj všechny skupiny do Split/
|
||||
Esc konec
|
||||
@@ -271,63 +272,76 @@ class OcrWorker:
|
||||
def stop(self):
|
||||
self._stop.set()
|
||||
|
||||
def _run(self):
|
||||
def _ocr_page(self, i: int):
    """Run the OCR pipeline for page ``i`` and store the outcome in ``self.results``.

    Pipeline:
      1. Render the page and read it with Tesseract; ``_extract_rc`` pulls
         the RČ out of the recognised text.
      2. Look the RČ up via ``_verify_medicus``.
      3. Ask Claude Vision when Tesseract found no RČ, or found one that the
         lookup reported as ``"not_found"``.

    The stored dict has the keys ``rc``, ``medicus``, ``tesseract_text``,
    ``claude_raw`` and ``claude_usage``. Failures in the Tesseract and Claude
    steps are printed and otherwise ignored, so a result is stored whenever
    rendering and the first lookup succeed.

    NOTE(review): this method is invoked both from ``_run`` and from the
    thread started by ``rerun_page``, so two threads may touch ``self.doc``
    at the same time — confirm the PDF library tolerates that.
    """
    import pytesseract
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH

    page = self.doc[i]
    mat = fitz.Matrix(2.0, 2.0)  # 144 DPI — sufficient for OCR
    pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

    # 1. Tesseract
    rc = None
    tess_text = None
    try:
        tess_text = pytesseract.image_to_string(img, lang="ces")
        rc = _extract_rc(tess_text)
    except Exception as e:
        print(f"[OCR str.{i+1}] Tesseract: {e}")

    # 2. Medicus — first attempt
    medicus = _verify_medicus(rc) if rc else None

    # 3. Claude Vision — when Tesseract found no RČ, or found one Medicus does not know
    claude_raw = None
    claude_usage = None
    if not rc or (medicus and medicus.get("status") == "not_found"):
        try:
            rc_claude, claude_raw, claude_usage = self._claude_rc(img)
            if rc_claude:
                medicus_claude = _verify_medicus(rc_claude)
                # Same falsy guard as the first lookup above: an empty lookup
                # result must not raise here, otherwise Claude's RČ would be
                # thrown away by the except clause below.
                claude_status = medicus_claude.get("status") if medicus_claude else None
                if claude_status in ("ok", "fuzzy"):
                    print(f"[OCR str.{i+1}] Claude opravil RČ: {rc} → {rc_claude}")
                    rc = rc_claude
                    medicus = medicus_claude
                elif not rc:
                    # Nothing from Tesseract: keep Claude's unconfirmed reading.
                    rc = rc_claude
                    medicus = medicus_claude
        except Exception as e:
            print(f"[OCR str.{i+1}] Claude: {e}")

    result = {
        "rc": rc,
        "medicus": medicus,
        "tesseract_text": tess_text,
        "claude_raw": claude_raw,
        "claude_usage": claude_usage,
    }
    # results is shared with other threads, so publish under the lock.
    with self._lock:
        self.results[i] = result
|
||||
|
||||
def _run(self):
    """Process every page of ``self.doc`` in order until finished or stopped.

    Pages that already have an entry in ``self.results`` are skipped. For
    each remaining page the OCR pipeline runs, the cache is saved and
    ``self.on_page_done`` is called with the page index. The stop event is
    checked before each page.
    """
    for page_no in range(len(self.doc)):
        if self._stop.is_set():
            break
        if page_no not in self.results:  # anything already present is a cache hit
            self._ocr_page(page_no)
            self._save_cache()
            self.on_page_done(page_no)
|
||||
|
||||
def rerun_page(self, page_idx: int, on_done):
    """Re-run OCR for a single page, ignoring any cached result.

    The work happens on a freshly started daemon thread: the stored result
    for ``page_idx`` is removed, the OCR pipeline runs again, the cache is
    saved, and finally ``on_done(page_idx)`` is called from that thread.
    """
    def _redo_page():
        # Remove the old entry under the lock; a missing entry is fine.
        with self._lock:
            self.results.pop(page_idx, None)
        self._ocr_page(page_idx)
        self._save_cache()
        on_done(page_idx)

    background = threading.Thread(target=_redo_page, daemon=True)
    background.start()
|
||||
|
||||
def _claude_rc(self, img: Image.Image) -> tuple[Optional[str], Optional[str], Optional[dict]]:
|
||||
import anthropic, base64
|
||||
|
||||
@@ -633,7 +647,7 @@ class SplitterUI:
|
||||
"1/3: přesuň stránku "
|
||||
"/: otočit ↺CCW *: otočit ↻CW "
|
||||
"Del/.: smaž stránku "
|
||||
"-: vyber pacienta ručně "
|
||||
"0: znovu OCR -: vyber pacienta ručně "
|
||||
"Enter: exportuj Esc: konec"
|
||||
)
|
||||
self.bot_label = tk.Label(
|
||||
@@ -738,6 +752,7 @@ class SplitterUI:
|
||||
103: "num7", 105: "num9",
|
||||
97: "num1", 99: "num3", 110: "numdot",
|
||||
111: "numslash", 106: "numstar", 109: "numminus",
|
||||
96: "num0",
|
||||
}
|
||||
action = numpad.get(kc) or {
|
||||
"Left": "num4", "Right": "num6",
|
||||
@@ -748,6 +763,7 @@ class SplitterUI:
|
||||
"KP_Divide": "numslash", "KP_Multiply": "numstar",
|
||||
"slash": "numslash", "asterisk": "numstar",
|
||||
"KP_Subtract": "numminus", "minus": "numminus",
|
||||
"Insert": "num0", "KP_Insert": "num0",
|
||||
}.get(ks)
|
||||
|
||||
if action == "num4":
|
||||
@@ -772,6 +788,8 @@ class SplitterUI:
|
||||
self._delete_page()
|
||||
elif action == "numminus":
|
||||
self._open_patient_picker()
|
||||
elif action == "num0":
|
||||
self._rerun_ocr_current()
|
||||
elif ks in ("Return", "KP_Enter"):
|
||||
self._export()
|
||||
elif ks == "Escape":
|
||||
@@ -843,6 +861,15 @@ class SplitterUI:
|
||||
|
||||
PatientPickerDialog(self.root, on_select)
|
||||
|
||||
def _rerun_ocr_current(self):
    """Discard and recompute the OCR result of the page under the cursor.

    The page's entry is removed from ``self.ocr_results`` and the view is
    redrawn before the worker is asked to re-run OCR. When the worker
    reports completion, ``self._on_ocr_done`` is scheduled through
    ``self.root.after(0, ...)`` with the page index.
    """
    current_page = self.page_order[self.cursor]
    self.ocr_results.pop(current_page, None)
    self._redraw()

    def _schedule_done(idx):
        # Invoked by the worker; hands the notification over to root.after.
        return self.root.after(0, self._on_ocr_done, idx)

    self.ocr_worker.rerun_page(current_page, on_done=_schedule_done)
|
||||
|
||||
def _update_boundaries_around(self, pos: int):
|
||||
"""Přidá/odstraní hranice kolem pozice pos podle potvrzených pacientů."""
|
||||
def confirmed_rc(p: int) -> Optional[str]:
|
||||
|
||||
Reference in New Issue
Block a user