This commit is contained in:
2026-04-27 11:00:40 +02:00
parent d4825553a9
commit 90bd0ecdf5
11 changed files with 1002 additions and 130 deletions
+6 -1
View File
@@ -5,7 +5,12 @@
"Bash(ls -la \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\")", "Bash(ls -la \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\")",
"Bash(find \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\" -type f)", "Bash(find \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\" -type f)",
"Bash(grep -E '\\\\.\\(py|json|txt|md|yaml|yml\\)$')", "Bash(grep -E '\\\\.\\(py|json|txt|md|yaml|yml\\)$')",
"Bash(ls -la \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\\\\\\\\Processed\" \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\\\\\\\\ToProcess\")" "Bash(ls -la \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\\\\\\\\Processed\" \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\\\\\\\\ToProcess\")",
"Bash(python -c ' *)",
"Bash(gs --version)",
"Bash(where gs *)",
"Bash(python -c \"import pypdf; print\\('pypdf ok'\\)\")",
"Bash(python -c \"import fitz; print\\('pymupdf ok', fitz.version\\)\")"
] ]
} }
} }
+37 -57
View File
@@ -7,6 +7,7 @@ import pymysql
import re import re
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
from collections import defaultdict
import time import time
import sys import sys
@@ -112,6 +113,7 @@ cur_meta.execute("""
p.displayTitle p.displayTitle
FROM medevio_downloads d FROM medevio_downloads d
JOIN pozadavky p ON d.request_id = p.id JOIN pozadavky p ON d.request_id = p.id
WHERE p.updatedAt >= DATE_SUB(NOW(), INTERVAL 14 DAY)
ORDER BY p.updatedAt DESC ORDER BY p.updatedAt DESC
""") """)
@@ -122,40 +124,28 @@ safe_print(f"📋 Found {len(rows)} attachment records.\n")
# 🧠 MAIN LOOP WITH PROGRESS # 🧠 MAIN LOOP WITH PROGRESS
# ============================== # ==============================
unique_request_ids = [] # Group rows by request_id in Python — avoids N extra SELECT filename queries
seen = set() rows_by_request = defaultdict(list)
for r in rows: for r in rows:
req_id = r["request_id"] rows_by_request[r["request_id"]].append(r)
if req_id not in seen:
unique_request_ids.append(req_id)
seen.add(req_id)
total_requests = len(unique_request_ids) total_requests = len(rows_by_request)
safe_print(f"🔄 Processing {total_requests} unique requests...\n") safe_print(f"🔄 Processing {total_requests} unique requests...\n")
processed_requests = set() # Pre-index BASE_DIR once — avoids iterdir() called twice per request
current_index = 0 folder_list = [(f, f.name) for f in BASE_DIR.iterdir() if f.is_dir()]
for r in rows: for current_index, (req_id, req_rows) in enumerate(rows_by_request.items(), 1):
req_id = r["request_id"]
if req_id in processed_requests:
continue
processed_requests.add(req_id)
current_index += 1
percent = (current_index / total_requests) * 100 percent = (current_index / total_requests) * 100
safe_print(f"\n[ {percent:5.1f}% ] Processing request {current_index} / {total_requests}{req_id}") safe_print(f"\n[ {percent:5.1f}% ] Processing request {current_index} / {total_requests}{req_id}")
# ========== FETCH VALID FILENAMES ========== # ========== VALID FILENAMES from already-loaded rows ==========
cur_meta.execute( # original filename → sanitized name (needed for DB query later)
"SELECT filename FROM medevio_downloads WHERE request_id=%s", file_map = {sanitize_name(r["filename"]): r["filename"] for r in req_rows}
(req_id,) valid_files = set(file_map.keys())
)
valid_files = {sanitize_name(row["filename"]) for row in cur_meta.fetchall()}
# ========== BUILD FOLDER NAME ========== # ========== BUILD FOLDER NAME ==========
r = req_rows[0]
updated_at = r["req_updated_at"] or datetime.now() updated_at = r["req_updated_at"] or datetime.now()
date_str = updated_at.strftime("%Y-%m-%d") date_str = updated_at.strftime("%Y-%m-%d")
@@ -168,21 +158,15 @@ for r in rows:
f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}" f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}"
) )
# ========== DETECT EXISTING FOLDER ========== # ========== DETECT EXISTING FOLDER from pre-built index ==========
existing_folder = None req_id_str = str(req_id)
matching = [f for f, name in folder_list if req_id_str in name]
for f in BASE_DIR.iterdir(): existing_folder = matching[0] if matching else None
if f.is_dir() and req_id in f.name:
existing_folder = f
break
main_folder = existing_folder if existing_folder else BASE_DIR / clean_folder_name main_folder = existing_folder if existing_folder else BASE_DIR / clean_folder_name
# ========== MERGE DUPLICATES ========== # ========== MERGE DUPLICATES ==========
possible_dups = [ possible_dups = [f for f, name in folder_list if req_id_str in name and f != main_folder]
f for f in BASE_DIR.iterdir()
if f.is_dir() and req_id in f.name and f != main_folder
]
for dup in possible_dups: for dup in possible_dups:
safe_print(f"♻️ Merging duplicate folder: {dup.name}") safe_print(f"♻️ Merging duplicate folder: {dup.name}")
@@ -201,36 +185,32 @@ for r in rows:
# ========== CLEAN MAIN FOLDER ========== # ========== CLEAN MAIN FOLDER ==========
clean_folder(main_folder, valid_files) clean_folder(main_folder, valid_files)
# ========== DOWNLOAD MISSING FILES ========== # ========== DOWNLOAD MISSING FILES (batch blob fetch per request) ==========
added_new_file = False
main_folder.mkdir(parents=True, exist_ok=True) main_folder.mkdir(parents=True, exist_ok=True)
added_new_file = False
for filename in valid_files: missing_san = [
dest_plain = main_folder / filename fn for fn in valid_files
dest_marked = main_folder / ("" + filename) if not (main_folder / fn).exists() and not (main_folder / ("" + fn)).exists()
]
if dest_plain.exists() or dest_marked.exists():
continue
added_new_file = True
if missing_san:
# Fetch all missing blobs in a single query instead of one per file
missing_orig = [file_map[fn] for fn in missing_san]
placeholders = ",".join(["%s"] * len(missing_orig))
cur_blob.execute( cur_blob.execute(
"SELECT file_content FROM medevio_downloads " f"SELECT filename, file_content FROM medevio_downloads "
"WHERE request_id=%s AND filename=%s", f"WHERE request_id=%s AND filename IN ({placeholders})",
(req_id, filename) [req_id] + missing_orig,
) )
row = cur_blob.fetchone() for blob_filename, content in cur_blob.fetchall():
if not row:
continue
content = row[0]
if not content: if not content:
continue continue
dest_plain = main_folder / sanitize_name(blob_filename)
with open(dest_plain, "wb") as f: with open(dest_plain, "wb") as fh:
f.write(content) fh.write(content)
safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}") safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}")
added_new_file = True
# ========== REMOVE ▲ FLAG IF NEW FILES ADDED ========== # ========== REMOVE ▲ FLAG IF NEW FILES ADDED ==========
if added_new_file and "" in main_folder.name: if added_new_file and "" in main_folder.name:
@@ -0,0 +1,111 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Compress PDF — output DPI and JPEG quality are chosen automatically
based on the detected resolution of the source PDF.
Usage: python compress_pdf.py <input.pdf> [output.pdf]
       python compress_pdf.py                (processes all PDFs in the script's own folder)
Output filename: original_name (139 kB).pdf
"""
import sys
import fitz
from pathlib import Path
# ==============================
# COMPRESSION TABLE
# Detected source DPI -> (output DPI, JPEG quality)
# Rows are evaluated top-to-bottom; first match wins.
# Both bounds of a row are inclusive (see the comparison in pick_settings).
# NOTE(review): the 600+ row downsamples to 150 DPI while the 400-599 row
# keeps 200 DPI — confirm that this non-monotonic step is intentional.
# ==============================
#
#   src_dpi_min  src_dpi_max  out_dpi  jpeg_quality
COMPRESSION_TABLE = [
    (   0,   99,  72, 60),   # very low res — already small, compress hard
    ( 100,  149, 100, 70),   # low res
    ( 150,  249, 150, 80),   # standard scan (our tested sweet spot)
    ( 250,  399, 150, 80),   # good scan — downsample to 150 is fine
    ( 400,  599, 200, 85),   # high res scan
    ( 600, 9999, 150, 80),   # very high res / professional scan
]
def detect_source_dpi(src: fitz.Document) -> int:
    """Estimate the scan resolution of *src* from its first page.

    The image with the largest pixel area on page 1 is compared with the
    page size (in points, 72 per inch). The horizontal and vertical results
    are averaged and rounded. Returns 150 when the page has no raster image.
    """
    first_page = src[0]
    page_images = first_page.get_images(full=True)
    if not page_images:
        return 150  # no raster images — use default

    def pixel_area(entry):
        # entry[2] / entry[3] are the image width / height in pixels
        return entry[2] * entry[3]

    largest = max(page_images, key=pixel_area)
    width_px = largest[2]
    height_px = largest[3]

    # Page size in inches (1 point = 1/72 inch)
    width_in = first_page.rect.width / 72.0
    height_in = first_page.rect.height / 72.0

    horizontal_dpi = width_px / width_in if width_in else 0
    vertical_dpi = height_px / height_in if height_in else 0
    return round((horizontal_dpi + vertical_dpi) / 2)
def pick_settings(source_dpi: int) -> tuple[int, int]:
    """Return ``(output DPI, JPEG quality)`` for a detected source DPI.

    The first COMPRESSION_TABLE row whose inclusive range contains
    *source_dpi* wins; when no row matches, the last row is used.
    """
    matching = (
        (target_dpi, jpeg_q)
        for lower, upper, target_dpi, jpeg_q in COMPRESSION_TABLE
        if lower <= source_dpi <= upper
    )
    chosen = next(matching, None)
    if chosen is None:
        # fallback to last row
        last_row = COMPRESSION_TABLE[-1]
        chosen = (last_row[2], last_row[3])
    return chosen
def compress(input_path: Path, output_path: Path = None):
    """Re-render *input_path* as a JPEG-based PDF and store the result.

    Every page is rasterised at the DPI chosen by ``pick_settings`` for the
    detected source resolution, encoded as JPEG and placed on a new page of
    the original physical size.

    Args:
        input_path: PDF to compress.
        output_path: Destination file. When omitted, the result is written
            next to the input as ``"<stem> (<size> kB).pdf"``. An existing
            destination is overwritten.
    """
    # Measure the original first: when output_path equals input_path the file
    # is replaced below and its original size could no longer be read.
    orig_kb = round(input_path.stat().st_size / 1024)

    tmp = input_path.with_suffix(".tmp.pdf")
    src = fitz.open(input_path)
    try:
        source_dpi = detect_source_dpi(src)
        out_dpi, jpeg_quality = pick_settings(source_dpi)
        print(f" zdroj ~{source_dpi} DPI -> komprese {out_dpi} DPI / JPEG q{jpeg_quality}")

        zoom = out_dpi / 72.0
        mat = fitz.Matrix(zoom, zoom)
        out_doc = fitz.open()
        try:
            for page in src:
                pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
                img_bytes = pix.tobytes("jpeg", jpg_quality=jpeg_quality)
                img_doc = fitz.open("pdf", fitz.open("jpeg", img_bytes).convert_to_pdf())
                rect = page.rect
                new_page = out_doc.new_page(width=rect.width, height=rect.height)
                new_page.show_pdf_page(new_page.rect, img_doc, 0)
            out_doc.save(tmp, deflate=True, garbage=4)
        finally:
            # Close the documents even when rendering or saving fails.
            out_doc.close()
    finally:
        src.close()

    size_kb = round(tmp.stat().st_size / 1024)
    if output_path is None:
        output_path = input_path.parent / f"{input_path.stem} ({size_kb} kB).pdf"
    # Path.replace overwrites an existing target on every platform, so no
    # separate unlink step (and no window between unlink and rename) is needed.
    tmp.replace(output_path)

    # Inputs under 512 bytes round to 0 kB; avoid dividing by zero.
    saving = (1 - size_kb / orig_kb) * 100 if orig_kb else 0.0
    print(f" {input_path.name} -> {output_path.name} (bylo {orig_kb} kB, uspora {saving:.0f}%)")
if __name__ == "__main__":
    # With arguments: compress one file. Without: compress every PDF that
    # sits next to this script and is not already a "(... kB).pdf" output.
    cli_args = sys.argv[1:]
    if cli_args:
        source = Path(cli_args[0])
        target = Path(cli_args[1]) if len(cli_args) >= 2 else None
        compress(source, target)
    else:
        script = Path(__file__)
        pending = [
            candidate
            for candidate in script.parent.glob("*.pdf")
            if not candidate.name.endswith(").pdf") and candidate.stem != script.stem
        ]
        if not pending:
            print("Zadne PDF k zpracovani.")
        for candidate in pending:
            compress(candidate)
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Compress a PDF into multiple variants at different DPI / JPEG quality settings.
Uses PyMuPDF (fitz) — renders each page as JPEG image, saves back as PDF.
"""
import sys
import fitz # PyMuPDF
from pathlib import Path
# Source PDF for the experiment; every variant is written next to it.
INPUT = Path(r"u:\Medevio\50 Různé testy\MinimizeOptimizePDF\afd1823b-8277-44a2-84e1-db89a0ccd134.pdf")
OUT_DIR = INPUT.parent
VARIANTS = [
    # (label, dpi, jpeg_quality)
    ("300dpi_q90", 300, 90),
    ("200dpi_q85", 200, 85),
    ("150dpi_q80", 150, 80),
    ("120dpi_q75", 120, 75),
    ("96dpi_q70", 96, 70),
    ("72dpi_q60", 72, 60),
]
src = fitz.open(INPUT)
try:
    original_size = INPUT.stat().st_size
    print(f"Originál: {INPUT.name} ({original_size / 1024:.0f} KB)\n")
    print(f"{'Varianta':<20} {'DPI':>5} {'Kvalita':>8} {'Velikost':>12} {'Úspora':>8}")
    print("-" * 58)
    for label, dpi, quality in VARIANTS:
        out_path = OUT_DIR / f"{INPUT.stem}_{label}.pdf"
        zoom = dpi / 72.0
        mat = fitz.Matrix(zoom, zoom)
        out_doc = fitz.open()
        try:
            for page in src:
                pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
                img_bytes = pix.tobytes("jpeg", jpg_quality=quality)
                # Wrap the JPEG in a one-page PDF so it can be placed on a page
                img_doc = fitz.open("pdf", fitz.open("jpeg", img_bytes).convert_to_pdf())
                # Scale the rendered image back to the original page size
                rect = page.rect
                new_page = out_doc.new_page(width=rect.width, height=rect.height)
                new_page.show_pdf_page(new_page.rect, img_doc, 0)
            out_doc.save(out_path, deflate=True, garbage=4)
        finally:
            out_doc.close()
        size = out_path.stat().st_size
        size_kb = round(size / 1024)
        final_path = OUT_DIR / f"{INPUT.stem}_{label} ({size_kb} kB).pdf"
        # replace() instead of rename(): on Windows rename() raises
        # FileExistsError when a previous run already produced this file.
        out_path.replace(final_path)
        saving = (1 - size / original_size) * 100
        print(f"{label:<20} {dpi:>5} {quality:>8} {size_kb:>9} kB {saving:>7.0f}%")
finally:
    # Release the source document even if one of the variants fails.
    src.close()
print("\nHotovo.")
+64
View File
@@ -374,5 +374,69 @@
{ {
"original": "8452 2026-04-02 Věkrbeová [Laboratoř] [moč chemicky: ERY trace, ostatní neg., pH 6, SG 1.020].pdf", "original": "8452 2026-04-02 Věkrbeová [Laboratoř] [moč chemicky: ERY trace, ostatní neg., pH 6, SG 1.020].pdf",
"corrected": "8755120429 2026-04-02 [uritex] [moč chemicky ERY trace, ostatní neg., pH 6, SG 1.020].pdf" "corrected": "8755120429 2026-04-02 [uritex] [moč chemicky ERY trace, ostatní neg., pH 6, SG 1.020].pdf"
},
{
"original": "461001479 2026-04-21 Šťastný, Libor [LZ endokrinologie] [St.p. TTE dx a STE sin 5/18, strumiprivní hypotyreóza substituovaná, Letrox 150ug].pdf",
"corrected": "461001479 2026-04-21 Šťastný, Libor [LZ endokrinologie] [St.p. TTE dx a STE sin 518, strumiprivní hypotyreóza substituovaná, Letrox 150ug].pdf"
},
{
"original": "9901040000 2026-04-26 Tvrz, Matěj [export zdraví krevní tlak] [prům. 153/74 mmHg, hypertenze 5d, emergentní hypertenzní stav 1d].pdf",
"corrected": "9901040000 2026-04-26 Tvrz, Matěj [export zdraví krevní tlak] [prům. 15374 mmHg, hypertenze 5d, emergentní hypertenzní stav 1d].pdf"
},
{
"original": "395907022 2026-04-10 Herzová, Marie [LZ ortopedie] [Gonarthrosis bilat., obstr. kortik. +M i.a. vlevo, indik. lázně VII7, M179].pdf",
"corrected": "395907022 2026-04-10 Herzová, Marie [LZ ortopedie] [indikace lázně VII7, M179, gonarthrosis bilat., obstr. kortik. +M i.a. vlevo].pdf"
},
{
"original": "6008091738 2020-07-15 Nikitin, Petro [LZ gastroenterologie] [Antrumgastritida, inkompetentní kardie, gastroesophageální reflux].pdf",
"corrected": "6008091738 2020-07-15 Nikitin, Petro [LZ gastro] [gastroskopie, antrumgastritida, inkompetentní kardie, gastroesophageální reflux].pdf"
},
{
"original": "6008091738 2025-11-24 Nikitin, Petro [LZ dermatologie] [seboroická verruka].pdf",
"corrected": "6008091738 2025-11-24 Nikitin, Petro [LZ kožní [seboroická verruka L tváøe, abraze].pdf"
},
{
"original": "6008091738 2025-08-25 Nikitin, Petro [LZ kožní] [pigmentové névy tč. klidné, bez onkosuspekce].pdf",
"corrected": "6008091738 2025-08-25 Nikitin, Petro [LZ kožní] [vyšetøení dermatoskopem, pigmentové névy tč. klidné, bez onkosuspekce].pdf"
},
{
"original": "6008091738 2025-05-20 Nikitin, Petro [LZ kardiologie] [ICHS, po PCI RIA 2018, EF LK 65%, mírná dilatace aort. kořene bez progrese].pdf",
"corrected": "6008091738 2025-05-20 Nikitin, Petro [LZ kardiologie] [kontrola, ICHS, po PCI RIA 2018, EF LK 65%, mírná dilatace aort. kořene bez progrese].pdf"
},
{
"original": "6008091738 2018-08-24 Nikitin, Petro [RTG páteře] [C páteř: lordosa oploštělá, C56 zúžen, spondylóza; Th: skolióza, kyfóza, Th7-10].pdf",
"corrected": "6008091738 2018-08-24 Nikitin, Petro [RTG páteře] [C páteř lordosa oploštělá, C56 zúžen, spondylóza; Th skolióza, kyfóza, Th7-10].pdf"
},
{
"original": "7109203893 2026-04-07 Deyak, Mykhaylo [Laboratoř] [glukóza 7,1, HbA1c 36, chol. 4,49, LDL 3,07, HDL 0,99, osmolalita 301, PSA 1,438].pdf",
"corrected": "7109203893 2026-04-07 Deyak, Mykhaylo [Laboratoř] [Z000, glukóza 7,1, HbA1c 36, chol. 4,49, LDL 3,07, HDL 0,99, osmolalita 301, PSA 1,438].pdf"
},
{
"original": "415414073 2026-04-21 Pekárková, Vlasta [Laboratoř] [Z000, K 5,8, osmolalita 296, glukóza 5,7, HbA1c 41, CKD-EPI 0,92 G3a, trombocyty 140].pdf",
"corrected": "415414073 2026-04-21 Pekárková, Vlasta [Laboratoř] [Z000, prediabetes, K 5,8, osmolalita 296, glukóza 5,7, HbA1c 41, CKD-EPI 0,92 G3a, trombocyty 140].pdf"
},
{
"original": "505218025 2026-04-22 Beznosková, Milena [Laboratoř] [E789, urea 8,31, CKD-EPI 1,33 G2, osmolalita 302, glukóza 7,5, CK 5,49].pdf",
"corrected": "505218025 2026-04-22 Beznosková, Milena [Laboratoř] [E789, diabetes, urea 8,31, CKD-EPI 1,33 G2, osmolalita 302, glukóza 7,5, CK 5,49].pdf"
},
{
"original": "500206172 2026-04-22 Beznoska, Miloslav [Laboratoř] [E789, CKD-EPI 1,21 G2, glukóza 5,9, HbA1c 41, LDL 3,29].pdf",
"corrected": "500206172 2026-04-22 Beznoska, Miloslav [Laboratoř] [E789, prediabetes, CKD-EPI 1,21 G2, glukóza 5,9, HbA1c 41, LDL 3,29].pdf"
},
{
"original": "475915054 2026-04-20 Žabová, Věra [Laboratoř] [moč: E. coli 10E5 CFU/ml, citlivá na ampicilin, cefuroxim, cotrimoxazol, pivmecilinam].pdf",
"corrected": "475915054 2026-04-20 Žabová, Věra [Laboratoř] [N309, kultivace a citlivost, moč E. coli 10E5 CFUml, citlivá na ampicilin, cefuroxim, cotrimoxazol, pivmecilinam].pdf"
},
{
"original": "7059087629 2026-04-13 Tůmová, Renáta [Laboratoř] [E789, chol. 7,34, LDL 4,52, non-HDL 5,53, glukóza 5,83, CKD-EPI 1,42 G2].pdf",
"corrected": "7059087629 2026-04-13 Tůmová, Renáta [Laboratoř] [E789, smíšená hyperlipidémie, prediabetes, chol. 7,34, LDL 4,52, non-HDL 5,53, glukóza 5,83, CKD-EPI 1,42 G2].pdf"
},
{
"original": "7352200328 2026-04-10 Vališová, Gabriela [Laboratoř] [Z000, chol. 5,62, LDL 3,19, HDL 1,13, TG 4,29, non-HDL 4,5, glukóza 5,4].pdf",
"corrected": "7352200328 2026-04-10 Vališová, Gabriela [Laboratoř] [Z000, smíšená hyperlipidémie, chol. 5,62, LDL 3,19, HDL 1,13, TG 4,29, non-HDL 4,5, glukóza 5,4].pdf"
},
{
"original": "6757100592 2026-04-16 Slabá, Radka [Laboratoř] [E789, CKD-EPI 1,31 G2, TG 1,90, glukóza 5,8, HbA1c 36, chol. 4,35, LDL 2,10].pdf",
"corrected": "6757100592 2026-04-16 Slabá, Radka [Laboratoř] [E789, prediabetes, CKD-EPI 1,31 G2, TG 1,90, glukóza 5,8, HbA1c 36, chol. 4,35, LDL 2,10].pdf"
} }
] ]
+17 -68
View File
@@ -382,77 +382,27 @@ def _open_preview(root, pdf_path: Path):
def _rename_dialog(nazev: str, info_lines: list[str]) -> str | None: def _rename_dialog(nazev: str, info_lines: list[str]) -> str | None:
""" """
Tkinter dialog pro schválení / opravu názvu souboru. Spustí rename_dialog.py jako subprocess — vyhneme se Tkinter konfliktům s PyCharm.
Vrátí finální název (s .pdf) nebo None = přeskočit. Vrátí finální název (s .pdf) nebo None = přeskočit.
""" """
import tkinter as tk import tempfile
result = {"value": None} data = {"nazev": nazev, "info_lines": info_lines}
tmp = Path(tempfile.mktemp(suffix=".json"))
tmp.write_text(json.dumps(data, ensure_ascii=False), encoding="utf-8")
root = tk.Tk() dialog_script = Path(__file__).parent / "rename_dialog.py"
root.withdraw() try:
root.tk.call("encoding", "system", "utf-8") proc = subprocess.run(
[sys.executable, str(dialog_script), str(tmp)],
dlg = tk.Toplevel(root) capture_output=True, text=True, encoding="utf-8",
dlg.title("Schválení názvu souboru") )
dlg.resizable(True, False) output = proc.stdout.strip()
dlg.attributes("-topmost", True) if output:
return json.loads(output).get("value")
pad = {"padx": 12, "pady": 6} return None
finally:
# Informační sekce tmp.unlink(missing_ok=True)
frame_info = tk.Frame(dlg, bg="#f0f0f0", bd=1, relief="sunken")
frame_info.pack(fill="x", **pad)
for line in info_lines:
color = "#b00000" if line.startswith("") else "#004080" if line.startswith("") else "#333"
tk.Label(frame_info, text=line, anchor="w", bg="#f0f0f0",
fg=color, font=("Segoe UI", 10)).pack(fill="x", padx=8, pady=1)
# Pole pro název (bez .pdf)
tk.Label(dlg, text="Název souboru (bez .pdf):", anchor="w",
font=("Segoe UI", 9, "bold")).pack(fill="x", padx=12, pady=(10, 2))
nazev_bez = nazev[:-4] if nazev and nazev.endswith(".pdf") else (nazev or "")
var = tk.StringVar(value=nazev_bez)
entry = tk.Entry(dlg, textvariable=var, font=("Segoe UI", 10), width=90)
entry.pack(fill="x", padx=12, pady=(0, 10))
entry.icursor(tk.END)
entry.focus_set()
# Tlačítka
frame_btn = tk.Frame(dlg)
frame_btn.pack(pady=(0, 12))
def schvalit(event=None):
result["value"] = var.get().strip()
root.destroy()
def preskocit(event=None):
result["value"] = None
root.destroy()
tk.Button(frame_btn, text="✓ Schválit (Enter)", command=schvalit,
bg="#2a7a2a", fg="white", font=("Segoe UI", 10, "bold"),
padx=16, pady=6).pack(side="left", padx=8)
tk.Button(frame_btn, text="✗ Přeskočit (Esc)", command=preskocit,
bg="#7a2a2a", fg="white", font=("Segoe UI", 10),
padx=16, pady=6).pack(side="left", padx=8)
dlg.bind("<Return>", schvalit)
dlg.bind("<Escape>", preskocit)
# Umísti dialog vpravo od náhledu (nebo vystředit pokud náhled není)
dlg.update_idletasks()
sw = dlg.winfo_screenwidth()
sh = dlg.winfo_screenheight()
w = dlg.winfo_width()
h = dlg.winfo_height()
x = min(720, sw - w - 20)
y = (sh - h) // 2
dlg.geometry(f"+{x}+{y}")
root.mainloop()
return result["value"]
def print_verification(verif: dict, rc_from_scan: str): def print_verification(verif: dict, rc_from_scan: str):
@@ -564,7 +514,6 @@ def _start_preview_process(pdf_path: Path):
viewer = Path(__file__).parent / "preview_viewer.py" viewer = Path(__file__).parent / "preview_viewer.py"
proc = subprocess.Popen( proc = subprocess.Popen(
[sys.executable, str(viewer), str(tmp), "--delete-on-close"], [sys.executable, str(viewer), str(tmp), "--delete-on-close"],
creationflags=subprocess.CREATE_NO_WINDOW if hasattr(subprocess, "CREATE_NO_WINDOW") else 0,
) )
def close(): def close():
@@ -0,0 +1,449 @@
"""
Zpracování naskenovaných PDF — nová verze.
1. Preview originálu + Claude Vision API
2. Rename dialog
3. 5 variant komprese → uživatel vybere
4. Uložit do Processed, smazat originál
"""
import base64
import gc
import io
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
if sys.platform == "win32":
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace")
import anthropic
from pdf2image import convert_from_path
sys.path.insert(0, str(Path(__file__).parent.parent))
from Knihovny.najdi_dropbox import get_dropbox_root
from Knihovny.najdi_medicus import get_medicus_config
def _load_env():
    """Copy ``KEY=VALUE`` pairs from the project's ``.env`` into ``os.environ``.

    The file is looked up one directory above this script's folder. Blank
    lines, ``#`` comments and lines without ``=`` are ignored. Values found
    in the file overwrite variables that are already set. One pair of
    matching surrounding quotes is removed from a value, so
    ``KEY="secret"`` yields ``secret``.
    """
    env_path = Path(__file__).parent.parent / ".env"
    if not env_path.exists():
        return
    # utf-8-sig also reads plain UTF-8 and drops a BOM that would otherwise
    # become part of the first variable name.
    for line in env_path.read_text(encoding="utf-8-sig").splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        key = key.strip()
        value = value.strip()
        if not key:
            # "=value": os.environ rejects an empty variable name.
            continue
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        os.environ[key] = value


_load_env()
# Directory with the Poppler binaries; handed to pdf2image as poppler_path.
POPPLER_PATH = r"C:/Poppler/Library/bin"
# Dropbox root as returned by the project helper get_dropbox_root().
_DROPBOX = Path(get_dropbox_root())
# Scanner folders — presumably "to process" and "processed"; confirm against the main flow.
TO_PROCESS = _DROPBOX / r"Ordinace\Dokumentace_ke_zpracování\Ricoh Fi-8040\KeZpracování"
PROCESSED = _DROPBOX / r"Ordinace\Dokumentace_ke_zpracování\Ricoh Fi-8040\Zpracováno"
# JSON list of saved filename corrections (read by load_corrections, written by save_correction).
CORRECTIONS_FILE = Path(__file__).parent / "corrections.json"
# Folder whose file names are indexed for the duplicate check.
DOKUMENTACE = _DROPBOX / r"Ordinace\Dokumentace_zpracovaná"
import threading
# Names of the files found in DOKUMENTACE; replaced wholesale by the loader.
_dokumentace_index: set[str] = set()
# Set once the loader has published an index.
_dokumentace_ready = threading.Event()


def _load_dokumentace_index_bg():
    """Collect the file names in DOKUMENTACE and publish them as the index.

    A missing folder yields an empty index. The ready event is set after the
    index has been assigned, then the number of indexed files is printed.
    """
    global _dokumentace_index
    found = set()
    if DOKUMENTACE.exists():
        for entry in DOKUMENTACE.iterdir():
            if entry.is_file():
                found.add(entry.name)
    _dokumentace_index = found
    _dokumentace_ready.set()
    print(f" Index dokumentace: {len(found)} souborů načteno.")
def start_dokumentace_index():
    """Start the documentation index loader on a daemon thread."""
    threading.Thread(target=_load_dokumentace_index_bg, daemon=True).start()
# Helper scripts that this module launches as separate Python processes.
VIEWER = Path(__file__).parent / "preview_viewer.py"
RENAME_DIALOG = Path(__file__).parent / "rename_dialog.py"
VARIANT_PICKER = Path(__file__).parent / "variant_picker.py"
# The 5 compression variants as (label, DPI, JPEG quality)
COMPRESS_VARIANTS = [
    ("300 DPI / q90", 300, 90),
    ("200 DPI / q85", 200, 85),
    ("150 DPI / q80", 150, 80),
    ("120 DPI / q75", 120, 75),
    ( "96 DPI / q70",  96, 70),
]
# ─── Komprese jedné varianty ──────────────────────────────────────────────────
def compress_to_temp(pdf_path: Path, dpi: int, quality: int) -> Path:
    """Render *pdf_path* as a JPEG-based PDF and save it to a temporary file.

    Each page is rasterised at *dpi*, encoded as JPEG with the given
    *quality* and placed on a new page of the original size.

    Args:
        pdf_path: Source PDF.
        dpi: Rendering resolution.
        quality: JPEG quality passed to PyMuPDF.

    Returns:
        Path of the temporary PDF. The caller is responsible for deleting it.
    """
    import fitz

    mat = fitz.Matrix(dpi / 72.0, dpi / 72.0)
    src = fitz.open(str(pdf_path))
    try:
        out = fitz.open()
        try:
            for page in src:
                pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
                img_bytes = pix.tobytes("jpeg", jpg_quality=quality)
                img_doc = fitz.open("pdf", fitz.open("jpeg", img_bytes).convert_to_pdf())
                rect = page.rect
                new_page = out.new_page(width=rect.width, height=rect.height)
                new_page.show_pdf_page(new_page.rect, img_doc, 0)

            # mkstemp creates the file atomically; tempfile.mktemp only
            # returns a name that another process could claim first.
            fd, tmp_name = tempfile.mkstemp(suffix=".pdf")
            os.close(fd)  # PyMuPDF opens the path itself
            tmp = Path(tmp_name)
            try:
                out.save(tmp, deflate=True, garbage=4)
            except Exception:
                # Do not leave an empty or partial file behind.
                tmp.unlink(missing_ok=True)
                raise
        finally:
            out.close()
    finally:
        src.close()
    return tmp
# ─── Medicus ověření ─────────────────────────────────────────────────────────
def _medicus_connect():
    """Open a connection to the Medicus database, or return None on failure.

    Any error (including a missing ``fdb`` module) is reported on stdout and
    turned into ``None`` so callers can continue without the database.
    """
    try:
        import fdb
        dsn = get_medicus_config().dsn
        connection = fdb.connect(dsn=dsn, user="SYSDBA", password="masterkey", charset="win1250")
    except Exception as exc:
        print(f" [Medicus] Nepřipojeno: {exc}")
        return None
    return connection
def _lookup_by_rc(cur, rc_digits: str) -> dict | None:
cur.execute(
"SELECT IDPAC, PRIJMENI, JMENO, RODCIS FROM KAR "
"WHERE REPLACE(RODCIS, '/', '') = ?", (rc_digits,)
)
row = cur.fetchone()
if row:
return {"idpac": row[0], "prijmeni": row[1].strip(), "jmeno": row[2].strip(), "rodcis": row[3].strip()}
return None
def _rc_candidates(rc: str) -> list[str]:
similar = {"0": "8", "8": "0", "1": "7", "7": "1", "5": "6", "6": "5", "3": "8"}
candidates = set()
for i in range(len(rc)):
candidates.add(rc[:i] + rc[i+1:])
for i in range(len(rc) + 1):
candidates.add(rc[:i] + "0" + rc[i:])
for i, ch in enumerate(rc):
if ch in similar:
candidates.add(rc[:i] + similar[ch] + rc[i+1:])
candidates.discard(rc)
return sorted(c for c in candidates if len(c) in (9, 10))
def _rc_checksum_ok(rc: str) -> bool:
digits = re.sub(r"\D", "", rc)
if len(digits) == 10:
return int(digits) % 11 == 0
return True
def verify_patient(rc_raw: str) -> dict:
    """Verify a scanned birth number against the Medicus database.

    Returns a dict with ``status``, ``patient`` and ``rc_corrected``:

    * ``"not_found"`` — no digits in the input, or no patient matched;
    * ``"offline"`` — the database connection could not be opened;
    * ``"ok"`` — the number matched exactly;
    * ``"fuzzy"`` — a corrected candidate matched; the result additionally
      carries ``all_matches``, with checksum-valid candidates first.
    """
    def outcome(status, patient=None, rc_corrected=None):
        return {"status": status, "patient": patient, "rc_corrected": rc_corrected}

    rc = re.sub(r"\D", "", rc_raw or "")
    if not rc:
        return outcome("not_found")

    con = _medicus_connect()
    if con is None:
        return outcome("offline")

    try:
        cur = con.cursor()
        exact = _lookup_by_rc(cur, rc)
        if exact:
            return outcome("ok", exact)

        hits = []
        for candidate in _rc_candidates(rc):
            found = _lookup_by_rc(cur, candidate)
            if found:
                hits.append((candidate, found))
        if not hits:
            return outcome("not_found")

        # Stable sort: candidates with a valid checksum move to the front.
        hits.sort(key=lambda hit: not _rc_checksum_ok(hit[0]))
        best_rc, best_patient = hits[0]
        result = outcome("fuzzy", best_patient, best_rc)
        result["all_matches"] = hits
        return result
    finally:
        con.close()
def check_duplicates(rc: str, datum: str) -> list[str]:
    """Return indexed documentation file names starting with ``"<rc> <datum>"``.

    Args:
        rc: Birth number (digits only).
        datum: Report date as it appears in file names.

    Returns:
        Sorted list of matching names; empty when either argument is empty.
        If the background index is not ready within 15 seconds a warning is
        printed and whatever index is currently available is searched.
    """
    if not rc or not datum:
        return []
    # Wait at most 15 s for the index (typically done during the Claude call).
    if not _dokumentace_ready.wait(timeout=15):
        # Without this message a timeout would look exactly like "no duplicates".
        print(" VAROVÁNÍ: index dokumentace není připraven — duplicity nelze spolehlivě ověřit.")
    prefix = f"{rc} {datum}"
    # Sorted so the reported order does not depend on set iteration order.
    return sorted(name for name in _dokumentace_index if name.startswith(prefix))
# ─── Korekce (few-shot příklady) ─────────────────────────────────────────────
def load_corrections() -> list[dict]:
    """Return the corrections stored in CORRECTIONS_FILE, or ``[]`` if it is missing."""
    if not CORRECTIONS_FILE.exists():
        return []
    stored = CORRECTIONS_FILE.read_text(encoding="utf-8")
    return json.loads(stored)
def save_correction(original: str, corrected: str):
    """Append an ``original -> corrected`` pair to CORRECTIONS_FILE.

    Nothing is written when the identical pair is already stored.
    """
    known = load_corrections()
    already_saved = any(
        entry["original"] == original and entry["corrected"] == corrected
        for entry in known
    )
    if already_saved:
        return

    known.append({"original": original, "corrected": corrected})
    serialized = json.dumps(known, ensure_ascii=False, indent=2)
    CORRECTIONS_FILE.write_text(serialized, encoding="utf-8")
    print(f" ✓ Korekce uložena ({len(known)} celkem)")
def build_corrections_prompt() -> str:
    """Format the ten most recent corrections as few-shot text for the prompt.

    Returns an empty string when no corrections are stored.
    """
    recent = load_corrections()[-10:]
    if not recent:
        return ""

    parts = ["Příklady korekcí z minulých běhů (uč se z nich):"]
    for entry in recent:
        parts += [
            f' - špatně: "{entry["original"]}"',
            f' správně: "{entry["corrected"]}"',
        ]
    return "\n".join(parts) + "\n\n"
# ─── Claude Vision API ────────────────────────────────────────────────────────
def extract_info(pdf_path: Path) -> dict:
    """Ask the Claude Vision API to describe a scanned medical report.

    The first page of a PDF (or the image itself for .jpg/.jpeg/.png) is
    converted to a JPEG and sent together with the prompt built below.

    Args:
        pdf_path: Scanned PDF or image file.

    Returns:
        The JSON object returned by the model. When the reply cannot be
        parsed into an object, ``{"nazev_souboru": None, "raw": <reply>}``
        is returned instead.
    """
    print(" Převádím na obrázek...")
    suffix = pdf_path.suffix.lower()
    buf = io.BytesIO()
    if suffix in (".jpg", ".jpeg", ".png"):
        from PIL import Image
        with Image.open(pdf_path) as img:
            # JPEG cannot store alpha or palette images (typical for PNG);
            # saving them unconverted raises OSError.
            if img.mode not in ("RGB", "L"):
                img = img.convert("RGB")
            img.save(buf, format="JPEG", quality=95)
    else:
        images = convert_from_path(str(pdf_path), poppler_path=POPPLER_PATH, dpi=300)
        images[0].save(buf, format="JPEG", quality=95)
        # Free the rendered pages before the next large allocation.
        del images
        gc.collect()
    image_b64 = base64.standard_b64encode(buf.getvalue()).decode("utf-8")

    prompt = (
        build_corrections_prompt() +
        "Toto je naskenovaná lékařská zpráva v češtině. "
        "Vrať JSON s těmito poli:\n"
        "- \"jmeno\": celé jméno pacienta (příjmení + jméno + případný titul)\n"
        "- \"rodne_cislo\": rodné číslo pacienta BEZ lomítka (pouze číslice)\n"
        "- \"datum_zpravy\": datum zprávy ve formátu YYYY-MM-DD\n"
        "- \"typ_dokumentu\": typ dokumentu — "
        "\"LZ {oddělení}\" = ambulantní/lékařská zpráva (např. \"LZ chirurgie\", \"LZ kardiologie\", \"LZ plicní\", \"LZ ORL\"); "
        "\"PZ {oddělení}\" = propouštěcí zpráva z hospitalizace (např. \"PZ interna\", \"PZ neurologie\"). "
        "Jiné typy: \"Laboratoř\", \"CT břicha\", \"MRI páteře\", \"kolonoskopie\", "
        "\"operační protokol oční\", \"poukaz FT\", \"diagnostická mamografie\" atd.\n"
        "- \"poznamka\": krátká klinická poznámka česky, max 80 znaků. "
        "DŮLEŽITÉ: pokud zpráva obsahuje sekci \"Závěr:\" nebo \"Závěr vyšetření:\", "
        "použij VÝHRADNĚ obsah této sekce — je nejdůležitější. "
        "Teprve pokud závěr chybí, shrň obsah z celé zprávy. "
        "U laboratorních výsledků uváděj POUZE hodnoty mimo normu (patologické nálezy) — hodnoty v normě vynech. "
        "Osmolalitu nikdy nezmiňuj ani jako patologický nález. "
        "Pokud výsledky obsahují glomerulární filtraci (eGFR nebo C_CKD-EPI), přidej její klasifikaci velkými písmeny podle CKD-EPI: "
        # Ranges need an explicit separator; "6089" would read as one value.
        "eGFR ≥ 90 → CHRI G1, 60–89 → CHRI G2, 45–59 → CHRI G3a, 30–44 → CHRI G3b, 15–29 → CHRI G4, < 15 → CHRI G5.\n"
        "- \"nazev_souboru\": název souboru ve formátu "
        "\"{rodne_cislo} {datum_zpravy} {Příjmení}, {Jméno} [{typ_dokumentu}] [{poznamka}].pdf\" "
        "(jméno bez titulu, RČ bez lomítka)\n"
        "- \"rotace\": o kolik stupňů CCW je třeba otočit obrázek aby byl text čitelně na výšku nebo šířku "
        "(hodnoty: 0, 90, 180, 270). Pokud je text již správně orientovaný, vrať 0.\n\n"
        "Pokud pole nenajdeš, použij null. Nepiš nic jiného než JSON."
    )

    print(" Volám Claude Vision API...")
    client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=400,
        messages=[{"role": "user", "content": [
            {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_b64}},
            {"type": "text", "text": prompt},
        ]}],
    )
    usage = response.usage
    print(f" Tokeny: {usage.input_tokens} in + {usage.output_tokens} out = ${usage.input_tokens*3/1e6 + usage.output_tokens*15/1e6:.4f}")

    raw = response.content[0].text.strip()
    # Strip a Markdown code fence (``` or ```json) around the reply.
    if raw.startswith("```"):
        raw = raw.split("```")[1]
        if raw.startswith("json"):
            raw = raw[4:]
    try:
        parsed = json.loads(raw.strip())
    except json.JSONDecodeError:
        parsed = None
    # Callers use .get() on the result, so anything but an object is a failure.
    if not isinstance(parsed, dict):
        print(f" VAROVÁNÍ: nelze parsovat JSON: {raw!r}")
        return {"nazev_souboru": None, "raw": raw}
    return parsed
# ─── Subprocess helpers ───────────────────────────────────────────────────────
def open_preview(pdf_path: Path) -> tuple[subprocess.Popen, Path]:
    """Launch the preview viewer for *pdf_path*.

    Returns the viewer process and the path passed to it via
    ``--write-geometry``. Only a name is reserved here: the file must not
    exist yet, because read_preview_bottom polls for its appearance.
    """
    geom_file = Path(tempfile.mktemp(suffix=".json"))
    command = [sys.executable, str(VIEWER), str(pdf_path), f"--write-geometry={geom_file}"]
    return subprocess.Popen(command), geom_file
def read_preview_bottom(geom_file: Path, timeout: float = 5.0) -> int:
import time
deadline = time.time() + timeout
while time.time() < deadline:
if geom_file.exists():
geom = json.loads(geom_file.read_text(encoding="utf-8"))
geom_file.unlink(missing_ok=True)
return geom["y"] + geom["h"] + 30 # +30 pro title bar
time.sleep(0.1)
geom_file.unlink(missing_ok=True)
return None
def run_rename_dialog(nazev: str, info_lines: list, below_y: int = None) -> str | None:
    """Run the rename dialog script and return the name the user approved.

    The proposed name and the info lines are handed over in a temporary JSON
    file; the dialog's answer is read from its stdout as JSON.

    Args:
        nazev: Proposed file name.
        info_lines: Lines passed to the dialog alongside the name.
        below_y: Optional value forwarded to the dialog as ``--below-y``.

    Returns:
        The ``"value"`` field of the dialog's JSON output, or ``None`` when
        the dialog printed nothing.
    """
    payload = {"nazev": nazev, "info_lines": info_lines}
    # NamedTemporaryFile creates the file atomically; tempfile.mktemp only
    # returns a name that another process could claim before we write.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".json", encoding="utf-8", delete=False
    ) as handle:
        json.dump(payload, handle, ensure_ascii=False)
    tmp = Path(handle.name)

    args = [sys.executable, str(RENAME_DIALOG), str(tmp)]
    if below_y is not None:
        args.append(f"--below-y={below_y}")
    try:
        proc = subprocess.run(args, capture_output=True, text=True, encoding="utf-8")
    finally:
        # Remove the hand-over file even when launching the dialog fails.
        tmp.unlink(missing_ok=True)

    out = proc.stdout.strip()
    return json.loads(out).get("value") if out else None
def run_variant_picker(variants_data: list) -> str | None:
    """Run the variant picker script and return the user's choice.

    *variants_data* is handed over in a temporary JSON file; the picker's
    answer is read from its stdout as JSON.

    Returns:
        The ``"chosen"`` field of the picker's JSON output, or ``None`` when
        the picker printed nothing.
    """
    # NamedTemporaryFile creates the file atomically; tempfile.mktemp only
    # returns a name that another process could claim before we write.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".json", encoding="utf-8", delete=False
    ) as handle:
        json.dump(variants_data, handle, ensure_ascii=False)
    tmp = Path(handle.name)

    try:
        proc = subprocess.run(
            [sys.executable, str(VARIANT_PICKER), str(tmp)],
            capture_output=True, text=True, encoding="utf-8",
        )
    finally:
        # Remove the hand-over file even when launching the picker fails.
        tmp.unlink(missing_ok=True)

    out = proc.stdout.strip()
    return json.loads(out).get("chosen") if out else None
# ─── Hlavní flow ──────────────────────────────────────────────────────────────
def process_file(pdf_path: Path):
    """Interactively name, compress and archive one scanned document.

    Flow: open a preview, extract metadata via ``extract_info``, verify the
    birth number via ``verify_patient``, check for duplicates, let the user
    approve the file name, generate compression variants, let the user pick
    one, and copy it into ``PROCESSED`` (removing the source file).

    The preview process is terminated and the temporary variants are deleted
    even when one of the steps raises.

    Args:
        pdf_path: The scanned file to process.
    """
    print(f"\nSoubor: {pdf_path.name}")

    # Start loading the documentation index in the background; per the
    # original note it is expected to finish during the Claude call.
    start_dokumentace_index()

    # 1. Open a preview of the original.
    preview, geom_file = open_preview(pdf_path)
    try:
        below_y = read_preview_bottom(geom_file)

        # 2. Claude Vision API
        info = extract_info(pdf_path)
        claude_name = info.get("nazev_souboru")
        nazev = claude_name or pdf_path.name

        # 3. Medicus verification + fuzzy matching of the birth number
        rc_from_scan = re.sub(r"\D", "", info.get("rodne_cislo") or "")
        print(f" Ověřuji v Medicus (RČ: {rc_from_scan})...")
        verif = verify_patient(rc_from_scan)
        status = verif["status"]
        patient = verif.get("patient")

        # Apply the corrected birth number on a fuzzy match. An empty scanned
        # number is skipped: str.replace("", x, 1) would prepend x to the name.
        if status == "fuzzy" and verif.get("rc_corrected") and rc_from_scan:
            nazev = nazev.replace(rc_from_scan, verif["rc_corrected"], 1)
            print(f" → RČ opraveno: {rc_from_scan} → {verif['rc_corrected']}")

        # Info lines shown in the dialog
        info_lines = []
        if status == "ok":
            info_lines.append(f"✓ Medicus: {patient['prijmeni']} {patient['jmeno']} | RČ {patient['rodcis']}")
        elif status == "fuzzy":
            info_lines.append(f"⚠ RČ ze skenu '{rc_from_scan}' → opraveno na {verif['rc_corrected']}")
            info_lines.append(f" Pacient: {patient['prijmeni']} {patient['jmeno']} | RČ {patient['rodcis']}")
        elif status == "not_found":
            info_lines.append(f"✗ RČ '{rc_from_scan}' nenalezeno v Medicus")
        else:
            info_lines.append("— Medicus nedostupný (offline)")

        # Duplicates
        rc_final = re.sub(r"\D", "", patient["rodcis"] if patient else rc_from_scan)
        duplicity = check_duplicates(rc_final, info.get("datum_zpravy") or "")
        if duplicity:
            info_lines.append(f"⚠ DUPLICITA: {', '.join(duplicity)}")

        # The status chain above always adds a line, so the old
        # "info_lines is empty" test could never fire; key the hint on what
        # it actually describes - no name returned by Claude.
        if not claude_name:
            info_lines.append("[Claude nevrátil název — uprav ručně]")

        print(" Otevírám dialog pro schválení názvu...")
        final_name = run_rename_dialog(nazev, info_lines, below_y=below_y)
    finally:
        # Never leave an orphaned preview window behind.
        preview.terminate()

    if not final_name:
        print(" Přeskočeno.")
        return

    if not final_name.endswith(".pdf"):
        final_name += ".pdf"
    # Drop characters that are not allowed in Windows file names.
    final_name = re.sub(r'[<>:"/\\|?*]', '', final_name)
    if nazev and final_name != nazev:
        save_correction(nazev, final_name)
    print(f" Schválený název: {final_name}")

    # 4. Generate compression variants (the original plus COMPRESS_VARIANTS)
    print(" Generuji kompresní varianty...")
    temp_files = []
    try:
        orig_kb = round(pdf_path.stat().st_size / 1024)
        variants_data = [{"path": str(pdf_path), "label": "Originál", "size_kb": orig_kb}]
        for label, dpi, quality in COMPRESS_VARIANTS:
            tmp = compress_to_temp(pdf_path, dpi, quality)
            # Register first so the file is cleaned up even if stat() fails.
            temp_files.append(tmp)
            size_kb = round(tmp.stat().st_size / 1024)
            variants_data.append({"path": str(tmp), "label": label, "size_kb": size_kb})
            print(f" {label}: {size_kb} kB")

        # 5. Pick a variant
        print(" Vyber variantu v okně...")
        chosen = run_variant_picker(variants_data)
        if not chosen:
            print(" Žádná varianta nevybrána, přeskakuji.")
            return

        # 6. Store in Processed
        PROCESSED.mkdir(exist_ok=True)
        dest = PROCESSED / final_name
        if dest.exists():
            print(f" VAROVÁNÍ: '{final_name}' již existuje, přeskakuji.")
        else:
            shutil.copy2(chosen, dest)
            pdf_path.unlink()
            print(f" ✓ Uloženo: {dest.name}")
    finally:
        # The original is not in temp_files, so this never deletes the source.
        for t in temp_files:
            t.unlink(missing_ok=True)
def process_folder(folder: Path):
    """Process every scan (.pdf/.jpg/.jpeg/.png) found directly in *folder*.

    Files are handled in sorted order. An exception raised for one file is
    reported and does not stop the remaining files from being processed.

    Args:
        folder: Directory to scan (not recursive).
    """
    files = sorted(
        f for f in folder.iterdir()
        # is_file() keeps sub-directories such as "archive.pdf/" out of the batch.
        if f.is_file() and f.suffix.lower() in (".pdf", ".jpg", ".jpeg", ".png")
    )
    if not files:
        print(f"Žádné soubory v: {folder}")
        return

    print(f"Nalezeno {len(files)} soubor(ů).")
    for f in files:
        try:
            process_file(f)
        except Exception as e:
            # Batch boundary: report the failure and continue with the next file.
            print(f" CHYBA: {e}")
    print("\nHotovo.")
if __name__ == "__main__":
    # Make sure both working folders exist before anything is processed.
    for _work_dir in (PROCESSED, TO_PROCESS):
        _work_dir.mkdir(exist_ok=True)

    # Optional first CLI argument: a single file or a folder; default is TO_PROCESS.
    cli_args = sys.argv[1:]
    target = Path(cli_args[0]) if cli_args else TO_PROCESS

    if target.is_file():
        process_file(target)
    elif target.is_dir():
        process_folder(target)
    else:
        # Neither an existing file nor a directory: show usage and fail.
        print("Použití: python extract_patient_info_novy.py [soubor.pdf nebo složka]")
        sys.exit(1)
+14 -1
View File
@@ -90,7 +90,20 @@ def main():
show(0) show(0)
root.update_idletasks() root.update_idletasks()
root.geometry("+0+0") sw = root.winfo_screenwidth()
w = root.winfo_width()
h = root.winfo_height()
x = (sw - w) // 2
root.geometry(f"+{x}+0")
# Zapiš geometrii do souboru pokud byl předán argument --write-geometry=<cesta>
import json as _json
for arg in sys.argv:
if arg.startswith("--write-geometry="):
geom_path = Path(arg.split("=", 1)[1])
geom_path.write_text(_json.dumps({"x": x, "y": 0, "w": w, "h": h}), encoding="utf-8")
break
root.mainloop() root.mainloop()
+93
View File
@@ -0,0 +1,93 @@
"""
Standalone dialog pro schválení / opravu názvu souboru.
Spouští se jako subprocess z extract_patient_info.py.
Argumenty: rename_dialog.py <json_soubor>
JSON vstup: { "nazev": "...", "info_lines": [...] }
JSON výstup: { "value": "..." } nebo { "value": null }
"""
import json
import sys
from pathlib import Path
import tkinter as tk
def main():
    """Show the file-name approval dialog and print the result as JSON.

    Reads ``{"nazev": ..., "info_lines": [...]}`` from the JSON file given as
    the first argument, shows an always-on-top Tk window with the info lines
    and an editable name, and prints ``{"value": <name or null>}`` to stdout.
    An optional ``--below-y=N`` argument sets the window's y position.
    """
    # When launched by the parent, stdout is a pipe, which on Windows uses the
    # locale code page. The parent decodes UTF-8, so force UTF-8 here.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    if len(sys.argv) < 2:
        print(json.dumps({"value": None}))
        sys.exit(0)

    data = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
    nazev = data.get("nazev") or ""
    info_lines = data.get("info_lines") or []
    result = {"value": None}

    root = tk.Tk()
    root.title("Schválení názvu souboru")
    root.resizable(True, False)
    root.attributes("-topmost", True)
    root.tk.call("encoding", "system", "utf-8")

    pad = {"padx": 12, "pady": 6}
    frame_info = tk.Frame(root, bg="#f0f0f0", bd=1, relief="sunken")
    frame_info.pack(fill="x", **pad)
    for line in info_lines:
        # NOTE(review): both prefixes below are empty strings in this copy of
        # the file, so every line gets the first colour. The intended marker
        # characters appear to have been lost - confirm against the original.
        color = "#b00000" if line.startswith("") else "#004080" if line.startswith("") else "#333"
        tk.Label(frame_info, text=line, anchor="w", bg="#f0f0f0",
                 fg=color, font=("Segoe UI", 10)).pack(fill="x", padx=8, pady=1)

    tk.Label(root, text="Název souboru (bez .pdf):", anchor="w",
             font=("Segoe UI", 9, "bold")).pack(fill="x", padx=12, pady=(10, 2))

    # The extension is hidden while editing.
    nazev_bez = nazev[:-4] if nazev.endswith(".pdf") else nazev
    var = tk.StringVar(value=nazev_bez)
    entry = tk.Entry(root, textvariable=var, font=("Segoe UI", 10), width=90)
    entry.pack(fill="x", padx=12, pady=(0, 10))
    entry.icursor(tk.END)
    entry.focus_set()

    frame_btn = tk.Frame(root)
    frame_btn.pack(pady=(0, 12))

    def schvalit(event=None):
        # Approve: return the (trimmed) text from the entry field.
        result["value"] = var.get().strip()
        root.destroy()

    def preskocit(event=None):
        # Skip: report no value.
        result["value"] = None
        root.destroy()

    tk.Button(frame_btn, text="✓ Schválit (Enter)", command=schvalit,
              bg="#2a7a2a", fg="white", font=("Segoe UI", 10, "bold"),
              padx=16, pady=6).pack(side="left", padx=8)
    tk.Button(frame_btn, text="✗ Přeskočit (Esc)", command=preskocit,
              bg="#7a2a2a", fg="white", font=("Segoe UI", 10),
              padx=16, pady=6).pack(side="left", padx=8)
    root.bind("<Return>", schvalit)
    root.bind("<Escape>", preskocit)

    # Centre horizontally once the real window size is known.
    root.update_idletasks()
    sw = root.winfo_screenwidth()
    w = root.winfo_width()
    x = (sw - w) // 2

    # Position below the preview window when --below-y=N was passed.
    below_y = None
    for arg in sys.argv:
        if arg.startswith("--below-y="):
            try:
                below_y = int(arg.split("=", 1)[1])
            except ValueError:
                # Malformed value: fall back to the default position.
                below_y = None
            break
    y = below_y if below_y is not None else (root.winfo_screenheight() - root.winfo_height() - 60)
    root.geometry(f"+{x}+{y}")

    root.lift()
    root.focus_force()
    root.mainloop()

    print(json.dumps({"value": result["value"]}, ensure_ascii=False))


if __name__ == "__main__":
    main()
+148
View File
@@ -0,0 +1,148 @@
"""
Jedno okno pro výběr kompresní varianty PDF.
Nahoře tlačítka 1–N pro přepínání, tlačítko "Tohle beru" pro potvrzení.
Argumenty: variant_picker.py <json_soubor>
JSON vstup: [{"path": "...", "label": "150 DPI / q80", "size_kb": 139}, ...]
JSON výstup (stdout): {"chosen": "cesta/k/souboru"}
"""
import json
import sys
from pathlib import Path
import tkinter as tk
from PIL import Image, ImageTk
import fitz
def main():
    """Show one window for comparing PDF variants and print the chosen path.

    Reads a JSON list of ``{"path", "label", "size_kb"}`` objects from the
    file given as the first argument, renders the selected variant/page with
    PyMuPDF, and prints ``{"chosen": <path or null>}`` to stdout when the
    window closes. Digit keys select a variant, Enter confirms, Esc skips.
    """
    # When launched by the parent, stdout is a pipe, which on Windows uses the
    # locale code page. The parent decodes UTF-8, so force UTF-8 here.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    if len(sys.argv) < 2:
        sys.exit(1)

    variants = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
    chosen = {"path": None}
    if not variants:
        # Nothing to choose from: report "no choice" instead of crashing.
        print(json.dumps({"chosen": None}, ensure_ascii=False))
        return

    docs = [fitz.open(v["path"]) for v in variants]
    current = [0]
    photo_ref = [None]  # keeps the PhotoImage referenced while it is displayed

    root = tk.Tk()
    root.tk.call("encoding", "system", "utf-8")
    root.attributes("-topmost", True)

    sh = root.winfo_screenheight()
    sw = root.winfo_screenwidth()
    win_h = sh - 80  # leave room for the taskbar + title bar
    img_h = win_h - 160
    img_w = sw // 2  # window width = half of the screen
    x = (sw - img_w) // 2
    root.geometry(f"{img_w}x{win_h}+{x}+0")
    root.resizable(False, False)

    # -- Top panel with the variant buttons --
    frame_top = tk.Frame(root, bg="#222")
    frame_top.pack(fill="x")
    btn_variants = []
    current_page = [0]

    def show(n, page_n=0):
        # Render page `page_n` of variant `n`, scaled to fit the image area.
        current[0] = n
        current_page[0] = page_n
        doc = docs[n]
        page = doc[page_n]
        zoom = min(img_w / page.rect.width, img_h / page.rect.height)
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
        photo_ref[0] = ImageTk.PhotoImage(img)
        lbl_img.config(image=photo_ref[0])
        page_count = len(doc)
        root.title(f"Varianta {n+1}: {variants[n]['label']} ({variants[n]['size_kb']} kB) — strana {page_n+1}/{page_count}")
        # Highlight the active variant and enable only valid page moves.
        for i, b in enumerate(btn_variants):
            b.config(bg="#2a5a9a" if i == n else "#444")
        btn_prev_page.config(state="normal" if page_n > 0 else "disabled")
        btn_next_page.config(state="normal" if page_n < page_count - 1 else "disabled")

    for i, v in enumerate(variants):
        b = tk.Button(
            frame_top,
            text=f"{i+1}. {v['label']}\n{v['size_kb']} kB",
            font=("Segoe UI", 9, "bold"),
            bg="#444", fg="white",
            relief="flat", padx=8, pady=6,
            command=lambda n=i: show(n),
        )
        b.pack(side="left", padx=2, pady=4)
        btn_variants.append(b)

    # -- Accept / Skip buttons, same style as the variant buttons --
    def beru():
        chosen["path"] = variants[current[0]]["path"]
        root.destroy()

    def preskocit():
        root.destroy()

    tk.Button(
        frame_top,
        text="✓ Tohle beru\n",
        command=beru,
        bg="#2a7a2a", fg="white",
        font=("Segoe UI", 9, "bold"),
        relief="flat", padx=8, pady=6,
    ).pack(side="left", padx=2, pady=4)
    tk.Button(
        frame_top,
        text="✗ Přeskočit\n",
        command=preskocit,
        bg="#7a2a2a", fg="white",
        font=("Segoe UI", 9, "bold"),
        relief="flat", padx=8, pady=6,
    ).pack(side="left", padx=2, pady=4)

    # -- Page navigation, far right --
    btn_next_page = tk.Button(
        frame_top,
        text="Další ►\n",
        command=lambda: show(current[0], current_page[0] + 1),
        bg="#555", fg="white",
        font=("Segoe UI", 9, "bold"),
        relief="flat", padx=8, pady=6,
    )
    btn_next_page.pack(side="right", padx=2, pady=4)
    btn_prev_page = tk.Button(
        frame_top,
        text="◄ Před.\n",
        command=lambda: show(current[0], current_page[0] - 1),
        bg="#555", fg="white",
        font=("Segoe UI", 9, "bold"),
        relief="flat", padx=8, pady=6,
    )
    btn_prev_page.pack(side="right", padx=2, pady=4)

    # -- Image --
    lbl_img = tk.Label(root, bg="black")
    lbl_img.pack(fill="both", expand=True)

    # Bind one digit key per existing variant (at most 1-9), so every button
    # has a shortcut and no key points past the end of the list.
    for i in range(min(len(variants), 9)):
        root.bind(f"<Key-{i + 1}>", lambda e, n=i: show(n))
    root.bind("<Return>", lambda e: beru())
    root.bind("<Escape>", lambda e: preskocit())

    show(0)
    root.mainloop()

    # Best-effort cleanup; a failure to close must not lose the user's choice.
    for d in docs:
        try:
            d.close()
        except Exception:
            pass

    print(json.dumps({"chosen": chosen["path"]}, ensure_ascii=False))


if __name__ == "__main__":
    main()