diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 33935db..98ac813 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -5,7 +5,12 @@ "Bash(ls -la \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\")", "Bash(find \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\" -type f)", "Bash(grep -E '\\\\.\\(py|json|txt|md|yaml|yml\\)$')", - "Bash(ls -la \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\\\\\\\\Processed\" \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\\\\\\\\ToProcess\")" + "Bash(ls -la \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\\\\\\\\Processed\" \"U:\\\\\\\\Medevio\\\\\\\\60 ScansProcessing\\\\\\\\ToProcess\")", + "Bash(python -c ' *)", + "Bash(gs --version)", + "Bash(where gs *)", + "Bash(python -c \"import pypdf; print\\('pypdf ok'\\)\")", + "Bash(python -c \"import fitz; print\\('pymupdf ok', fitz.version\\)\")" ] } } diff --git a/12 Tower1/50 SaveToFileSystem incremental.py b/12 Tower1/50 SaveToFileSystem incremental.py index fb0e3fd..ee6925a 100644 --- a/12 Tower1/50 SaveToFileSystem incremental.py +++ b/12 Tower1/50 SaveToFileSystem incremental.py @@ -7,6 +7,7 @@ import pymysql import re from pathlib import Path from datetime import datetime +from collections import defaultdict import time import sys @@ -112,6 +113,7 @@ cur_meta.execute(""" p.displayTitle FROM medevio_downloads d JOIN pozadavky p ON d.request_id = p.id + WHERE p.updatedAt >= DATE_SUB(NOW(), INTERVAL 14 DAY) ORDER BY p.updatedAt DESC """) @@ -122,40 +124,28 @@ safe_print(f"📋 Found {len(rows)} attachment records.\n") # 🧠 MAIN LOOP WITH PROGRESS # ============================== -unique_request_ids = [] -seen = set() +# Group rows by request_id in Python — avoids N extra SELECT filename queries +rows_by_request = defaultdict(list) for r in rows: - req_id = r["request_id"] - if req_id not in seen: - unique_request_ids.append(req_id) - seen.add(req_id) + rows_by_request[r["request_id"]].append(r) -total_requests = len(unique_request_ids) +total_requests = len(rows_by_request) safe_print(f"🔄 Processing {total_requests} unique requests...\n") -processed_requests = set() -current_index = 0 +# Pre-index BASE_DIR once — avoids iterdir() called twice per request +folder_list = [(f, f.name) for f in BASE_DIR.iterdir() if f.is_dir()] -for r in rows: - req_id = r["request_id"] - - if req_id in processed_requests: - continue - processed_requests.add(req_id) - - current_index += 1 +for current_index, (req_id, req_rows) in enumerate(rows_by_request.items(), 1): percent = (current_index / total_requests) * 100 - safe_print(f"\n[ {percent:5.1f}% ] Processing request {current_index} / {total_requests} → {req_id}") - # ========== FETCH VALID FILENAMES ========== - cur_meta.execute( - "SELECT filename FROM medevio_downloads WHERE request_id=%s", - (req_id,) - ) - valid_files = {sanitize_name(row["filename"]) for row in cur_meta.fetchall()} + # ========== VALID FILENAMES from already-loaded rows ========== + # original filename → sanitized name (needed for DB query later) + file_map = {sanitize_name(r["filename"]): r["filename"] for r in req_rows} + valid_files = set(file_map.keys()) # ========== BUILD FOLDER NAME ========== + r = req_rows[0] updated_at = r["req_updated_at"] or datetime.now() date_str = updated_at.strftime("%Y-%m-%d") @@ -168,21 +158,15 @@ for r in rows: f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}" ) - # ========== DETECT EXISTING FOLDER ========== - existing_folder = None - - for f in BASE_DIR.iterdir(): - if f.is_dir() and req_id in f.name: - existing_folder = f - break + # ========== DETECT EXISTING FOLDER from pre-built index ========== + req_id_str = str(req_id) + matching = [f for f, name in folder_list if req_id_str in name] + existing_folder = matching[0] if matching else None main_folder = existing_folder if existing_folder else BASE_DIR / clean_folder_name # ========== MERGE DUPLICATES ========== - possible_dups = [ - f for f in BASE_DIR.iterdir() - if f.is_dir() and req_id in f.name and f != main_folder - ] + possible_dups = [f for f, name in folder_list if req_id_str in name and f != main_folder] for dup in possible_dups: safe_print(f"♻️ Merging duplicate folder: {dup.name}") @@ -201,36 +185,32 @@ for r in rows: # ========== CLEAN MAIN FOLDER ========== clean_folder(main_folder, valid_files) - # ========== DOWNLOAD MISSING FILES ========== - added_new_file = False + # ========== DOWNLOAD MISSING FILES (batch blob fetch per request) ========== main_folder.mkdir(parents=True, exist_ok=True) + added_new_file = False - for filename in valid_files: - dest_plain = main_folder / filename - dest_marked = main_folder / ("▲" + filename) - - if dest_plain.exists() or dest_marked.exists(): - continue - - added_new_file = True + missing_san = [ + fn for fn in valid_files + if not (main_folder / fn).exists() and not (main_folder / ("▲" + fn)).exists() + ] + if missing_san: + # Fetch all missing blobs in a single query instead of one per file + missing_orig = [file_map[fn] for fn in missing_san] + placeholders = ",".join(["%s"] * len(missing_orig)) cur_blob.execute( - "SELECT file_content FROM medevio_downloads " - "WHERE request_id=%s AND filename=%s", - (req_id, filename) + f"SELECT filename, file_content FROM medevio_downloads " + f"WHERE request_id=%s AND filename IN ({placeholders})", + [req_id] + missing_orig, ) - row = cur_blob.fetchone() - if not row: - continue - - content = row[0] - if not content: - continue - - with open(dest_plain, "wb") as f: - f.write(content) - - safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}") + for blob_filename, content in cur_blob.fetchall(): + if not content: + continue + dest_plain = main_folder / sanitize_name(blob_filename) + with open(dest_plain, "wb") as fh: + fh.write(content) + safe_print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}") + added_new_file = True # ========== REMOVE ▲ FLAG IF NEW FILES ADDED ========== if added_new_file and "▲" in main_folder.name: diff --git a/50 Různé testy/MinimizeOptimizePDF/compress_pdf.py b/50 Různé testy/MinimizeOptimizePDF/compress_pdf.py new file mode 100644 index 0000000..18b0a88 --- /dev/null +++ b/50 Různé testy/MinimizeOptimizePDF/compress_pdf.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Compress PDF — output DPI and JPEG quality are chosen automatically +based on the detected resolution of the source PDF. + +Usage: python compress_pdf.py [output.pdf] + python compress_pdf.py (processes all PDFs in current folder) +Output filename: original_name (139 kB).pdf +""" + +import sys +import fitz +from pathlib import Path + +# ============================== +# COMPRESSION TABLE +# Detected source DPI -> (output DPI, JPEG quality) +# Rows are evaluated top-to-bottom; first match wins. +# ============================== +# +# src_dpi_min src_dpi_max out_dpi jpeg_quality +COMPRESSION_TABLE = [ + ( 0, 99, 72, 60), # very low res — already small, compress hard + ( 100, 149, 100, 70), # low res + ( 150, 249, 150, 80), # standard scan (our tested sweet spot) + ( 250, 399, 150, 80), # good scan — downsample to 150 is fine + ( 400, 599, 200, 85), # high res scan + ( 600, 9999, 150, 80), # very high res / professional scan +] + + +def detect_source_dpi(src: fitz.Document) -> int: + """Estimate source DPI from the largest image on the first page.""" + page = src[0] + images = page.get_images(full=True) + if not images: + return 150 # no raster images — use default + + # Find the largest image by pixel area + best = max(images, key=lambda img: img[2] * img[3]) # width * height + img_w_px, img_h_px = best[2], best[3] + + # Page size in inches (1 point = 1/72 inch) + page_w_in = page.rect.width / 72.0 + page_h_in = page.rect.height / 72.0 + + dpi_x = img_w_px / page_w_in if page_w_in else 0 + dpi_y = img_h_px / page_h_in if page_h_in else 0 + return round((dpi_x + dpi_y) / 2) + + +def pick_settings(source_dpi: int) -> tuple[int, int]: + for min_dpi, max_dpi, out_dpi, quality in COMPRESSION_TABLE: + if min_dpi <= source_dpi <= max_dpi: + return out_dpi, quality + # fallback to last row + return COMPRESSION_TABLE[-1][2], COMPRESSION_TABLE[-1][3] + + +def compress(input_path: Path, output_path: Path = None): + src = fitz.open(input_path) + + source_dpi = detect_source_dpi(src) + out_dpi, jpeg_quality = pick_settings(source_dpi) + + print(f" zdroj ~{source_dpi} DPI -> komprese {out_dpi} DPI / JPEG q{jpeg_quality}") + + zoom = out_dpi / 72.0 + mat = fitz.Matrix(zoom, zoom) + + out_doc = fitz.open() + for page in src: + pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB) + img_bytes = pix.tobytes("jpeg", jpg_quality=jpeg_quality) + img_doc = fitz.open("pdf", fitz.open("jpeg", img_bytes).convert_to_pdf()) + rect = page.rect + new_page = out_doc.new_page(width=rect.width, height=rect.height) + new_page.show_pdf_page(new_page.rect, img_doc, 0) + src.close() + + tmp = input_path.with_suffix(".tmp.pdf") + out_doc.save(tmp, deflate=True, garbage=4) + out_doc.close() + + size_kb = round(tmp.stat().st_size / 1024) + + if output_path is None: + output_path = input_path.parent / f"{input_path.stem} ({size_kb} kB).pdf" + + if output_path.exists(): + output_path.unlink() + tmp.rename(output_path) + + orig_kb = round(input_path.stat().st_size / 1024) + saving = (1 - size_kb / orig_kb) * 100 + print(f" {input_path.name} -> {output_path.name} (bylo {orig_kb} kB, uspora {saving:.0f}%)") + + +if __name__ == "__main__": + if len(sys.argv) >= 2: + inp = Path(sys.argv[1]) + out = Path(sys.argv[2]) if len(sys.argv) >= 3 else None + compress(inp, out) + else: + folder = Path(__file__).parent + pdfs = [p for p in folder.glob("*.pdf") if not p.name.endswith(").pdf") and p.stem != Path(__file__).stem] + if not pdfs: + print("Zadne PDF k zpracovani.") + for pdf in pdfs: + compress(pdf) diff --git a/50 Různé testy/MinimizeOptimizePDF/compress_variants.py b/50 Různé testy/MinimizeOptimizePDF/compress_variants.py new file mode 100644 index 0000000..227092a --- /dev/null +++ b/50 Různé testy/MinimizeOptimizePDF/compress_variants.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Compress a PDF into multiple variants at different DPI / JPEG quality settings. +Uses PyMuPDF (fitz) — renders each page as JPEG image, saves back as PDF. +""" + +import sys +import fitz # PyMuPDF +from pathlib import Path + +INPUT = Path(r"u:\Medevio\50 Různé testy\MinimizeOptimizePDF\afd1823b-8277-44a2-84e1-db89a0ccd134.pdf") +OUT_DIR = INPUT.parent + +VARIANTS = [ + # (label, dpi, jpeg_quality) + ("300dpi_q90", 300, 90), + ("200dpi_q85", 200, 85), + ("150dpi_q80", 150, 80), + ("120dpi_q75", 120, 75), + ("96dpi_q70", 96, 70), + ("72dpi_q60", 72, 60), +] + +src = fitz.open(INPUT) +original_size = INPUT.stat().st_size +print(f"Originál: {INPUT.name} ({original_size / 1024:.0f} KB)\n") +print(f"{'Varianta':<20} {'DPI':>5} {'Kvalita':>8} {'Velikost':>12} {'Úspora':>8}") +print("-" * 58) + +for label, dpi, quality in VARIANTS: + out_path = OUT_DIR / f"{INPUT.stem}_{label}.pdf" + zoom = dpi / 72.0 + mat = fitz.Matrix(zoom, zoom) + + out_doc = fitz.open() + for page in src: + pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB) + img_bytes = pix.tobytes("jpeg", jpg_quality=quality) + + # Create a new PDF page with the same physical dimensions + img_doc = fitz.open("pdf", fitz.open("jpeg", img_bytes).convert_to_pdf()) + # Scale page back to original size + rect = page.rect + new_page = out_doc.new_page(width=rect.width, height=rect.height) + new_page.show_pdf_page(new_page.rect, img_doc, 0) + + out_doc.save(out_path, deflate=True, garbage=4) + out_doc.close() + + size = out_path.stat().st_size + size_kb = round(size / 1024) + final_path = OUT_DIR / f"{INPUT.stem}_{label} ({size_kb} kB).pdf" + out_path.rename(final_path) + + saving = (1 - size / original_size) * 100 + print(f"{label:<20} {dpi:>5} {quality:>8} {size_kb:>9} kB {saving:>7.0f}%") + +src.close() +print("\nHotovo.") diff --git a/60 ScansProcessing/ToProcess/0cfe0dea-c7bf-47f1-b4a2-6fb0f54d4362.pdf b/60 ScansProcessing/ToProcess/0cfe0dea-c7bf-47f1-b4a2-6fb0f54d4362.pdf new file mode 100644 index 0000000..8b06400 Binary files /dev/null and b/60 ScansProcessing/ToProcess/0cfe0dea-c7bf-47f1-b4a2-6fb0f54d4362.pdf differ diff --git a/60 ScansProcessing/corrections.json b/60 ScansProcessing/corrections.json index ced2f69..bd8cd13 100644 --- a/60 ScansProcessing/corrections.json +++ b/60 ScansProcessing/corrections.json @@ -374,5 +374,69 @@ { "original": "8452 2026-04-02 Věkrbeová [Laboratoř] [moč chemicky: ERY trace, ostatní neg., pH 6, SG 1.020].pdf", "corrected": "8755120429 2026-04-02 [uritex] [moč chemicky ERY trace, ostatní neg., pH 6, SG 1.020].pdf" + }, + { + "original": "461001479 2026-04-21 Šťastný, Libor [LZ endokrinologie] [St.p. TTE dx a STE sin 5/18, strumiprivní hypotyreóza substituovaná, Letrox 150ug].pdf", + "corrected": "461001479 2026-04-21 Šťastný, Libor [LZ endokrinologie] [St.p. TTE dx a STE sin 518, strumiprivní hypotyreóza substituovaná, Letrox 150ug].pdf" + }, + { + "original": "9901040000 2026-04-26 Tvrz, Matěj [export zdraví krevní tlak] [prům. 153/74 mmHg, hypertenze 5d, emergentní hypertenzní stav 1d].pdf", + "corrected": "9901040000 2026-04-26 Tvrz, Matěj [export zdraví krevní tlak] [prům. 15374 mmHg, hypertenze 5d, emergentní hypertenzní stav 1d].pdf" + }, + { + "original": "395907022 2026-04-10 Herzová, Marie [LZ ortopedie] [Gonarthrosis bilat., obstr. kortik. +M i.a. vlevo, indik. lázně VII7, M179].pdf", + "corrected": "395907022 2026-04-10 Herzová, Marie [LZ ortopedie] [indikace lázně VII7, M179, gonarthrosis bilat., obstr. kortik. +M i.a. vlevo].pdf" + }, + { + "original": "6008091738 2020-07-15 Nikitin, Petro [LZ gastroenterologie] [Antrumgastritida, inkompetentní kardie, gastroesophageální reflux].pdf", + "corrected": "6008091738 2020-07-15 Nikitin, Petro [LZ gastro] [gastroskopie, antrumgastritida, inkompetentní kardie, gastroesophageální reflux].pdf" + }, + { + "original": "6008091738 2025-11-24 Nikitin, Petro [LZ dermatologie] [seboroická verruka].pdf", + "corrected": "6008091738 2025-11-24 Nikitin, Petro [LZ kožní [seboroická verruka L tváøe, abraze].pdf" + }, + { + "original": "6008091738 2025-08-25 Nikitin, Petro [LZ kožní] [pigmentové névy tč. klidné, bez onkosuspekce].pdf", + "corrected": "6008091738 2025-08-25 Nikitin, Petro [LZ kožní] [vyšetøení dermatoskopem, pigmentové névy tč. klidné, bez onkosuspekce].pdf" + }, + { + "original": "6008091738 2025-05-20 Nikitin, Petro [LZ kardiologie] [ICHS, po PCI RIA 2018, EF LK 65%, mírná dilatace aort. kořene bez progrese].pdf", + "corrected": "6008091738 2025-05-20 Nikitin, Petro [LZ kardiologie] [kontrola, ICHS, po PCI RIA 2018, EF LK 65%, mírná dilatace aort. kořene bez progrese].pdf" + }, + { + "original": "6008091738 2018-08-24 Nikitin, Petro [RTG páteře] [C páteř: lordosa oploštělá, C56 zúžen, spondylóza; Th: skolióza, kyfóza, Th7-10].pdf", + "corrected": "6008091738 2018-08-24 Nikitin, Petro [RTG páteře] [C páteř lordosa oploštělá, C56 zúžen, spondylóza; Th skolióza, kyfóza, Th7-10].pdf" + }, + { + "original": "7109203893 2026-04-07 Deyak, Mykhaylo [Laboratoř] [glukóza 7,1, HbA1c 36, chol. 4,49, LDL 3,07, HDL 0,99, osmolalita 301, PSA 1,438].pdf", + "corrected": "7109203893 2026-04-07 Deyak, Mykhaylo [Laboratoř] [Z000, glukóza 7,1, HbA1c 36, chol. 4,49, LDL 3,07, HDL 0,99, osmolalita 301, PSA 1,438].pdf" + }, + { + "original": "415414073 2026-04-21 Pekárková, Vlasta [Laboratoř] [Z000, K 5,8, osmolalita 296, glukóza 5,7, HbA1c 41, CKD-EPI 0,92 G3a, trombocyty 140].pdf", + "corrected": "415414073 2026-04-21 Pekárková, Vlasta [Laboratoř] [Z000, prediabetes, K 5,8, osmolalita 296, glukóza 5,7, HbA1c 41, CKD-EPI 0,92 G3a, trombocyty 140].pdf" + }, + { + "original": "505218025 2026-04-22 Beznosková, Milena [Laboratoř] [E789, urea 8,31, CKD-EPI 1,33 G2, osmolalita 302, glukóza 7,5, CK 5,49].pdf", + "corrected": "505218025 2026-04-22 Beznosková, Milena [Laboratoř] [E789, diabetes, urea 8,31, CKD-EPI 1,33 G2, osmolalita 302, glukóza 7,5, CK 5,49].pdf" + }, + { + "original": "500206172 2026-04-22 Beznoska, Miloslav [Laboratoř] [E789, CKD-EPI 1,21 G2, glukóza 5,9, HbA1c 41, LDL 3,29].pdf", + "corrected": "500206172 2026-04-22 Beznoska, Miloslav [Laboratoř] [E789, prediabetes, CKD-EPI 1,21 G2, glukóza 5,9, HbA1c 41, LDL 3,29].pdf" + }, + { + "original": "475915054 2026-04-20 Žabová, Věra [Laboratoř] [moč: E. coli 10E5 CFU/ml, citlivá na ampicilin, cefuroxim, cotrimoxazol, pivmecilinam].pdf", + "corrected": "475915054 2026-04-20 Žabová, Věra [Laboratoř] [N309, kultivace a citlivost, moč E. coli 10E5 CFUml, citlivá na ampicilin, cefuroxim, cotrimoxazol, pivmecilinam].pdf" + }, + { + "original": "7059087629 2026-04-13 Tůmová, Renáta [Laboratoř] [E789, chol. 7,34, LDL 4,52, non-HDL 5,53, glukóza 5,83, CKD-EPI 1,42 G2].pdf", + "corrected": "7059087629 2026-04-13 Tůmová, Renáta [Laboratoř] [E789, smíšená hyperlipidémie, prediabetes, chol. 7,34, LDL 4,52, non-HDL 5,53, glukóza 5,83, CKD-EPI 1,42 G2].pdf" + }, + { + "original": "7352200328 2026-04-10 Vališová, Gabriela [Laboratoř] [Z000, chol. 5,62, LDL 3,19, HDL 1,13, TG 4,29, non-HDL 4,5, glukóza 5,4].pdf", + "corrected": "7352200328 2026-04-10 Vališová, Gabriela [Laboratoř] [Z000, smíšená hyperlipidémie, chol. 5,62, LDL 3,19, HDL 1,13, TG 4,29, non-HDL 4,5, glukóza 5,4].pdf" + }, + { + "original": "6757100592 2026-04-16 Slabá, Radka [Laboratoř] [E789, CKD-EPI 1,31 G2, TG 1,90, glukóza 5,8, HbA1c 36, chol. 4,35, LDL 2,10].pdf", + "corrected": "6757100592 2026-04-16 Slabá, Radka [Laboratoř] [E789, prediabetes, CKD-EPI 1,31 G2, TG 1,90, glukóza 5,8, HbA1c 36, chol. 4,35, LDL 2,10].pdf" } ] \ No newline at end of file diff --git a/60 ScansProcessing/extract_patient_info.py b/60 ScansProcessing/extract_patient_info.py index 94af2dd..6867c17 100644 --- a/60 ScansProcessing/extract_patient_info.py +++ b/60 ScansProcessing/extract_patient_info.py @@ -382,77 +382,27 @@ def _open_preview(root, pdf_path: Path): def _rename_dialog(nazev: str, info_lines: list[str]) -> str | None: """ - Tkinter dialog pro schválení / opravu názvu souboru. + Spustí rename_dialog.py jako subprocess — vyhneme se Tkinter konfliktům s PyCharm. Vrátí finální název (s .pdf) nebo None = přeskočit. """ - import tkinter as tk + import tempfile - result = {"value": None} + data = {"nazev": nazev, "info_lines": info_lines} + tmp = Path(tempfile.mktemp(suffix=".json")) + tmp.write_text(json.dumps(data, ensure_ascii=False), encoding="utf-8") - root = tk.Tk() - root.withdraw() - root.tk.call("encoding", "system", "utf-8") - - dlg = tk.Toplevel(root) - dlg.title("Schválení názvu souboru") - dlg.resizable(True, False) - dlg.attributes("-topmost", True) - - pad = {"padx": 12, "pady": 6} - - # Informační sekce - frame_info = tk.Frame(dlg, bg="#f0f0f0", bd=1, relief="sunken") - frame_info.pack(fill="x", **pad) - for line in info_lines: - color = "#b00000" if line.startswith("⚠") else "#004080" if line.startswith("✓") else "#333" - tk.Label(frame_info, text=line, anchor="w", bg="#f0f0f0", - fg=color, font=("Segoe UI", 10)).pack(fill="x", padx=8, pady=1) - - # Pole pro název (bez .pdf) - tk.Label(dlg, text="Název souboru (bez .pdf):", anchor="w", - font=("Segoe UI", 9, "bold")).pack(fill="x", padx=12, pady=(10, 2)) - - nazev_bez = nazev[:-4] if nazev and nazev.endswith(".pdf") else (nazev or "") - var = tk.StringVar(value=nazev_bez) - entry = tk.Entry(dlg, textvariable=var, font=("Segoe UI", 10), width=90) - entry.pack(fill="x", padx=12, pady=(0, 10)) - entry.icursor(tk.END) - entry.focus_set() - - # Tlačítka - frame_btn = tk.Frame(dlg) - frame_btn.pack(pady=(0, 12)) - - def schvalit(event=None): - result["value"] = var.get().strip() - root.destroy() - - def preskocit(event=None): - result["value"] = None - root.destroy() - - tk.Button(frame_btn, text="✓ Schválit (Enter)", command=schvalit, - bg="#2a7a2a", fg="white", font=("Segoe UI", 10, "bold"), - padx=16, pady=6).pack(side="left", padx=8) - tk.Button(frame_btn, text="✗ Přeskočit (Esc)", command=preskocit, - bg="#7a2a2a", fg="white", font=("Segoe UI", 10), - padx=16, pady=6).pack(side="left", padx=8) - - dlg.bind("", schvalit) - dlg.bind("", preskocit) - - # Umísti dialog vpravo od náhledu (nebo vystředit pokud náhled není) - dlg.update_idletasks() - sw = dlg.winfo_screenwidth() - sh = dlg.winfo_screenheight() - w = dlg.winfo_width() - h = dlg.winfo_height() - x = min(720, sw - w - 20) - y = (sh - h) // 2 - dlg.geometry(f"+{x}+{y}") - - root.mainloop() - return result["value"] + dialog_script = Path(__file__).parent / "rename_dialog.py" + try: + proc = subprocess.run( + [sys.executable, str(dialog_script), str(tmp)], + capture_output=True, text=True, encoding="utf-8", + ) + output = proc.stdout.strip() + if output: + return json.loads(output).get("value") + return None + finally: + tmp.unlink(missing_ok=True) def print_verification(verif: dict, rc_from_scan: str): @@ -564,7 +514,6 @@ def _start_preview_process(pdf_path: Path): viewer = Path(__file__).parent / "preview_viewer.py" proc = subprocess.Popen( [sys.executable, str(viewer), str(tmp), "--delete-on-close"], - creationflags=subprocess.CREATE_NO_WINDOW if hasattr(subprocess, "CREATE_NO_WINDOW") else 0, ) def close(): diff --git a/60 ScansProcessing/extract_patient_info_novy.py b/60 ScansProcessing/extract_patient_info_novy.py new file mode 100644 index 0000000..d7aa515 --- /dev/null +++ b/60 ScansProcessing/extract_patient_info_novy.py @@ -0,0 +1,449 @@ +""" +Zpracování naskenovaných PDF — nová verze. +1. Preview originálu + Claude Vision API +2. Rename dialog +3. 5 variant komprese → uživatel vybere +4. Uložit do Processed, smazat originál +""" +import base64 +import gc +import io +import json +import os +import re +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path + +if sys.platform == "win32": + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace") + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace") + +import anthropic +from pdf2image import convert_from_path + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from Knihovny.najdi_dropbox import get_dropbox_root +from Knihovny.najdi_medicus import get_medicus_config + +def _load_env(): + env_path = Path(__file__).parent.parent / ".env" + if env_path.exists(): + for line in env_path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if "=" in line and not line.startswith("#"): + k, v = line.split("=", 1) + os.environ[k.strip()] = v.strip() + +_load_env() + +POPPLER_PATH = r"C:/Poppler/Library/bin" +_DROPBOX = Path(get_dropbox_root()) +TO_PROCESS = _DROPBOX / r"Ordinace\Dokumentace_ke_zpracování\Ricoh Fi-8040\KeZpracování" +PROCESSED = _DROPBOX / r"Ordinace\Dokumentace_ke_zpracování\Ricoh Fi-8040\Zpracováno" +CORRECTIONS_FILE = Path(__file__).parent / "corrections.json" +DOKUMENTACE = _DROPBOX / r"Ordinace\Dokumentace_zpracovaná" + +import threading + +_dokumentace_index: set[str] = set() +_dokumentace_ready = threading.Event() + +def _load_dokumentace_index_bg(): + if DOKUMENTACE.exists(): + names = {f.name for f in DOKUMENTACE.iterdir() if f.is_file()} + else: + names = set() + global _dokumentace_index + _dokumentace_index = names + _dokumentace_ready.set() + print(f" Index dokumentace: {len(names)} souborů načteno.") + +def start_dokumentace_index(): + t = threading.Thread(target=_load_dokumentace_index_bg, daemon=True) + t.start() + +VIEWER = Path(__file__).parent / "preview_viewer.py" +RENAME_DIALOG = Path(__file__).parent / "rename_dialog.py" +VARIANT_PICKER = Path(__file__).parent / "variant_picker.py" + +# 5 kompresních variant +COMPRESS_VARIANTS = [ + ("300 DPI / q90", 300, 90), + ("200 DPI / q85", 200, 85), + ("150 DPI / q80", 150, 80), + ("120 DPI / q75", 120, 75), + ( "96 DPI / q70", 96, 70), +] + + +# ─── Komprese jedné varianty ────────────────────────────────────────────────── + +def compress_to_temp(pdf_path: Path, dpi: int, quality: int) -> Path: + import fitz + src = fitz.open(str(pdf_path)) + mat = fitz.Matrix(dpi / 72.0, dpi / 72.0) + out = fitz.open() + for page in src: + pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB) + img_bytes = pix.tobytes("jpeg", jpg_quality=quality) + img_doc = fitz.open("pdf", fitz.open("jpeg", img_bytes).convert_to_pdf()) + rect = page.rect + np = out.new_page(width=rect.width, height=rect.height) + np.show_pdf_page(np.rect, img_doc, 0) + src.close() + tmp = Path(tempfile.mktemp(suffix=".pdf")) + out.save(tmp, deflate=True, garbage=4) + out.close() + return tmp + + +# ─── Medicus ověření ───────────────────────────────────────────────────────── + +def _medicus_connect(): + try: + import fdb + cfg = get_medicus_config() + return fdb.connect(dsn=cfg.dsn, user="SYSDBA", password="masterkey", charset="win1250") + except Exception as e: + print(f" [Medicus] Nepřipojeno: {e}") + return None + +def _lookup_by_rc(cur, rc_digits: str) -> dict | None: + cur.execute( + "SELECT IDPAC, PRIJMENI, JMENO, RODCIS FROM KAR " + "WHERE REPLACE(RODCIS, '/', '') = ?", (rc_digits,) + ) + row = cur.fetchone() + if row: + return {"idpac": row[0], "prijmeni": row[1].strip(), "jmeno": row[2].strip(), "rodcis": row[3].strip()} + return None + +def _rc_candidates(rc: str) -> list[str]: + similar = {"0": "8", "8": "0", "1": "7", "7": "1", "5": "6", "6": "5", "3": "8"} + candidates = set() + for i in range(len(rc)): + candidates.add(rc[:i] + rc[i+1:]) + for i in range(len(rc) + 1): + candidates.add(rc[:i] + "0" + rc[i:]) + for i, ch in enumerate(rc): + if ch in similar: + candidates.add(rc[:i] + similar[ch] + rc[i+1:]) + candidates.discard(rc) + return sorted(c for c in candidates if len(c) in (9, 10)) + +def _rc_checksum_ok(rc: str) -> bool: + digits = re.sub(r"\D", "", rc) + if len(digits) == 10: + return int(digits) % 11 == 0 + return True + +def verify_patient(rc_raw: str) -> dict: + rc = re.sub(r"\D", "", rc_raw or "") + if not rc: + return {"status": "not_found", "patient": None, "rc_corrected": None} + con = _medicus_connect() + if con is None: + return {"status": "offline", "patient": None, "rc_corrected": None} + try: + cur = con.cursor() + patient = _lookup_by_rc(cur, rc) + if patient: + return {"status": "ok", "patient": patient, "rc_corrected": None} + candidates = _rc_candidates(rc) + matches = [(c, _lookup_by_rc(cur, c)) for c in candidates] + matches = [(c, p) for c, p in matches if p] + if not matches: + return {"status": "not_found", "patient": None, "rc_corrected": None} + matches.sort(key=lambda x: (0 if _rc_checksum_ok(x[0]) else 1)) + best_rc, best_patient = matches[0] + return {"status": "fuzzy", "patient": best_patient, "rc_corrected": best_rc, "all_matches": matches} + finally: + con.close() + +def check_duplicates(rc: str, datum: str) -> list[str]: + if not rc or not datum: + return [] + # Počkej max 15s na dokončení indexu (typicky hotovo za dobu volání Claude) + _dokumentace_ready.wait(timeout=15) + prefix = f"{rc} {datum}" + return [name for name in _dokumentace_index if name.startswith(prefix)] + + +# ─── Korekce (few-shot příklady) ───────────────────────────────────────────── + +def load_corrections() -> list[dict]: + if CORRECTIONS_FILE.exists(): + return json.loads(CORRECTIONS_FILE.read_text(encoding="utf-8")) + return [] + +def save_correction(original: str, corrected: str): + corrections = load_corrections() + for c in corrections: + if c["original"] == original and c["corrected"] == corrected: + return + corrections.append({"original": original, "corrected": corrected}) + CORRECTIONS_FILE.write_text( + json.dumps(corrections, ensure_ascii=False, indent=2), encoding="utf-8" + ) + print(f" ✓ Korekce uložena ({len(corrections)} celkem)") + +def build_corrections_prompt() -> str: + corrections = load_corrections() + if not corrections: + return "" + lines = ["Příklady korekcí z minulých běhů (uč se z nich):"] + for c in corrections[-10:]: + lines.append(f' - špatně: "{c["original"]}"') + lines.append(f' správně: "{c["corrected"]}"') + return "\n".join(lines) + "\n\n" + + +# ─── Claude Vision API ──────────────────────────────────────────────────────── + +def extract_info(pdf_path: Path) -> dict: + print(" Převádím na obrázek...") + suffix = pdf_path.suffix.lower() + if suffix in (".jpg", ".jpeg", ".png"): + from PIL import Image + img = Image.open(pdf_path) + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=95) + img.close() + else: + images = convert_from_path(str(pdf_path), poppler_path=POPPLER_PATH, dpi=300) + buf = io.BytesIO() + images[0].save(buf, format="JPEG", quality=95) + del images + gc.collect() + image_b64 = base64.standard_b64encode(buf.getvalue()).decode("utf-8") + + prompt = ( + build_corrections_prompt() + + "Toto je naskenovaná lékařská zpráva v češtině. " + "Vrať JSON s těmito poli:\n" + "- \"jmeno\": celé jméno pacienta (příjmení + jméno + případný titul)\n" + "- \"rodne_cislo\": rodné číslo pacienta BEZ lomítka (pouze číslice)\n" + "- \"datum_zpravy\": datum zprávy ve formátu YYYY-MM-DD\n" + "- \"typ_dokumentu\": typ dokumentu — " + "\"LZ {oddělení}\" = ambulantní/lékařská zpráva (např. \"LZ chirurgie\", \"LZ kardiologie\", \"LZ plicní\", \"LZ ORL\"); " + "\"PZ {oddělení}\" = propouštěcí zpráva z hospitalizace (např. \"PZ interna\", \"PZ neurologie\"). " + "Jiné typy: \"Laboratoř\", \"CT břicha\", \"MRI páteře\", \"kolonoskopie\", " + "\"operační protokol oční\", \"poukaz FT\", \"diagnostická mamografie\" atd.\n" + "- \"poznamka\": krátká klinická poznámka česky, max 80 znaků. " + "DŮLEŽITÉ: pokud zpráva obsahuje sekci \"Závěr:\" nebo \"Závěr vyšetření:\", " + "použij VÝHRADNĚ obsah této sekce — je nejdůležitější. " + "Teprve pokud závěr chybí, shrň obsah z celé zprávy. " + "U laboratorních výsledků uváděj POUZE hodnoty mimo normu (patologické nálezy) — hodnoty v normě vynech. " + "Osmolalitu nikdy nezmiňuj ani jako patologický nález. " + "Pokud výsledky obsahují glomerulární filtraci (eGFR nebo C_CKD-EPI), přidej její klasifikaci velkými písmeny podle CKD-EPI: " + "eGFR ≥ 90 → CHRI G1, 60–89 → CHRI G2, 45–59 → CHRI G3a, 30–44 → CHRI G3b, 15–29 → CHRI G4, < 15 → CHRI G5.\n" + "- \"nazev_souboru\": název souboru ve formátu " + "\"{rodne_cislo} {datum_zpravy} {Příjmení}, {Jméno} [{typ_dokumentu}] [{poznamka}].pdf\" " + "(jméno bez titulu, RČ bez lomítka)\n" + "- \"rotace\": o kolik stupňů CCW je třeba otočit obrázek aby byl text čitelně na výšku nebo šířku " + "(hodnoty: 0, 90, 180, 270). Pokud je text již správně orientovaný, vrať 0.\n\n" + "Pokud pole nenajdeš, použij null. Nepiš nic jiného než JSON." + ) + + print(" Volám Claude Vision API...") + client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")) + response = client.messages.create( + model="claude-sonnet-4-6", + max_tokens=400, + messages=[{"role": "user", "content": [ + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_b64}}, + {"type": "text", "text": prompt}, + ]}], + ) + usage = response.usage + print(f" Tokeny: {usage.input_tokens} in + {usage.output_tokens} out = ${usage.input_tokens*3/1e6 + usage.output_tokens*15/1e6:.4f}") + + raw = response.content[0].text.strip() + if raw.startswith("```"): + raw = raw.split("```")[1] + if raw.startswith("json"): + raw = raw[4:] + try: + return json.loads(raw.strip()) + except json.JSONDecodeError: + print(f" VAROVÁNÍ: nelze parsovat JSON: {raw!r}") + return {"nazev_souboru": None, "raw": raw} + + +# ─── Subprocess helpers ─────────────────────────────────────────────────────── + +def open_preview(pdf_path: Path) -> tuple[subprocess.Popen, Path]: + geom_file = Path(tempfile.mktemp(suffix=".json")) + proc = subprocess.Popen([sys.executable, str(VIEWER), str(pdf_path), f"--write-geometry={geom_file}"]) + return proc, geom_file + + +def read_preview_bottom(geom_file: Path, timeout: float = 5.0) -> int: + import time + deadline = time.time() + timeout + while time.time() < deadline: + if geom_file.exists(): + geom = json.loads(geom_file.read_text(encoding="utf-8")) + geom_file.unlink(missing_ok=True) + return geom["y"] + geom["h"] + 30 # +30 pro title bar + time.sleep(0.1) + geom_file.unlink(missing_ok=True) + return None + + +def run_rename_dialog(nazev: str, info_lines: list, below_y: int = None) -> str | None: + tmp = Path(tempfile.mktemp(suffix=".json")) + tmp.write_text(json.dumps({"nazev": nazev, "info_lines": info_lines}, ensure_ascii=False), encoding="utf-8") + args = [sys.executable, str(RENAME_DIALOG), str(tmp)] + if below_y is not None: + args.append(f"--below-y={below_y}") + proc = subprocess.run(args, capture_output=True, text=True, encoding="utf-8") + tmp.unlink(missing_ok=True) + out = proc.stdout.strip() + return json.loads(out).get("value") if out else None + + +def run_variant_picker(variants_data: list) -> str | None: + tmp = Path(tempfile.mktemp(suffix=".json")) + tmp.write_text(json.dumps(variants_data, ensure_ascii=False), encoding="utf-8") + proc = subprocess.run( + [sys.executable, str(VARIANT_PICKER), str(tmp)], + capture_output=True, text=True, encoding="utf-8", + ) + tmp.unlink(missing_ok=True) + out = proc.stdout.strip() + return json.loads(out).get("chosen") if out else None + + +# ─── Hlavní flow ────────────────────────────────────────────────────────────── + +def process_file(pdf_path: Path): + print(f"\nSoubor: {pdf_path.name}") + + # Spusť načítání indexu dokumentace na pozadí — hotovo za dobu volání Claude + start_dokumentace_index() + + # 1. Otevři preview originálu + preview, geom_file = open_preview(pdf_path) + below_y = read_preview_bottom(geom_file) + + # 2. Claude Vision API + info = extract_info(pdf_path) + nazev = info.get("nazev_souboru") or pdf_path.name + + # 3. Medicus ověření + fuzzy matching RČ + rc_from_scan = re.sub(r"\D", "", info.get("rodne_cislo") or "") + print(f" Ověřuji v Medicus (RČ: {rc_from_scan})...") + verif = verify_patient(rc_from_scan) + + # Oprava RČ při fuzzy matchi + if verif["status"] == "fuzzy" and verif.get("rc_corrected") and nazev: + nazev = nazev.replace(rc_from_scan, verif["rc_corrected"], 1) + print(f" → RČ opraveno: {rc_from_scan} → {verif['rc_corrected']}") + + # Info řádky pro dialog + status = verif["status"] + patient = verif.get("patient") + info_lines = [] + if status == "ok": + info_lines.append(f"✓ Medicus: {patient['prijmeni']} {patient['jmeno']} | RČ {patient['rodcis']}") + elif status == "fuzzy": + info_lines.append(f"⚠ RČ ze skenu '{rc_from_scan}' → opraveno na {verif['rc_corrected']}") + info_lines.append(f" Pacient: {patient['prijmeni']} {patient['jmeno']} | RČ {patient['rodcis']}") + elif status == "not_found": + info_lines.append(f"✗ RČ '{rc_from_scan}' nenalezeno v Medicus") + else: + info_lines.append("— Medicus nedostupný (offline)") + + # Duplicity + rc_final = re.sub(r"\D", "", verif["patient"]["rodcis"] if patient else rc_from_scan) + duplicity = check_duplicates(rc_final, info.get("datum_zpravy") or "") + if duplicity: + info_lines.append(f"⚠ DUPLICITA: {', '.join(duplicity)}") + + if not info_lines: + info_lines = ["[Claude nevrátil název — uprav ručně]"] + print(" Otevírám dialog pro schválení názvu...") + final_name = run_rename_dialog(nazev, info_lines, below_y=below_y) + + preview.terminate() + + if not final_name: + print(" Přeskočeno.") + return + + if not final_name.endswith(".pdf"): + final_name += ".pdf" + final_name = re.sub(r'[<>:"/\\|?*]', '', final_name) + + if nazev and final_name != nazev: + save_correction(nazev, final_name) + + print(f" Schválený název: {final_name}") + + # 4. Generuj kompresní varianty (originál + 5 variant) + print(" Generuji kompresní varianty...") + temp_files = [] + orig_kb = round(pdf_path.stat().st_size / 1024) + variants_data = [{"path": str(pdf_path), "label": "Originál", "size_kb": orig_kb}] + for label, dpi, quality in COMPRESS_VARIANTS: + tmp = compress_to_temp(pdf_path, dpi, quality) + size_kb = round(tmp.stat().st_size / 1024) + temp_files.append(tmp) + variants_data.append({"path": str(tmp), "label": label, "size_kb": size_kb}) + print(f" {label}: {size_kb} kB") + + # 5. Vyber variantu + print(" Vyber variantu v okně...") + chosen = run_variant_picker(variants_data) + + if not chosen: + print(" Žádná varianta nevybrána, přeskakuji.") + for t in temp_files: + t.unlink(missing_ok=True) + return + + # 6. Ulož do Processed + PROCESSED.mkdir(exist_ok=True) + dest = PROCESSED / final_name + if dest.exists(): + print(f" VAROVÁNÍ: '{final_name}' již existuje, přeskakuji.") + else: + shutil.copy2(chosen, dest) + pdf_path.unlink() + print(f" ✓ Uloženo: {dest.name}") + + for t in temp_files: + t.unlink(missing_ok=True) # originál mezi temp_files není, je bezpečné + + +def process_folder(folder: Path): + files = sorted(f for f in folder.iterdir() if f.suffix.lower() in (".pdf", ".jpg", ".jpeg", ".png")) + if not files: + print(f"Žádné soubory v: {folder}") + return + print(f"Nalezeno {len(files)} soubor(ů).") + for f in files: + try: + process_file(f) + except Exception as e: + print(f" CHYBA: {e}") + print("\nHotovo.") + + +if __name__ == "__main__": + PROCESSED.mkdir(exist_ok=True) + TO_PROCESS.mkdir(exist_ok=True) + + target = Path(sys.argv[1]) if len(sys.argv) > 1 else TO_PROCESS + + if target.is_file(): + process_file(target) + elif target.is_dir(): + process_folder(target) + else: + print("Použití: python extract_patient_info_novy.py [soubor.pdf nebo složka]") + sys.exit(1) diff --git a/60 ScansProcessing/preview_viewer.py b/60 ScansProcessing/preview_viewer.py index f4b9cc2..cbd9b78 100644 --- a/60 ScansProcessing/preview_viewer.py +++ b/60 ScansProcessing/preview_viewer.py @@ -90,7 +90,20 @@ def main(): show(0) root.update_idletasks() - root.geometry("+0+0") + sw = root.winfo_screenwidth() + w = root.winfo_width() + h = root.winfo_height() + x = (sw - w) // 2 + root.geometry(f"+{x}+0") + + # Zapiš geometrii do souboru pokud byl předán argument --write-geometry= + import json as _json + for arg in sys.argv: + if arg.startswith("--write-geometry="): + geom_path = Path(arg.split("=", 1)[1]) + geom_path.write_text(_json.dumps({"x": x, "y": 0, "w": w, "h": h}), encoding="utf-8") + break + root.mainloop() diff --git a/60 ScansProcessing/rename_dialog.py b/60 ScansProcessing/rename_dialog.py new file mode 100644 index 0000000..134c2ae --- /dev/null +++ b/60 ScansProcessing/rename_dialog.py @@ -0,0 +1,93 @@ +""" +Standalone dialog pro schválení / opravu názvu souboru. +Spouští se jako subprocess z extract_patient_info.py. +Argumenty: rename_dialog.py +JSON vstup: { "nazev": "...", "info_lines": [...] } +JSON výstup: { "value": "..." } nebo { "value": null } +""" +import json +import sys +from pathlib import Path +import tkinter as tk + + +def main(): + if len(sys.argv) < 2: + print(json.dumps({"value": None})) + sys.exit(0) + + data = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8")) + nazev = data.get("nazev") or "" + info_lines = data.get("info_lines") or [] + + result = {"value": None} + + root = tk.Tk() + root.title("Schválení názvu souboru") + root.resizable(True, False) + root.attributes("-topmost", True) + root.tk.call("encoding", "system", "utf-8") + + pad = {"padx": 12, "pady": 6} + + frame_info = tk.Frame(root, bg="#f0f0f0", bd=1, relief="sunken") + frame_info.pack(fill="x", **pad) + for line in info_lines: + color = "#b00000" if line.startswith("⚠") else "#004080" if line.startswith("✓") else "#333" + tk.Label(frame_info, text=line, anchor="w", bg="#f0f0f0", + fg=color, font=("Segoe UI", 10)).pack(fill="x", padx=8, pady=1) + + tk.Label(root, text="Název souboru (bez .pdf):", anchor="w", + font=("Segoe UI", 9, "bold")).pack(fill="x", padx=12, pady=(10, 2)) + + nazev_bez = nazev[:-4] if nazev.endswith(".pdf") else nazev + var = tk.StringVar(value=nazev_bez) + entry = tk.Entry(root, textvariable=var, font=("Segoe UI", 10), width=90) + entry.pack(fill="x", padx=12, pady=(0, 10)) + entry.icursor(tk.END) + entry.focus_set() + + frame_btn = tk.Frame(root) + frame_btn.pack(pady=(0, 12)) + + def schvalit(event=None): + result["value"] = var.get().strip() + root.destroy() + + def preskocit(event=None): + result["value"] = None + root.destroy() + + tk.Button(frame_btn, text="✓ Schválit (Enter)", command=schvalit, + bg="#2a7a2a", fg="white", font=("Segoe UI", 10, "bold"), + padx=16, pady=6).pack(side="left", padx=8) + tk.Button(frame_btn, text="✗ Přeskočit (Esc)", command=preskocit, + bg="#7a2a2a", fg="white", font=("Segoe UI", 10), + padx=16, pady=6).pack(side="left", padx=8) + + root.bind("", schvalit) + root.bind("", preskocit) + + root.update_idletasks() + sw = root.winfo_screenwidth() + w = root.winfo_width() + x = (sw - w) // 2 + + # Pozice pod preview oknem pokud byl předán argument --below-y=N + below_y = None + for arg in sys.argv: + if arg.startswith("--below-y="): + below_y = int(arg.split("=", 1)[1]) + break + y = below_y if below_y is not None else (root.winfo_screenheight() - root.winfo_height() - 60) + root.geometry(f"+{x}+{y}") + + root.lift() + root.focus_force() + root.mainloop() + + print(json.dumps({"value": result["value"]}, ensure_ascii=False)) + + +if __name__ == "__main__": + main() diff --git a/60 ScansProcessing/variant_picker.py b/60 ScansProcessing/variant_picker.py new file mode 100644 index 0000000..b4cb5e5 --- /dev/null +++ b/60 ScansProcessing/variant_picker.py @@ -0,0 +1,148 @@ +""" +Jedno okno pro výběr kompresní varianty PDF. +Nahoře tlačítka 1–N pro přepínání, tlačítko "Tohle beru" pro potvrzení. +Argumenty: variant_picker.py +JSON vstup: [{"path": "...", "label": "150 DPI / q80", "size_kb": 139}, ...] +JSON výstup (stdout): {"chosen": "cesta/k/souboru"} +""" +import json +import sys +from pathlib import Path +import tkinter as tk +from PIL import Image, ImageTk +import fitz + + +def main(): + if len(sys.argv) < 2: + sys.exit(1) + + variants = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8")) + chosen = {"path": None} + docs = [fitz.open(v["path"]) for v in variants] + current = [0] + photo_ref = [None] + + root = tk.Tk() + root.tk.call("encoding", "system", "utf-8") + root.attributes("-topmost", True) + + sh = root.winfo_screenheight() + sw = root.winfo_screenwidth() + win_h = sh - 80 # odečteme taskbar + title bar + img_h = win_h - 160 + img_w = sw // 2 # šířka okna = polovina monitoru + + x = (sw - img_w) // 2 + root.geometry(f"{img_w}x{win_h}+{x}+0") + root.resizable(False, False) + + # ── Horní panel s tlačítky variant ── + frame_top = tk.Frame(root, bg="#222") + frame_top.pack(fill="x") + + btn_variants = [] + current_page = [0] + + def show(n, page_n=0): + current[0] = n + current_page[0] = page_n + doc = docs[n] + page = doc[page_n] + zoom = min(img_w / page.rect.width, img_h / page.rect.height) + pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom)) + img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples) + photo_ref[0] = ImageTk.PhotoImage(img) + lbl_img.config(image=photo_ref[0]) + page_count = len(doc) + root.title(f"Varianta {n+1}: {variants[n]['label']} ({variants[n]['size_kb']} kB) — strana {page_n+1}/{page_count}") + for i, b in enumerate(btn_variants): + b.config(bg="#2a5a9a" if i == n else "#444") + btn_prev_page.config(state="normal" if page_n > 0 else "disabled") + btn_next_page.config(state="normal" if page_n < page_count - 1 else "disabled") + + for i, v in enumerate(variants): + b = tk.Button( + frame_top, + text=f"{i+1}. {v['label']}\n{v['size_kb']} kB", + font=("Segoe UI", 9, "bold"), + bg="#444", fg="white", + relief="flat", padx=8, pady=6, + command=lambda n=i: show(n), + ) + b.pack(side="left", padx=2, pady=4) + btn_variants.append(b) + + # ── Tlačítka Beru / Přeskočit — stejný styl jako varianty ── + def beru(): + chosen["path"] = variants[current[0]]["path"] + root.destroy() + + def preskocit(): + root.destroy() + + tk.Button( + frame_top, + text="✓ Tohle beru\n", + command=beru, + bg="#2a7a2a", fg="white", + font=("Segoe UI", 9, "bold"), + relief="flat", padx=8, pady=6, + ).pack(side="left", padx=2, pady=4) + + tk.Button( + frame_top, + text="✗ Přeskočit\n", + command=preskocit, + bg="#7a2a2a", fg="white", + font=("Segoe UI", 9, "bold"), + relief="flat", padx=8, pady=6, + ).pack(side="left", padx=2, pady=4) + + # ── Navigace stran — úplně vpravo ── + btn_next_page = tk.Button( + frame_top, + text="Další ►\n", + command=lambda: show(current[0], current_page[0] + 1), + bg="#555", fg="white", + font=("Segoe UI", 9, "bold"), + relief="flat", padx=8, pady=6, + ) + btn_next_page.pack(side="right", padx=2, pady=4) + + btn_prev_page = tk.Button( + frame_top, + text="◄ Před.\n", + command=lambda: show(current[0], current_page[0] - 1), + bg="#555", fg="white", + font=("Segoe UI", 9, "bold"), + relief="flat", padx=8, pady=6, + ) + btn_prev_page.pack(side="right", padx=2, pady=4) + + # ── Obrázek ── + lbl_img = tk.Label(root, bg="black") + lbl_img.pack(fill="both", expand=True) + + root.bind("", lambda e: show(0)) + root.bind("", lambda e: show(1)) + root.bind("", lambda e: show(2)) + root.bind("", lambda e: show(3)) + root.bind("", lambda e: show(4)) + root.bind("", lambda e: beru()) + root.bind("", lambda e: preskocit()) + + show(0) + root.mainloop() + + for d in docs: + try: + d.close() + except Exception: + pass + + print(json.dumps({"chosen": chosen["path"]}, ensure_ascii=False)) + + +if __name__ == "__main__": + main()