# Source path: ordinaceprojekt/RozkladScanu/navrh_pojmenovani.py
# Snapshot: 2026-05-18 11:21:13 +02:00 (246 lines, 7.4 KiB, Python)
#
# Note from the repository viewer: this file contains Unicode characters that
# might be confused with other characters. If this is intentional, the warning
# can be safely ignored.
"""
navrh_pojmenovani.py — Claude suggests a file name for a scanned brochure.

Takes the first N pages of a PDF, sends them to Claude as images and gets a
suggested name. A dialog lets the user correct the suggestion. The history of
(Claude's suggestion, user's choice) pairs is saved and used next time as
examples that teach Claude to name files according to your preferences.

Usage:
    python navrh_pojmenovani.py vystup/brozura.pdf
    python navrh_pojmenovani.py vystup/brozura.pdf vystup/muj_nazev.pdf
"""
import sys
import os
import json
import base64
import io
import copy
import tkinter as tk
from tkinter import filedialog
from pathlib import Path
from datetime import date
import fitz # pymupdf
import anthropic
def _load_env():
env_path = Path(__file__).resolve().parent.parent / "Medevio" / ".env"
if env_path.exists():
for line in env_path.read_text(encoding="utf-8").splitlines():
line = line.strip()
if "=" in line and not line.startswith("#"):
k, v = line.split("=", 1)
os.environ[k.strip()] = v.strip()
# Runs at import time so the variables from the .env file are in os.environ
# before anything below reads them.
_load_env()
# JSON file stored next to this script; holds the saved naming history.
HISTORY_FILE = Path(__file__).parent / "pojmenovani_historie.json"
# Default number of leading PDF pages converted to images.
N_PAGES = 5
DPI = 120 # sufficient for reading the content, low token cost
# ---------- helper functions ----------
def load_history() -> list:
    """Return the parsed contents of HISTORY_FILE, or an empty list when the file does not exist."""
    if not HISTORY_FILE.exists():
        return []
    raw = HISTORY_FILE.read_text(encoding="utf-8")
    return json.loads(raw)
def save_history(history: list):
    """Write ``history`` to HISTORY_FILE as indented UTF-8 JSON, keeping non-ASCII characters unescaped."""
    serialized = json.dumps(history, ensure_ascii=False, indent=2)
    HISTORY_FILE.write_text(serialized, encoding="utf-8")
def pdf_pages_to_b64(pdf_path: str, n: int = N_PAGES) -> list[str]:
    """Return base64-encoded JPEG strings for the first ``n`` pages of a PDF.

    Args:
        pdf_path: Path of the PDF file to open.
        n: Maximum number of leading pages to render. Fewer strings are
            returned when the document has fewer pages.

    Returns:
        One base64 string per rendered page, in page order.
    """
    # Scale from the 72 dpi PDF base resolution up to the configured DPI.
    zoom = DPI / 72
    mat = fitz.Matrix(zoom, zoom)
    # The context manager closes the document even when rendering a page
    # raises, so the file handle is never leaked.
    with fitz.open(pdf_path) as doc:
        pages = min(n, len(doc))
        result = []
        for i in range(pages):
            pix = doc[i].get_pixmap(matrix=mat, colorspace=fitz.csRGB)
            jpeg = pix.tobytes("jpeg", jpg_quality=75)
            result.append(base64.standard_b64encode(jpeg).decode())
    return result
def ask_claude(images_b64: list[str], history: list) -> str:
    """Send the page images to Claude and return the suggested file name.

    Args:
        images_b64: Base64-encoded JPEG page images, in page order.
        history: Past naming decisions. Each item is a dict with the keys
            ``"claude"`` (the earlier suggestion) and ``"user"`` (the name the
            user chose). Only the 15 most recent items are put into the prompt.

    Returns:
        The first non-empty text block of the reply, stripped of surrounding
        whitespace and quotes, or ``"brozura"`` when the reply contains no
        usable text.
    """
    client = anthropic.Anthropic()

    history_text = ""
    if history:
        examples = "".join(
            f" {item['claude']} -> {item['user']}\n" for item in history[-15:]
        )
        history_text = (
            "\n\nPriklady z minulosti (muj navrh → uzivatel zvolil):\n" + examples
        )

    # One image part per page, followed by a single text part with the task.
    content: list = [
        {
            "type": "image",
            "source": {"type": "base64", "media_type": "image/jpeg", "data": b64},
        }
        for b64 in images_b64
    ]
    today = date.today().strftime("%Y-%m-%d")
    content.append({
        "type": "text",
        "text": (
            f"Toto je prvnich {len(images_b64)} stranek naskenované brozury. "
            "Navrhni vhodny kratky nazev souboru (bez pripony). "
            f"Zacni vzdy datem dnesniho dne ve formatu YYYY-MM-DD ({today}), "
            "pak podtrzitko a popis obsahu. "
            "Pouzij pouze ASCII znaky, cisla, podtrzitka nebo pomlcky — zadna diakritika ani mezery. "
            # Was "(24 slova)": a 24-word description contradicts "short";
            # the intended range is two to four words.
            "Popis ma vystihovat obsah dokumentu, byt strucny (2-4 slova)."
            f"{history_text}\n\n"
            "Odpovez pouze samotnym nazvem souboru, nic jineho. "
            f"Priklad spravneho formatu: {today}_nazev_dokumentu"
        ),
    })

    response = client.messages.create(
        model="claude-opus-4-7",
        # max_tokens caps thinking and answer tokens together. The previous
        # limit of 64 left no room for any thinking before the file name, so
        # the reply could be cut off and the fallback name returned.
        max_tokens=1024,
        thinking={"type": "adaptive"},
        messages=[{"role": "user", "content": content}],
    )

    for block in response.content:
        if block.type != "text":
            continue
        name = block.text.strip().strip('"').strip("'")
        # Skip empty text blocks so the fallback below is used instead of "".
        if name:
            return name
    return "brozura"
def dialog(claude_suggestion: str) -> dict:
    """Show a dialog with the proposed name, which the user can edit.

    Args:
        claude_suggestion: Text shown as the suggestion and pre-filled
            (and pre-selected) in the entry field.

    Returns:
        A dict with two keys: ``"name"`` holds the stripped entry text once
        the user confirms a non-empty value (otherwise ``None``), and
        ``"cancelled"`` is ``True`` after the cancel button or Escape was used.
    """
    root = tk.Tk()
    root.title("Navrh pojmenovani")
    root.resizable(False, False)
    root.attributes("-topmost", True)
    # Mutable holder so the nested callbacks can report the outcome.
    result = {"name": None, "cancelled": False}
    tk.Label(
        root, text="Claude navrhuje:", font=("Segoe UI", 9, "italic"), fg="#555"
    ).pack(padx=24, pady=(16, 2), anchor="w")
    tk.Label(
        root, text=claude_suggestion, font=("Segoe UI", 12, "bold"), fg="#1a6aaa"
    ).pack(padx=24, anchor="w")
    tk.Label(
        root, text="Nazev souboru (uprav pokud chces):", font=("Segoe UI", 9)
    ).pack(padx=24, pady=(12, 2), anchor="w")
    entry = tk.Entry(root, width=52, font=("Segoe UI", 10))
    entry.insert(0, claude_suggestion)
    entry.pack(padx=24)
    # Select the whole suggestion and focus the field so typing replaces it.
    entry.select_range(0, tk.END)
    entry.focus_set()

    def ok(event=None):
        # Only a non-empty name is accepted.
        val = entry.get().strip()
        if val:
            result["name"] = val
            root.destroy()

    def cancel(event=None):
        result["cancelled"] = True
        root.destroy()

    btn = tk.Frame(root)
    btn.pack(pady=16)
    tk.Button(btn, text="OK", width=12, command=ok).pack(side=tk.LEFT, padx=6)
    tk.Button(btn, text="Zrusit", width=12, command=cancel).pack(side=tk.LEFT, padx=6)
    # Keyboard shortcuts mirror the two buttons.
    root.bind("<Return>", ok)
    root.bind("<Escape>", cancel)
    # Blocks until the window is destroyed.
    root.mainloop()
    return result
# ---------- main logic ----------
def pick_file() -> Path | None:
    """Open a file-selection dialog for choosing the PDF to name.

    Returns:
        The chosen path, or ``None`` when the dialog returned no selection.
    """
    # A hidden, always-on-top root window serves as the dialog's parent.
    hidden_root = tk.Tk()
    hidden_root.withdraw()
    hidden_root.attributes("-topmost", True)

    start_dir = Path(__file__).parent / "vystup"
    patterns = [("PDF soubory", "*.pdf"), ("Vsechny soubory", "*.*")]
    selection = filedialog.askopenfilename(
        title="Vyber PDF k pojmenovani",
        filetypes=patterns,
        initialdir=start_dir,
    )
    hidden_root.destroy()

    if not selection:
        return None
    return Path(selection)
def main():
    """Run the interactive naming workflow for one PDF.

    Command line:
        argv[1] (optional): PDF to name; when omitted, a file-selection
            dialog is shown instead.
        argv[2] (optional): explicit target path. When given, the file is
            renamed to it; otherwise the target is the chosen name plus the
            original suffix, in the source file's directory.

    Exits with status 0 when no file is picked, and with status 1 when the
    source does not exist or the target already exists as a different file.
    Returns without renaming when the dialog is cancelled or yields no name.
    """
    if len(sys.argv) < 2:
        pdf_path = pick_file()
        if pdf_path is None:
            print("Zadny soubor nebyl vybran.")
            sys.exit(0)
    else:
        pdf_path = Path(sys.argv[1])
    if not pdf_path.exists():
        print(f"Soubor nenalezen: {pdf_path}")
        sys.exit(1)

    # Output path — optional second argument.
    out_path: Path | None = None
    if len(sys.argv) > 2:
        out_path = Path(sys.argv[2])

    print(f"Nacitam stranky z: {pdf_path}")
    images = pdf_pages_to_b64(str(pdf_path), N_PAGES)
    print(f" {len(images)} stranek prevedeno na obrazky")
    history = load_history()
    print(f" {len(history)} polozek v historii pojmenovani")
    print("Ptam se Claudea...")
    claude_name = ask_claude(images, history)
    print(f" Claude navrhuje: {claude_name}")

    # If this file was named before (Claude proposed the same name), pre-fill
    # the dialog with the user's most recent choice for that suggestion.
    prefill = claude_name
    for item in reversed(history):
        if item["claude"] == claude_name:
            prefill = item["user"]
            print(f" Znamy dokument — predvyplnuji predchozi nazev: {prefill}")
            break

    res = dialog(prefill)
    if res["cancelled"] or not res["name"]:
        print("Zruseno — soubor nebyl prejmenovan.")
        return

    # Path separators would move the file into another directory.
    user_name = res["name"].replace("/", "-").replace("\\", "-").strip()
    # The suffix is appended below, so drop one the user typed themselves;
    # otherwise the result would be e.g. "name.pdf.pdf".
    suffix = pdf_path.suffix
    if suffix and user_name.lower().endswith(suffix.lower()):
        user_name = user_name[: -len(suffix)].rstrip()
    if not user_name:
        print("Zruseno — soubor nebyl prejmenovan.")
        return

    # Save to history.
    history.append({"claude": claude_name, "user": user_name})
    save_history(history)
    print(f" Historie ulozena ({len(history)} polozek)")

    # Determine the output file.
    if out_path is None:
        out_path = pdf_path.parent / (user_name + pdf_path.suffix)
    if out_path != pdf_path:
        # Path.rename silently replaces an existing target on POSIX and raises
        # on Windows; refuse in both cases rather than lose another file.
        # samefile() still allows a differently spelled path to the same file.
        if out_path.exists() and not out_path.samefile(pdf_path):
            print(f"Cilovy soubor uz existuje: {out_path}")
            sys.exit(1)
        pdf_path.rename(out_path)
        print(f"Prejmenovan: {out_path}")
    else:
        print(f"Nazev beze zmeny: {out_path}")
# Run the workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()