def crop_raycast_auto(pdf_bytes: bytes, params: dict) -> bytes:
    """Crop page 0 of a PDF to the drawings crossed by a horizontal mid-page ray.

    Every vector drawing whose bounding rectangle contains the vertical
    midpoint of the page is treated as part of the puzzle grid.  The union
    of those rectangles, widened by half the stroke width of the outermost
    left/right drawings and by ``crop_margin_pt``, becomes the clip.

    Args:
        pdf_bytes: The source PDF as raw bytes.
        params: Method parameters; only ``"crop_margin_pt"`` is read
            (default 2).  ``None`` is accepted and treated as ``{}``.

    Returns:
        A new single-page PDF, as bytes, containing only the clipped area.

    Raises:
        ValueError: If no drawing crosses the horizontal mid-line.
    """
    crop_margin = (params or {}).get("crop_margin_pt", 2)

    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        page = doc[0]
        y_mid = page.mediabox.height / 2

        # (bounding rect, stroke width) of every drawing hit by the ray.
        hit_h = [
            (p["rect"], p.get("width") or 0)
            for p in page.get_drawings()
            if p["rect"].y0 <= y_mid <= p["rect"].y1
        ]
        if not hit_h:
            raise ValueError("ray-cast: zadne kresby na y_mid")

        rects = [r for r, _ in hit_h]
        x_left = min(r.x0 for r in rects)
        x_right = max(r.x1 for r in rects)
        top_cut = min(r.y0 for r in rects)
        bot_cut = max(r.y1 for r in rects)
        # Stroke width of the first drawing that touches each outer edge.
        lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0)
        lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0)

        clip = fitz.Rect(
            x_left - lw_l / 2 - crop_margin,
            top_cut - crop_margin,
            x_right + lw_r / 2 + crop_margin,
            bot_cut + crop_margin,
        )

        doc_new = fitz.open()
        try:
            p = doc_new.new_page(width=clip.width, height=clip.height)
            p.show_pdf_page(
                fitz.Rect(0, 0, clip.width, clip.height), doc, 0, clip=clip
            )
            return doc_new.tobytes()
        finally:
            doc_new.close()
    finally:
        # Always release the source document, including on the error path:
        # this runs inside long-lived pool workers.
        doc.close()
def save_cropped(updates: list[tuple]):
    """Write a batch of cropped PDFs to ``sudoku_killer.file_puzzle_cropped``.

    Args:
        updates: ``[(cropped_bytes, puzzle_id), ...]`` -- the tuple order
            matches the placeholders of the UPDATE statement.

    An empty batch is a no-op and does not open a connection.
    """
    if not updates:
        return

    import pymysql.cursors

    conn = connect_mysql(database="puzzle", cursorclass=pymysql.cursors.DictCursor)
    try:
        with conn.cursor() as cur:
            cur.executemany(
                "UPDATE sudoku_killer SET file_puzzle_cropped = %s WHERE id = %s",
                updates,
            )
        # NOTE(review): pymysql connections do not autocommit by default, so
        # without this the batch is discarded on close.  If connect_mysql
        # already enables autocommit this call is harmless -- confirm.
        conn.commit()
    finally:
        conn.close()
from pathlib import Path
from pypdf import PdfReader, PdfWriter, Transformation, PageObject

INPUT_PDF = Path(r"2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf")
OUTPUT_PDF = Path(r"sudoku_50pct_A4.pdf")

# A4 page size in PDF points (72 per inch).
A4_WIDTH = 595.2756
A4_HEIGHT = 841.8898

SCALE = 0.5

# Read the first page of the input and take its media-box size.
source_page = PdfReader(str(INPUT_PDF)).pages[0]
source_width = float(source_page.mediabox.width)
source_height = float(source_page.mediabox.height)

# Size of the page after scaling, and the offset that centres it on A4.
target_width = SCALE * source_width
target_height = SCALE * source_height
x = (A4_WIDTH - target_width) / 2
y = (A4_HEIGHT - target_height) / 2

# Blank A4 sheet that receives the scaled page as a vector object.
new_page = PageObject.create_blank_page(width=A4_WIDTH, height=A4_HEIGHT)
transform = Transformation().scale(SCALE).translate(x, y)
new_page.merge_transformed_page(source_page, transform, expand=False)

writer = PdfWriter()
writer.add_page(new_page)
with OUTPUT_PDF.open("wb") as f:
    writer.write(f)

print(f"Hotovo: {OUTPUT_PDF}")
def detect_cuts(paths, top_gap=10, bot_gap=5):
    """Locate the vertical gaps that separate header and footer from the rest.

    The rounded ``y0`` values of all drawings are scanned top-down for the
    first jump larger than ``top_gap``; the rounded ``y1`` values are scanned
    bottom-up for the first jump larger than ``bot_gap``.  Each cut is placed
    in the middle of the gap it was found in.

    Args:
        paths: Drawing dicts; each must have a ``"rect"`` entry exposing
            ``y0`` and ``y1``.
        top_gap: Minimum jump between consecutive ``y0`` values that counts
            as the header gap.
        bot_gap: Minimum jump between consecutive ``y1`` values that counts
            as the footer gap.

    Returns:
        ``(top_cut, bot_cut)``.  Either element is ``None`` on its own when
        the corresponding gap was not found, so callers must check both.
    """
    ys0 = sorted({round(p["rect"].y0) for p in paths})
    ys1 = sorted({round(p["rect"].y1) for p in paths})

    # First sufficiently large jump when walking down from the top.
    top_cut = next(
        ((lo + hi) / 2 for lo, hi in zip(ys0, ys0[1:]) if hi - lo > top_gap),
        None,
    )

    # First sufficiently large jump when walking up from the bottom.
    descending = ys1[::-1]
    bot_cut = next(
        (
            (lo + hi) / 2
            for hi, lo in zip(descending, descending[1:])
            if hi - lo > bot_gap
        ),
        None,
    )

    return top_cut, bot_cut
def main():
    """Crop every PDF under the input directory into the output directory.

    Command line: ``vstup vystup [--workers N]``.  The directory layout of
    the input is mirrored in the output, files whose output already exists
    are skipped, and anything that is not reported as ``"ok"`` by
    ``crop_one`` is written to ``errors.csv`` in the output directory.
    """
    parser = argparse.ArgumentParser(description="Batch crop Killer Sudoku PDF")
    parser.add_argument("vstup", help="Vstupní adresář s PDF soubory")
    parser.add_argument("vystup", help="Výstupní adresář pro oříznuté PDF")
    parser.add_argument("--workers", type=int, default=4, help="Počet procesů (default: 4)")
    args = parser.parse_args()

    src_dir = Path(args.vstup)
    dst_dir = Path(args.vystup)

    if not src_dir.is_dir():
        print(f"Chyba: vstupní adresář neexistuje: {src_dir}", file=sys.stderr)
        sys.exit(1)

    dst_dir.mkdir(parents=True, exist_ok=True)

    all_pdfs = sorted(src_dir.rglob("*.pdf"))
    # When the output directory lies inside the input directory, the
    # recursive scan would also pick up files cropped by an earlier run and
    # crop them again; leave those out.
    dst_resolved = dst_dir.resolve()
    if dst_resolved != src_dir.resolve():
        all_pdfs = [
            pdf for pdf in all_pdfs if dst_resolved not in pdf.resolve().parents
        ]
    if not all_pdfs:
        print("Žádné PDF soubory nenalezeny.")
        sys.exit(0)

    # Skip files that were already processed.
    tasks = []
    skipped = 0
    for src in all_pdfs:
        dst = dst_dir / src.relative_to(src_dir)
        if dst.exists():
            skipped += 1
        else:
            tasks.append((src, dst))

    print(f"Celkem PDF: {len(all_pdfs)}, přeskočeno (existují): {skipped}, ke zpracování: {len(tasks)}")

    if not tasks:
        print("Vše již zpracováno.")
        return

    errors_csv = dst_dir / "errors.csv"
    errors = []

    with ProcessPoolExecutor(max_workers=args.workers) as executor:
        futures = [executor.submit(crop_one, t) for t in tasks]
        with tqdm(total=len(tasks), unit="soubor") as bar:
            for future in as_completed(futures):
                src_path, status, detail = future.result()
                if status != "ok":
                    errors.append({"soubor": src_path, "typ": status, "detail": detail})
                bar.update(1)
                bar.set_postfix(chyby=len(errors))

    if errors:
        with open(errors_csv, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=["soubor", "typ", "detail"])
            writer.writeheader()
            writer.writerows(errors)
        print(f"\nChyby/anomálie: {len(errors)} — viz {errors_csv}")
    else:
        print("\nVšechny soubory zpracovány bez chyb.")
def crop_raycast(src_path: Path, dst_path: Path, margin: float = MARGIN):
    """Crop page 0 of ``src_path`` to the grid and save it to ``dst_path``.

    All drawings whose bounding rectangle contains the vertical midpoint of
    the page are collected; the union of their rectangles, grown by
    ``margin`` points on every side, is used as the clip.  The detected
    bounds and the page size are printed along the way.

    Args:
        src_path: Source PDF file.
        dst_path: Where the cropped single-page PDF is written.
        margin: White border, in points, kept around the detected area.

    Raises:
        ValueError: If no drawing crosses the horizontal mid-line.
    """
    doc = fitz.open(str(src_path))
    try:
        page = doc[0]
        paths = page.get_drawings()

        pw = page.mediabox.width
        ph = page.mediabox.height
        y_mid = ph / 2

        # Step 1: horizontal ray at y_mid -> rectangles it passes through.
        hit_h = [p["rect"] for p in paths if p["rect"].y0 <= y_mid <= p["rect"].y1]
        if not hit_h:
            raise ValueError("Horizontální paprsek nenašel žádné kresby na y_mid")

        # The extent of the hit rectangles is taken as the grid boundary in
        # both directions (the original author's assumption is that title
        # and copyright lie far from y_mid and are therefore never hit).
        x_left = min(r.x0 for r in hit_h)
        x_right = max(r.x1 for r in hit_h)
        top_cut = min(r.y0 for r in hit_h)
        bot_cut = max(r.y1 for r in hit_h)

        print(f"x_left={x_left:.1f} x_right={x_right:.1f}")
        print(f"top_cut={top_cut:.1f} bot_cut={bot_cut:.1f}")
        print(f"stránka: {pw:.1f} x {ph:.1f} pt")

        clip = fitz.Rect(
            x_left - margin,
            top_cut - margin,
            x_right + margin,
            bot_cut + margin,
        )
        clip_w = clip.width
        clip_h = clip.height

        doc_new = fitz.open()
        try:
            p = doc_new.new_page(width=clip_w, height=clip_h)
            p.show_pdf_page(fitz.Rect(0, 0, clip_w, clip_h), doc, 0, clip=clip)
            doc_new.save(str(dst_path))
        finally:
            doc_new.close()
    finally:
        # Release the source document even when detection or saving fails.
        doc.close()
    print(f"Uloženo: {dst_path} ({clip_w:.1f} x {clip_h:.1f} pt)")
def crop_raycast(pdf_bytes: bytes) -> bytes:
    """Crop page 0 of an in-memory PDF to the grid found by a mid-page ray.

    Drawings whose bounding rectangle contains the vertical midpoint of the
    page define the crop area.  Left and right are widened by half the
    stroke width of the outermost drawings, and the module-level ``MARGIN``
    is added on every side.

    Args:
        pdf_bytes: The source PDF as raw bytes.

    Returns:
        A new single-page PDF, as bytes, containing only the clipped area.

    Raises:
        ValueError: If no drawing crosses the horizontal mid-line.
    """
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        page = doc[0]
        y_mid = page.mediabox.height / 2

        hit_h = [
            (p["rect"], p.get("width") or 0)
            for p in page.get_drawings()
            if p["rect"].y0 <= y_mid <= p["rect"].y1
        ]
        if not hit_h:
            raise ValueError("Horizontální paprsek nenašel žádné kresby")

        rects = [r for r, _ in hit_h]
        x_left = min(r.x0 for r in rects)
        x_right = max(r.x1 for r in rects)
        top_cut = min(r.y0 for r in rects)
        bot_cut = max(r.y1 for r in rects)

        # Line width of the vertical border strokes: per the original
        # author's note the x coordinates are stroke centres, not the
        # visible outer edges, so half the width is added on each side.
        lw_left = next((lw for r, lw in hit_h if r.x0 == x_left), 0)
        lw_right = next((lw for r, lw in hit_h if r.x1 == x_right), 0)
        vis_x_left = x_left - lw_left / 2
        vis_x_right = x_right + lw_right / 2
        # top_cut / bot_cut are used as-is (the original author notes they
        # already coincide with the outer edge of the horizontal strokes).

        clip = fitz.Rect(
            vis_x_left - MARGIN,
            top_cut - MARGIN,
            vis_x_right + MARGIN,
            bot_cut + MARGIN,
        )

        doc_new = fitz.open()
        try:
            p = doc_new.new_page(width=clip.width, height=clip.height)
            p.show_pdf_page(
                fitz.Rect(0, 0, clip.width, clip.height), doc, 0, clip=clip
            )
            return doc_new.tobytes()
        finally:
            doc_new.close()
    finally:
        # Release the source document on the error path as well.
        doc.close()
puzzle_date, difficulty, file_puzzle + FROM sudoku_killer + WHERE file_puzzle IS NOT NULL + ORDER BY puzzle_number + LIMIT 10 + """) + rows = cursor.fetchall() + cursor.close() + conn.close() + + print(f"Staženo {len(rows)} záznamů z DB.") + + for row in rows: + num = row["puzzle_number"] + date = row["puzzle_date"] + diff = row["difficulty"] + pdf_bytes = bytes(row["file_puzzle"]) + + try: + cropped = crop_raycast(pdf_bytes) + out_path = OUT_DIR / f"{date} Puzzle SudokuKiller {num} [diff {diff}] cropped.pdf" + out_path.write_bytes(cropped) + print(f" OK #{num} → {out_path.name}") + except Exception as e: + print(f" CHYBA #{num}: {e}", file=sys.stderr) + + print(f"\nHotovo. Soubory v: {OUT_DIR}") + + +if __name__ == "__main__": + main() diff --git a/SběrDatRůzné/SudokuKiller/Testy/23_SizePreview.py b/SběrDatRůzné/SudokuKiller/Testy/23_SizePreview.py new file mode 100644 index 0000000..1135176 --- /dev/null +++ b/SběrDatRůzné/SudokuKiller/Testy/23_SizePreview.py @@ -0,0 +1,84 @@ +""" +Ořízne vzorový puzzle (ray-cast) a vygeneruje jedno PDF s 7 stránkami A4, +každá stránka ukazuje puzzle zmenšený o 10–70 % (krok 10 %). 
def detect_clip(page) -> fitz.Rect:
    """Return the crop rectangle around the drawings hit by a mid-page ray.

    A drawing counts as hit when its bounding rectangle contains the
    vertical midpoint of the page.  The result is the union of the hit
    rectangles, widened left/right by half the stroke width of the outermost
    drawing and grown by the module-level ``MARGIN`` on every side.

    Raises:
        ValueError: If no drawing is hit.
    """
    drawings = page.get_drawings()
    mid_y = page.mediabox.height / 2

    crossing = []
    for drawing in drawings:
        box = drawing["rect"]
        if box.y0 <= mid_y <= box.y1:
            crossing.append((box, drawing.get("width") or 0))
    if not crossing:
        raise ValueError("Detekce hranic selhala")

    boxes = [box for box, _ in crossing]
    left = min(box.x0 for box in boxes)
    right = max(box.x1 for box in boxes)
    top = min(box.y0 for box in boxes)
    bottom = max(box.y1 for box in boxes)

    # Stroke width of the first drawing touching each outer vertical edge.
    left_lw = next((lw for box, lw in crossing if box.x0 == left), 0)
    right_lw = next((lw for box, lw in crossing if box.x1 == right), 0)

    return fitz.Rect(
        left - left_lw / 2 - MARGIN,
        top - MARGIN,
        right + right_lw / 2 + MARGIN,
        bottom + MARGIN,
    )
def detect_clip(page) -> fitz.Rect:
    """Return the crop rectangle around the drawings hit by a mid-page ray.

    A drawing counts as hit when its bounding rectangle contains the
    vertical midpoint of the page.  The result is the union of the hit
    rectangles, widened left/right by half the stroke width of the outermost
    drawing and grown by the module-level ``CROP_MARGIN`` on every side.

    Raises:
        ValueError: If no drawing crosses the horizontal mid-line.
    """
    paths = page.get_drawings()
    y_mid = page.mediabox.height / 2
    hit_h = [(p["rect"], p.get("width") or 0) for p in paths
             if p["rect"].y0 <= y_mid <= p["rect"].y1]
    if not hit_h:
        # Without this guard min() below fails with an opaque
        # "arg is an empty sequence" message.
        raise ValueError("Ray-cast detekce selhala — zadne kresby na y_mid")
    rects = [r for r, _ in hit_h]
    x_left = min(r.x0 for r in rects)
    x_right = max(r.x1 for r in rects)
    top_cut = min(r.y0 for r in rects)
    bot_cut = max(r.y1 for r in rects)
    # Stroke width of the first drawing touching each outer vertical edge.
    lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0)
    lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0)
    return fitz.Rect(
        x_left - lw_l / 2 - CROP_MARGIN,
        top_cut - CROP_MARGIN,
        x_right + lw_r / 2 + CROP_MARGIN,
        bot_cut + CROP_MARGIN,
    )
def detect_clip(page) -> fitz.Rect:
    """Compute the puzzle crop rectangle for ``page`` by mid-page ray casting.

    Collects every drawing whose bounding rectangle spans the vertical
    midpoint of the page, takes the extent of those rectangles, adds half
    the stroke width of the leftmost and rightmost drawing, and finally
    pads all four sides by the module-level ``CROP_MARGIN``.

    Raises:
        ValueError: If the ray hits no drawing.
    """
    y_mid = page.mediabox.height / 2
    hit_h = [
        (p["rect"], p.get("width") or 0)
        for p in page.get_drawings()
        if p["rect"].y0 <= y_mid <= p["rect"].y1
    ]
    if not hit_h:
        # Fail with a clear message instead of min()'s
        # "arg is an empty sequence".
        raise ValueError("Ray-cast detekce selhala — zadne kresby na y_mid")

    rects = [r for r, _ in hit_h]
    x_left = min(r.x0 for r in rects)
    x_right = max(r.x1 for r in rects)
    top_cut = min(r.y0 for r in rects)
    bot_cut = max(r.y1 for r in rects)
    # Stroke width of the first drawing touching each outer vertical edge.
    lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0)
    lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0)
    return fitz.Rect(
        x_left - lw_l / 2 - CROP_MARGIN,
        top_cut - CROP_MARGIN,
        x_right + lw_r / 2 + CROP_MARGIN,
        bot_cut + CROP_MARGIN,
    )
main(): + doc_src = fitz.open(str(SRC)) + clip = detect_clip(doc_src[0]) + pw = clip.width * SCALE + ph = clip.height * SCALE + + # Horizontální pozice — vycentrovat na A4 + x0 = (A4_W - pw) / 2 + + # Vertikální rozdělení: 3 mezery (nahoře, mezi, dole) + gap = (A4_H - 2 * ph) / 3 + y_top = gap + y_bot = gap + ph + gap + + side_space = x0 # místo vlevo/vpravo pro poznámky + + print(f"Puzzle: {pw:.1f} x {ph:.1f} pt ({pw/72*25.4:.0f} x {ph/72*25.4:.0f} mm)") + print(f"Meritko: {SCALE*100:.0f} %") + print(f"Misto vlevo/vpravo: {side_space:.1f} pt ({side_space/72*25.4:.0f} mm)") + print(f"Mezera mezi puzzle: {gap:.1f} pt ({gap/72*25.4:.0f} mm)") + + doc_out = fitz.open() + page = doc_out.new_page(width=A4_W, height=A4_H) + + for y0_pos in (y_top, y_bot): + page.show_pdf_page( + fitz.Rect(x0, y0_pos, x0 + pw, y0_pos + ph), + doc_src, 0, + clip=clip, + ) + + doc_out.save(str(DST)) + doc_src.close() + doc_out.close() + print(f"Ulozeno: {DST}") + + +if __name__ == "__main__": + main() diff --git a/SběrDatRůzné/SudokuKiller/Testy/26_SaveLayout.py b/SběrDatRůzné/SudokuKiller/Testy/26_SaveLayout.py new file mode 100644 index 0000000..bced027 --- /dev/null +++ b/SběrDatRůzné/SudokuKiller/Testy/26_SaveLayout.py @@ -0,0 +1,99 @@ +""" +Změří finální puzzle, spočítá layout "2PuzzleOnA4" a uloží do layouts.json. 
def detect_clip(page) -> fitz.Rect:
    """Measure the puzzle area of ``page`` with a horizontal mid-page ray.

    Drawings whose bounding rectangle contains the vertical midpoint of the
    page define the area.  Its left and right sides are pushed outwards by
    half the stroke width of the outermost drawing, and the module-level
    ``CROP_MARGIN`` is then added on every side.

    Raises:
        ValueError: If no drawing crosses the horizontal mid-line.
    """
    paths = page.get_drawings()
    y_mid = page.mediabox.height / 2
    hit_h = [(p["rect"], p.get("width") or 0) for p in paths
             if p["rect"].y0 <= y_mid <= p["rect"].y1]
    if not hit_h:
        # Explicit failure; otherwise min() raises an unhelpful
        # "arg is an empty sequence".
        raise ValueError("Ray-cast detekce selhala — zadne kresby na y_mid")

    rects = [r for r, _ in hit_h]
    x_left = min(r.x0 for r in rects)
    x_right = max(r.x1 for r in rects)
    top_cut = min(r.y0 for r in rects)
    bot_cut = max(r.y1 for r in rects)
    # Stroke width of the first drawing touching each outer vertical edge.
    lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0)
    lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0)
    return fitz.Rect(
        x_left - lw_l / 2 - CROP_MARGIN,
        top_cut - CROP_MARGIN,
        x_right + lw_r / 2 + CROP_MARGIN,
        bot_cut + CROP_MARGIN,
    )
"crop_margin_pt": CROP_MARGIN, + "info": { + "sample_raw_puzzle_mm": f"{raw_w_mm} x {raw_h_mm}", + "scale_used_for_sample": TARGET_SCALE, + "side_margin_mm": pt_to_mm(side_pt), + "gap_between_puzzles_mm": pt_to_mm(gap_pt) + } + } + } + + # Načíst existující JSON a přidat/přepsat klíč + if JSON_PATH.exists(): + existing = json.loads(JSON_PATH.read_text(encoding="utf-8")) + existing.update(layout) + layout = existing + + JSON_PATH.write_text(json.dumps(layout, indent=2, ensure_ascii=False), encoding="utf-8") + + print(f"Ulozeno: {JSON_PATH}") + print(f" Surove puzzle: {raw_w_mm} x {raw_h_mm} mm") + print(f" Cilova velikost: {target_w_mm} x {target_h_mm} mm") + print(f" Misto po stranach: {pt_to_mm(side_pt):.1f} mm") + print(f" Mezera mezi puzzle: {pt_to_mm(gap_pt):.1f} mm") + + +if __name__ == "__main__": + main() diff --git a/SběrDatRůzné/SudokuKiller/Testy/27_ApplyLayout.py b/SběrDatRůzné/SudokuKiller/Testy/27_ApplyLayout.py new file mode 100644 index 0000000..748795f --- /dev/null +++ b/SběrDatRůzné/SudokuKiller/Testy/27_ApplyLayout.py @@ -0,0 +1,133 @@ +""" +Načte layout z layouts.json a aplikuje ho na 2 vstupní PDF soubory. + +Použití: + python 27_ApplyLayout.py [--layout 2PuzzleOnA4] + +Skript si sám detekuje hranice každého puzzle (ray-cast), spočítá +scale z aktuální velikosti vs. cílové velikosti v JSON a rozmístí je. 
def apply_2_vertical(doc_out, sources, layout):
    """Add one page to ``doc_out`` showing two puzzles stacked vertically.

    Each source's first page is clipped with ``detect_clip`` and drawn into
    a target rectangle whose size comes from the layout.  Both rectangles
    are horizontally centred; the free vertical space is split into three
    equal gaps (top, between, bottom).

    Args:
        doc_out: Output document that receives the new page.
        sources: Exactly two opened source documents.
        layout: Layout dict; reads ``page.width_pt``, ``page.height_pt``,
            ``target_puzzle_width_mm``, ``target_puzzle_height_mm`` and the
            optional ``crop_margin_pt``.

    Raises:
        ValueError: If ``sources`` does not contain exactly two documents,
            or if ``detect_clip`` finds no drawings in one of them.
    """
    if len(sources) != 2:
        raise ValueError(
            f"apply_2_vertical expects exactly 2 sources, got {len(sources)}"
        )

    page_w = layout["page"]["width_pt"]
    page_h = layout["page"]["height_pt"]
    target_w_pt = mm_to_pt(layout["target_puzzle_width_mm"])
    target_h_pt = mm_to_pt(layout["target_puzzle_height_mm"])
    crop_margin = layout.get("crop_margin_pt", CROP_MARGIN_FALLBACK)

    # Detect all clips first so a failure does not leave an empty page behind.
    clips = []
    for doc_src in sources:
        clip = detect_clip(doc_src[0], crop_margin)
        clips.append(clip)
        actual_w_mm = clip.width / 72 * 25.4
        actual_h_mm = clip.height / 72 * 25.4
        scale_w = target_w_pt / clip.width
        scale_h = target_h_pt / clip.height
        print(f" Puzzle: {actual_w_mm:.1f} x {actual_h_mm:.1f} mm -> scale {scale_w:.3f} x {scale_h:.3f}")

    page = doc_out.new_page(width=page_w, height=page_h)

    # Every puzzle is drawn at the target size, so both share one x offset
    # and the vertical gaps are equal.
    x0 = (page_w - target_w_pt) / 2
    gap = (page_h - 2 * target_h_pt) / 3

    for i, (doc_src, clip) in enumerate(zip(sources, clips)):
        y_pos = gap + i * (target_h_pt + gap)
        page.show_pdf_page(
            fitz.Rect(x0, y_pos, x0 + target_w_pt, y_pos + target_h_pt),
            doc_src, 0,
            clip=clip,
        )

    side_mm = x0 / 72 * 25.4
    gap_mm = gap / 72 * 25.4
    print(f" Misto po stranach: {side_mm:.1f} mm | Mezera: {gap_mm:.1f} mm")
Spust nejdrive 26_SaveLayout.py.", file=sys.stderr) + sys.exit(1) + + layouts = json.loads(LAYOUTS_JSON.read_text(encoding="utf-8")) + if args.layout not in layouts: + print(f"CHYBA: layout '{args.layout}' nenalezen v {LAYOUTS_JSON}", file=sys.stderr) + print(f"Dostupne layouty: {list(layouts.keys())}", file=sys.stderr) + sys.exit(1) + + layout = layouts[args.layout] + print(f"Layout: {args.layout}") + print(f"Cilova velikost: {layout['target_puzzle_width_mm']} x {layout['target_puzzle_height_mm']} mm") + + doc1 = fitz.open(args.pdf1) + doc2 = fitz.open(args.pdf2) + doc_out = fitz.open() + + apply_2_vertical(doc_out, [doc1, doc2], layout) + + doc_out.save(args.vystup) + doc1.close() + doc2.close() + doc_out.close() + print(f"Ulozeno: {args.vystup}") + + +if __name__ == "__main__": + main() diff --git a/SběrDatRůzné/SudokuKiller/layouts.json b/SběrDatRůzné/SudokuKiller/layouts.json new file mode 100644 index 0000000..2bc2e62 --- /dev/null +++ b/SběrDatRůzné/SudokuKiller/layouts.json @@ -0,0 +1,23 @@ +{ + "2PuzzleOnA4": { + "description": "2 puzzle pod sebou, horizontalne vycentrovane, misto po stranach na vypocty", + "page": { + "format": "A4", + "width_pt": 595.276, + "height_pt": 841.89 + }, + "count": 2, + "arrangement": "vertical", + "horizontal_align": "center", + "vertical_distribution": "equal_gaps", + "target_puzzle_width_mm": 117.83, + "target_puzzle_height_mm": 117.83, + "crop_margin_pt": 2, + "info": { + "sample_raw_puzzle_mm": "107.12 x 107.12", + "scale_used_for_sample": 1.1, + "side_margin_mm": 46.09, + "gap_between_puzzles_mm": 20.45 + } + } +} \ No newline at end of file