diff --git a/SběrDatRůzné/DailyCalcudoku/vykresli_calcudoku.py b/SběrDatRůzné/DailyCalcudoku/vykresli_calcudoku.py index ec02fe6..a66e78f 100644 --- a/SběrDatRůzné/DailyCalcudoku/vykresli_calcudoku.py +++ b/SběrDatRůzné/DailyCalcudoku/vykresli_calcudoku.py @@ -157,6 +157,52 @@ def draw_calcudoku(c: Canvas, x0: float, y0: float, cell: float, c.line(lx, ly1, lx, ly2) +def generate_pdf(puzzles: list[dict], output_path: Path): + """ + Vygeneruje PDF ze seznamu puzzle. + Každý dict musí mít: difficulty, cages_str, solution_str, grid_size, puzzle_date. + """ + BOARD_CM = 11 + SOL_CM = 6 + GAP = 1.5 * cm + page_w, page_h = A4 + + prepped = [] + for p in puzzles: + gs = p["grid_size"] + cell = BOARD_CM * cm / gs + cages = parse_cages(p["cages_str"]) + cage_map = build_cage_map(cages, gs) + solution = parse_solution(p["solution_str"], gs) + prepped.append((p, gs, cell, cages, cage_map, solution)) + + c = Canvas(str(output_path), pagesize=A4) + + # Zadání — 2 puzzle nad sebou na stránku + for i in range(0, len(prepped), 2): + for j, (p, gs, cell, cages, cage_map, _) in enumerate(prepped[i:i + 2]): + board = gs * cell + x0 = (page_w - board) / 2 + y0 = page_h - 2 * cm - j * (BOARD_CM * cm + 3 * cm) + draw_calcudoku(c, x0, y0, cell, cages, cage_map, gs, + f"Calcudoku {p['difficulty']} — {p['puzzle_date']}") + c.showPage() + + # Řešení + c.setFont("ArialBold", 14) + c.drawCentredString(page_w / 2, page_h - 2 * cm, "Řešení") + y_cursor = page_h - 3.5 * cm + for p, gs, _, cages, cage_map, solution in prepped: + sol_cell = SOL_CM * cm / gs + sol_board = gs * sol_cell + x0 = (page_w - sol_board) / 2 + draw_calcudoku(c, x0, y_cursor, sol_cell, cages, cage_map, gs, + p["difficulty"], solution=solution) + y_cursor -= sol_board + GAP + c.showPage() + c.save() + + def main(): conn = connect_mysql(database="puzzle") cur = conn.cursor() @@ -176,31 +222,14 @@ def main(): difficulty, cages_str, solution_str, extra_json = row extra = json.loads(extra_json) - grid_size = extra["grid_size"] - - cages = parse_cages(cages_str) - cage_map = build_cage_map(cages, grid_size) - solution = parse_solution(solution_str, grid_size) - - page_w, page_h = A4 - board_cm = 11 - cell = board_cm * cm / grid_size - board = grid_size * cell - - c = Canvas(str(OUTPUT), pagesize=A4) - - # Zadání - x0 = (page_w - board) / 2 - y0 = page_h - 2 * cm - draw_calcudoku(c, x0, y0, cell, cages, cage_map, grid_size, - f"Calcudoku {difficulty} — 2026-05-08") - - # Řešení - y0_sol = y0 - board - 3 * cm - draw_calcudoku(c, x0, y0_sol, cell, cages, cage_map, grid_size, - "Řešení", solution=solution) - - c.save() + puzzles = [{ + "difficulty": difficulty, + "cages_str": cages_str, + "solution_str": solution_str, + "grid_size": extra["grid_size"], + "puzzle_date": "2026-05-08", + }] + generate_pdf(puzzles, OUTPUT) print(f"PDF uloženo: {OUTPUT}") diff --git a/SběrDatRůzné/DailyKakuro/vykresli_kakuro.py b/SběrDatRůzné/DailyKakuro/vykresli_kakuro.py index af16644..57f3d8a 100644 --- a/SběrDatRůzné/DailyKakuro/vykresli_kakuro.py +++ b/SběrDatRůzné/DailyKakuro/vykresli_kakuro.py @@ -62,6 +62,7 @@ def draw_kakuro(c: Canvas, x0: float, y0: float, cell: float, num_font = max(cell * 0.5, 6) if title: + c.setFillColor(colors.black) c.setFont("ArialBold", 12) c.drawString(x0, y0 + 5, title) @@ -114,6 +115,43 @@ def draw_kakuro(c: Canvas, x0: float, y0: float, cell: float, c.line(x0 + i * cell, y0, x0 + i * cell, y0 - h * cell) +def generate_pdf(puzzles: list[dict], output_path: Path): + """puzzles: difficulty, puzzle_str, puzzle_date""" + BOARD_CM = 11 + SOL_CM = 6 + GAP = 1.5 * cm + page_w, page_h = A4 + + prepped = [] + for p in puzzles: + grid = parse_grid(p["puzzle_str"]) + h, w = len(grid), len(grid[0]) + cell = BOARD_CM * cm / max(h, w) + prepped.append((p, h, w, cell)) + + c = Canvas(str(output_path), pagesize=A4) + + for i in range(0, len(prepped), 2): + for j, (p, h, w, cell) in enumerate(prepped[i:i + 2]): + x0 = (page_w - w * cell) / 2 + y0 = page_h - 2 * cm - j * (BOARD_CM * cm + 3 * cm) + draw_kakuro(c, x0, y0, cell, p["puzzle_str"], + f"Kakuro {p['difficulty'].capitalize()} — {p['puzzle_date']}") + c.showPage() + + c.setFont("ArialBold", 14) + c.drawCentredString(page_w / 2, page_h - 2 * cm, "Řešení") + y_cursor = page_h - 3.5 * cm + for p, h, w, _ in prepped: + sol_cell = SOL_CM * cm / max(h, w) + x0 = (page_w - w * sol_cell) / 2 + draw_kakuro(c, x0, y_cursor, sol_cell, p["puzzle_str"], + p["difficulty"].capitalize(), show_solution=True) + y_cursor -= h * sol_cell + GAP + c.showPage() + c.save() + + def main(): conn = connect_mysql(database="puzzle") cur = conn.cursor() diff --git a/SběrDatRůzné/DailyStr8ts/vykresli_puzzle.py b/SběrDatRůzné/DailyStr8ts/vykresli_puzzle.py index 9a200c7..67a30e3 100644 --- a/SběrDatRůzné/DailyStr8ts/vykresli_puzzle.py +++ b/SběrDatRůzné/DailyStr8ts/vykresli_puzzle.py @@ -57,6 +57,72 @@ def draw_str8ts(c: Canvas, x0: float, y0: float, puzzle: str, bw: str, title: st c.line(x0 + i * CELL, y0, x0 + i * CELL, y0 - BOARD) +def _draw_sized(c: Canvas, x0: float, y0: float, cell: float, + puzzle: str, bw: str, title: str = ""): + grid = 9 + board = grid * cell + font_size = cell * 0.55 + + if title: + c.setFont("Helvetica-Bold", 12) + c.drawString(x0, y0 + 5, title) + + for idx in range(81): + row, col = divmod(idx, 9) + cell_x = x0 + col * cell + cell_y = y0 - (row + 1) * cell + is_black = bw[idx] == "1" + ch = puzzle[idx] + + if is_black: + c.setFillColor(colors.black) + c.rect(cell_x, cell_y, cell, cell, fill=1, stroke=0) + + if ch in "123456789": + c.setFillColor(colors.yellow if is_black else colors.black) + c.setFont("Helvetica-Bold", max(font_size, 4)) + c.drawCentredString(cell_x + cell / 2, cell_y + cell * 0.3, ch) + c.setFillColor(colors.black) + + for i in range(grid + 1): + c.setLineWidth(0.8) + c.line(x0, y0 - i * cell, x0 + board, y0 - i * cell) + c.line(x0 + i * cell, y0, x0 + i * cell, y0 - board) + + +def generate_pdf(puzzles: list[dict], output_path: Path): + """puzzles: difficulty, puzzle, bw, solution, puzzle_date""" + BOARD_CM = 11 + SOL_CM = 6 + GAP = 1.5 * cm + page_w, page_h = A4 + cell_main = BOARD_CM * cm / 9 + board_main = 9 * cell_main + x0_main = (page_w - board_main) / 2 + + c = Canvas(str(output_path), pagesize=A4) + + for i in range(0, len(puzzles), 2): + for j, p in enumerate(puzzles[i:i + 2]): + y0 = page_h - 2 * cm - j * (BOARD_CM * cm + 3 * cm) + _draw_sized(c, x0_main, y0, cell_main, p["puzzle"], p["bw"], + f"Str8ts {p['difficulty'].capitalize()} — {p['puzzle_date']}") + c.showPage() + + c.setFont("Helvetica-Bold", 14) + c.drawCentredString(page_w / 2, page_h - 2 * cm, "Řešení") + y_cursor = page_h - 3.5 * cm + sol_cell = SOL_CM * cm / 9 + sol_board = 9 * sol_cell + x0_sol = (page_w - sol_board) / 2 + for p in puzzles: + _draw_sized(c, x0_sol, y_cursor, sol_cell, p["solution"], p["bw"], + p["difficulty"].capitalize()) + y_cursor -= sol_board + GAP + c.showPage() + c.save() + + def main(): conn = connect_mysql(database="puzzle") cur = conn.cursor() diff --git a/SběrDatRůzné/DailySudoku/vykresli_sudoku.py b/SběrDatRůzné/DailySudoku/vykresli_sudoku.py index c7d50bc..c56b2b9 100644 --- a/SběrDatRůzné/DailySudoku/vykresli_sudoku.py +++ b/SběrDatRůzné/DailySudoku/vykresli_sudoku.py @@ -76,6 +76,39 @@ def draw_sudoku(c: Canvas, x0: float, y0: float, cell: float, c.line(x0 + i * cell, y0, x0 + i * cell, y0 - 9 * cell) +def generate_pdf(puzzles: list[dict], output_path: Path): + """puzzles: difficulty, puzzle, solution, puzzle_date""" + BOARD_CM = 11 + SOL_CM = 6 + GAP = 1.5 * cm + page_w, page_h = A4 + + c = Canvas(str(output_path), pagesize=A4) + + for i in range(0, len(puzzles), 2): + for j, p in enumerate(puzzles[i:i + 2]): + cell = BOARD_CM * cm / 9 + board = 9 * cell + x0 = (page_w - board) / 2 + y0 = page_h - 2 * cm - j * (BOARD_CM * cm + 3 * cm) + draw_sudoku(c, x0, y0, cell, p["puzzle"], + f"Sudoku {p['difficulty'].capitalize()} — {p['puzzle_date']}") + c.showPage() + + c.setFont("ArialBold", 14) + c.drawCentredString(page_w / 2, page_h - 2 * cm, "Řešení") + y_cursor = page_h - 3.5 * cm + for p in puzzles: + sol_cell = SOL_CM * cm / 9 + sol_board = 9 * sol_cell + x0 = (page_w - sol_board) / 2 + draw_sudoku(c, x0, y_cursor, sol_cell, p["puzzle"], + p["difficulty"].capitalize(), show_solution=True, solution=p["solution"]) + y_cursor -= sol_board + GAP + c.showPage() + c.save() + + def main(): conn = connect_mysql(database="puzzle") cur = conn.cursor() diff --git a/SběrDatRůzné/DailySuguru/vykresli_suguru.py b/SběrDatRůzné/DailySuguru/vykresli_suguru.py index 9aba3db..eeb4d8c 100644 --- a/SběrDatRůzné/DailySuguru/vykresli_suguru.py +++ b/SběrDatRůzné/DailySuguru/vykresli_suguru.py @@ -126,6 +126,43 @@ def draw_suguru(c: Canvas, x0: float, y0: float, cell: float, x0 + (co + 1) * cell, y0 - rows * cell) +def generate_pdf(puzzles: list[dict], output_path: Path): + """puzzles: difficulty, puzzle_str, solution_str, grid_size, puzzle_date""" + BOARD_CM = 11 + SOL_CM = 6 + GAP = 1.5 * cm + page_w, page_h = A4 + + prepped = [] + for p in puzzles: + color_map, clues, rows, cols = parse_puzzle(p["puzzle_str"], p["grid_size"]) + solution = parse_solution(p["solution_str"], rows, cols) + cell = BOARD_CM * cm / max(rows, cols) + prepped.append((p, rows, cols, cell, color_map, clues, solution)) + + c = Canvas(str(output_path), pagesize=A4) + + for i in range(0, len(prepped), 2): + for j, (p, rows, cols, cell, color_map, clues, _) in enumerate(prepped[i:i + 2]): + x0 = (page_w - cols * cell) / 2 + y0 = page_h - 2 * cm - j * (BOARD_CM * cm + 3 * cm) + draw_suguru(c, x0, y0, cell, color_map, clues, rows, cols, + f"Suguru {p['difficulty']} — {p['puzzle_date']}") + c.showPage() + + c.setFont("ArialBold", 14) + c.drawCentredString(page_w / 2, page_h - 2 * cm, "Řešení") + y_cursor = page_h - 3.5 * cm + for p, rows, cols, _, color_map, clues, solution in prepped: + sol_cell = SOL_CM * cm / max(rows, cols) + x0 = (page_w - cols * sol_cell) / 2 + draw_suguru(c, x0, y_cursor, sol_cell, color_map, clues, rows, cols, + p["difficulty"], solution=solution) + y_cursor -= rows * sol_cell + GAP + c.showPage() + c.save() + + def main(): conn = connect_mysql(database="puzzle") cur = conn.cursor() diff --git a/SběrDatRůzné/SudokuKiller/NOTES.md b/SběrDatRůzné/SudokuKiller/NOTES.md deleted file mode 100644 index bd4658c..0000000 --- a/SběrDatRůzné/SudokuKiller/NOTES.md +++ /dev/null @@ -1,107 +0,0 @@ -# SudokuKiller — technické poznámky - -## Hlavní skripty - -| Skript | Popis | -|--------|-------| -| `stahni_killer_structured.py` | Stáhne strukturovaná data (cage definice + řešení) z dailykillersudoku.com do MySQL tabulky `puzzles`. Průběžně ukládá zálohu do `killer_structured_data.json` | -| `vykresli_killer_sudoku.py` | Vygeneruje PDF z dat v MySQL — Killer Sudoku zadání + řešení, vektorové, vzhledem identické s originálem z webu | - -Ostatní (stará pipeline s PDF bloby, průzkumné skripty, testovací PDF) je v podadresáři `Testy/`. - -## Zdroj dat - -Web: https://www.dailykillersudoku.com/ - -Každý puzzle má stránku `/puzzle/{N}` s inline JSON daty v HTML: -```javascript -DKS.puzzle = new DKS.Puzzle({ - "id": 376, - "date": "2009-05-04", - "difficulty": 4, - "board_base64": "AZoACQAE...", - "solution_base64": "AJoICQIG...", - "puzzle_type": 1 -}) -``` - -## Dekódování base64 - -### board_base64 -- 2 bajty header (puzzle_type, flags) -- 81 × 2 bajty = 162 bajtů — cage ID pro každou buňku (uint16 big-endian) -- N bajtů — součet pro každou klec (1 bajt = max 255) - -### solution_base64 -- 2 bajty header -- 81 bajtů — čísla řešení (řádek po řádku) - -## Typy puzzle - -| puzzle_type | game_type v DB | Popis | -|-------------|----------------|-------| -| 1 | `killer_sudoku` | Killer Sudoku — klece se součty | -| 2 | `killer_sudoku_gt` | Greater-Than Killer Sudoku — klece + nerovnosti | - -## Obtížnost - -Škála 1–10 (z webu), uložena v `difficulty`. - -## MySQL — sdílená tabulka `puzzles` - -Strukturovaná data: -- `game_type` = `'killer_sudoku'` / `'killer_sudoku_gt'` -- `difficulty` = `'1'` až `'10'` -- `puzzle` = klece ve formátu `sum,r0c1r0c2|sum,r3c4r3c5|...` (`VARCHAR(1000)`) -- `solution` = flat string 81 číslic (`VARCHAR(1000)`) -- `extra` = `{"grid_size": 9, "puzzle_number": 376, "original_difficulty": 4}` -- `source` = `'dailykillersudoku.com'` - -**Pozor:** `puzzle` a `solution` byly původně `VARCHAR(200)` — nedostačovalo, cage stringy mají až ~500 znaků. Sloupce rozšířeny na `VARCHAR(1000)`. - -## Stav stažených dat - -- ~28 700 puzzlů (1–31 416) -- Killer Sudoku: ~17 200, Greater-Than: ~11 500 -- Zdrojová data v `killer_structured_data.json` (záloha pro případ MySQL chyby) - -## PDF rendering — pořadí vrstev - -Klíčové pro vzhled identický s originálem z webu (`vykresli_killer_sudoku.py`): - -1. **Bílé pozadí** -2. **Čísla řešení** (jen pro řešovou variantu, šedě) -3. **Tečkované ohraničení klecí** — odsazené dovnitř buněk o `cell * 0.10`, slévání segmentů v rámci stejné klece (jeden `c.line()` přes víc buněk → pattern teček neresetuje) -4. **Tenká plná mřížka** — všechny řádky/sloupce, šedě (překryje přesahy tečkovaných v křížení) -5. **Tlusté čáry 3×3** + obvod, černě -6. **Popisky součtů** — bíle podsvícené, ArialBold - -### Vnější vs vnitřní rohy klecí - -Při slévání tečkovaných segmentů endpoints buďto **zkrátit** o inset (vnější roh) nebo **prodloužit** o inset (vnitřní roh — kde klec zahýbá L-tvarem). - -Detekce: pro horizontální segment top borderu od sloupce `s` do `co` (exclusive): -- Levý konec vnitřní roh = `cage_map[r][s-1] == cid` → prodloužit -- Pravý konec vnitřní roh = `cage_map[r][co] == cid` → prodloužit - -Bez tohoto fixu se na vnitřních rozích L-tvarů objevují viditelné mezery. - -## Závislosti - -- `requests` — HTTP fetch (bez Playwright, data jsou inline v HTML) -- `reportlab` — PDF generation (vektorová grafika) -- `tqdm` — progress bar -- `mysql_db` (lokální Knihovny) — DB připojení - -## Použití - -```bash -# Stažení dat (s pokračováním z JSON pokud existuje) -python stahni_killer_structured.py --run - -# Pouze import už stažených JSON dat do MySQL -python stahni_killer_structured.py --import - -# Vygenerování PDF pro puzzle 31414 -python vykresli_killer_sudoku.py -``` diff --git a/SběrDatRůzné/SudokuKiller/Testy/10 MakePuzzleSmaller.py b/SběrDatRůzné/SudokuKiller/Testy/10 MakePuzzleSmaller.py deleted file mode 100644 index 57b80c0..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/10 MakePuzzleSmaller.py +++ /dev/null @@ -1,42 +0,0 @@ -from pathlib import Path -from pypdf import PdfReader, PdfWriter, Transformation, PageObject - -INPUT_PDF = Path(r"2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf") -OUTPUT_PDF = Path(r"sudoku_50pct_A4.pdf") - -# A4 v bodech, 72 dpi -A4_WIDTH = 595.2756 -A4_HEIGHT = 841.8898 - -SCALE = 0.5 - -reader = PdfReader(str(INPUT_PDF)) -source_page = reader.pages[0] - -source_width = float(source_page.mediabox.width) -source_height = float(source_page.mediabox.height) - -# Nová prázdná A4 stránka -new_page = PageObject.create_blank_page( - width=A4_WIDTH, - height=A4_HEIGHT -) - -# Výpočet pozice pro vycentrování -target_width = source_width * SCALE -target_height = source_height * SCALE - -x = (A4_WIDTH - target_width) / 2 -y = (A4_HEIGHT - target_height) / 2 - -# Vložit původní PDF stránku jako vektorový objekt, zmenšený na 50 % -transform = Transformation().scale(SCALE).translate(x, y) -new_page.merge_transformed_page(source_page, transform, expand=False) - -writer = PdfWriter() -writer.add_page(new_page) - -with OUTPUT_PDF.open("wb") as f: - writer.write(f) - -print(f"Hotovo: {OUTPUT_PDF}") \ No newline at end of file diff --git a/SběrDatRůzné/SudokuKiller/Testy/20_CropPuzzles.py b/SběrDatRůzné/SudokuKiller/Testy/20_CropPuzzles.py deleted file mode 100644 index 2894980..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/20_CropPuzzles.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -Batch crop Killer Sudoku PDF souborů — odstraní nadpis nahoře a copyright dole. -Zachovává vektorový obsah (cairo-generované PDF). - -Použití: - python 20_CropPuzzles.py [--workers N] -""" - -import argparse -import csv -import sys -from concurrent.futures import ProcessPoolExecutor, as_completed -from pathlib import Path - -import fitz # PyMuPDF -from tqdm import tqdm - - -def detect_cuts(paths): - """Vrátí (top_cut, bot_cut) nebo (None, None) pokud detekce selže.""" - ys0 = sorted(set(round(p["rect"].y0) for p in paths)) - ys1 = sorted(set(round(p["rect"].y1) for p in paths)) - - top_cut = None - for i in range(1, len(ys0)): - if ys0[i] - ys0[i - 1] > 10: - top_cut = (ys0[i - 1] + ys0[i]) / 2 - break - - bot_cut = None - for i in range(len(ys1) - 1, 0, -1): - if ys1[i] - ys1[i - 1] > 5: - bot_cut = (ys1[i - 1] + ys1[i]) / 2 - break - - return top_cut, bot_cut - - -def crop_one(args): - """Zpracuje jeden soubor. Vrátí (src_path, status, detail).""" - src_path, dst_path = args - try: - doc_src = fitz.open(str(src_path)) - page = doc_src[0] - paths = page.get_drawings() - - if not paths: - doc_src.close() - return str(src_path), "anomalie", "žádné kresby (get_drawings prázdný)" - - top_cut, bot_cut = detect_cuts(paths) - - if top_cut is None or bot_cut is None: - doc_src.close() - return str(src_path), "anomalie", f"gap detekce selhala (top={top_cut}, bot={bot_cut})" - - page_w = page.mediabox.width - clip = fitz.Rect(0, top_cut, page_w, bot_cut) - - doc_new = fitz.open() - p = doc_new.new_page(width=clip.width, height=clip.height) - p.show_pdf_page(fitz.Rect(0, 0, clip.width, clip.height), doc_src, 0, clip=clip) - - dst_path.parent.mkdir(parents=True, exist_ok=True) - doc_new.save(str(dst_path)) - - doc_src.close() - doc_new.close() - return str(src_path), "ok", "" - - except Exception as e: - return str(src_path), "chyba", str(e) - - -def main(): - parser = argparse.ArgumentParser(description="Batch crop Killer Sudoku PDF") - parser.add_argument("vstup", help="Vstupní adresář s PDF soubory") - parser.add_argument("vystup", help="Výstupní adresář pro oříznuté PDF") - parser.add_argument("--workers", type=int, default=4, help="Počet procesů (default: 4)") - args = parser.parse_args() - - src_dir = Path(args.vstup) - dst_dir = Path(args.vystup) - - if not src_dir.is_dir(): - print(f"Chyba: vstupní adresář neexistuje: {src_dir}", file=sys.stderr) - sys.exit(1) - - dst_dir.mkdir(parents=True, exist_ok=True) - - all_pdfs = sorted(src_dir.rglob("*.pdf")) - if not all_pdfs: - print("Žádné PDF soubory nenalezeny.") - sys.exit(0) - - # Přeskočit již zpracované - tasks = [] - skipped = 0 - for src in all_pdfs: - rel = src.relative_to(src_dir) - dst = dst_dir / rel - if dst.exists(): - skipped += 1 - else: - tasks.append((src, dst)) - - print(f"Celkem PDF: {len(all_pdfs)}, přeskočeno (existují): {skipped}, ke zpracování: {len(tasks)}") - - if not tasks: - print("Vše již zpracováno.") - return - - errors_csv = dst_dir / "errors.csv" - errors = [] - - with ProcessPoolExecutor(max_workers=args.workers) as executor: - futures = {executor.submit(crop_one, t): t for t in tasks} - with tqdm(total=len(tasks), unit="soubor") as bar: - for future in as_completed(futures): - src_path, status, detail = future.result() - if status != "ok": - errors.append({"soubor": src_path, "typ": status, "detail": detail}) - bar.update(1) - bar.set_postfix(chyby=len(errors)) - - if errors: - with open(errors_csv, "w", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=["soubor", "typ", "detail"]) - writer.writeheader() - writer.writerows(errors) - print(f"\nChyby/anomálie: {len(errors)} — viz {errors_csv}") - else: - print("\nVšechny soubory zpracovány bez chyb.") - - -if __name__ == "__main__": - main() diff --git a/SběrDatRůzné/SudokuKiller/Testy/21_CropRayCast.py b/SběrDatRůzné/SudokuKiller/Testy/21_CropRayCast.py deleted file mode 100644 index 116634c..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/21_CropRayCast.py +++ /dev/null @@ -1,61 +0,0 @@ -""" -Crop Killer Sudoku PDF ray-casting metodou: -1. Horizontální paprsek na y_mid → najde x_left, x_right mřížky -2. Vertikální paprsek podél x_left → najde top_cut, bot_cut mřížky -Výsledek: oříznuté PDF jen s mřížkou + malý bílý rámeček (MARGIN). -""" - -import fitz -from pathlib import Path - -MARGIN = 4 # pt bílého rámečku kolem mřížky - -SRC = Path(r"U:/ordinaceprojekt/SběrDatRůzné/SudokuKiller/Testy/2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf") -DST = Path(r"U:/ordinaceprojekt/SběrDatRůzné/SudokuKiller/Testy/cropped_raycast.pdf") - - -def crop_raycast(src_path: Path, dst_path: Path, margin: float = MARGIN): - doc = fitz.open(str(src_path)) - page = doc[0] - paths = page.get_drawings() - - pw = page.mediabox.width - ph = page.mediabox.height - y_mid = ph / 2 - - # Krok 1: horizontální paprsek na y_mid → x_left, x_right - hit_h = [p["rect"] for p in paths if p["rect"].y0 <= y_mid <= p["rect"].y1] - if not hit_h: - raise ValueError("Horizontální paprsek nenašel žádné kresby na y_mid") - - # Elementy z horizontálního paprsku jsou výhradně mřížka (nadpis/copyright - # jsou daleko od y_mid) — jejich y rozsah přímo dává top/bot hranici mřížky. - x_left = min(r.x0 for r in hit_h) - x_right = max(r.x1 for r in hit_h) - top_cut = min(r.y0 for r in hit_h) - bot_cut = max(r.y1 for r in hit_h) - - print(f"x_left={x_left:.1f} x_right={x_right:.1f}") - print(f"top_cut={top_cut:.1f} bot_cut={bot_cut:.1f}") - print(f"stránka: {pw:.1f} x {ph:.1f} pt") - - clip = fitz.Rect( - x_left - margin, - top_cut - margin, - x_right + margin, - bot_cut + margin, - ) - clip_w = clip.width - clip_h = clip.height - - doc_new = fitz.open() - p = doc_new.new_page(width=clip_w, height=clip_h) - p.show_pdf_page(fitz.Rect(0, 0, clip_w, clip_h), doc, 0, clip=clip) - doc_new.save(str(dst_path)) - - doc.close() - doc_new.close() - print(f"Uloženo: {dst_path} ({clip_w:.1f} x {clip_h:.1f} pt)") - - -crop_raycast(SRC, DST) diff --git a/SběrDatRůzné/SudokuKiller/Testy/22_CropFromDB.py b/SběrDatRůzné/SudokuKiller/Testy/22_CropFromDB.py deleted file mode 100644 index dd04cf4..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/22_CropFromDB.py +++ /dev/null @@ -1,104 +0,0 @@ -""" -Stáhne 10 puzzle z MySQL (tabulka sudoku_killer), ořízne ray-cast metodou -a uloží do Testy/verify/ pro vizuální verifikaci. -""" - -import sys -from pathlib import Path -import fitz - -sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "Knihovny")) -from mysql_db import connect_mysql - -import pymysql.cursors - -sys.stdout.reconfigure(encoding="utf-8") -sys.stderr.reconfigure(encoding="utf-8") - -OUT_DIR = Path(__file__).parent / "verify" -OUT_DIR.mkdir(exist_ok=True) - -MARGIN = 2 # pt — minimální rámeček - - -def crop_raycast(pdf_bytes: bytes) -> bytes: - doc = fitz.open(stream=pdf_bytes, filetype="pdf") - page = doc[0] - paths = page.get_drawings() - - ph = page.mediabox.height - y_mid = ph / 2 - - hit_h = [(p["rect"], p.get("width") or 0) for p in paths - if p["rect"].y0 <= y_mid <= p["rect"].y1] - if not hit_h: - raise ValueError("Horizontální paprsek nenašel žádné kresby") - - rects = [r for r, _ in hit_h] - x_left = min(r.x0 for r in rects) - x_right = max(r.x1 for r in rects) - top_cut = min(r.y0 for r in rects) - bot_cut = max(r.y1 for r in rects) - - # lineWidth svislých okrajových čar — souřadnice jsou středy, ne vizuální okraje - lw_left = next((lw for r, lw in hit_h if r.x0 == x_left), 0) - lw_right = next((lw for r, lw in hit_h if r.x1 == x_right), 0) - - vis_x_left = x_left - lw_left / 2 - vis_x_right = x_right + lw_right / 2 - # top_cut / bot_cut jsou již vnější vizuální hrany (shodují se s okrajem horizontálních čar) - - clip = fitz.Rect( - vis_x_left - MARGIN, - top_cut - MARGIN, - vis_x_right + MARGIN, - bot_cut + MARGIN, - ) - - doc_new = fitz.open() - p = doc_new.new_page(width=clip.width, height=clip.height) - p.show_pdf_page(fitz.Rect(0, 0, clip.width, clip.height), doc, 0, clip=clip) - - out = doc_new.tobytes() - doc.close() - doc_new.close() - return out - - -def main(): - import pymysql.cursors - conn = connect_mysql(database="puzzle", cursorclass=pymysql.cursors.DictCursor) - cursor = conn.cursor() - - cursor.execute(""" - SELECT puzzle_number, puzzle_date, difficulty, file_puzzle - FROM sudoku_killer - WHERE file_puzzle IS NOT NULL - ORDER BY puzzle_number - LIMIT 10 - """) - rows = cursor.fetchall() - cursor.close() - conn.close() - - print(f"Staženo {len(rows)} záznamů z DB.") - - for row in rows: - num = row["puzzle_number"] - date = row["puzzle_date"] - diff = row["difficulty"] - pdf_bytes = bytes(row["file_puzzle"]) - - try: - cropped = crop_raycast(pdf_bytes) - out_path = OUT_DIR / f"{date} Puzzle SudokuKiller {num} [diff {diff}] cropped.pdf" - out_path.write_bytes(cropped) - print(f" OK #{num} → {out_path.name}") - except Exception as e: - print(f" CHYBA #{num}: {e}", file=sys.stderr) - - print(f"\nHotovo. Soubory v: {OUT_DIR}") - - -if __name__ == "__main__": - main() diff --git a/SběrDatRůzné/SudokuKiller/Testy/23_SizePreview.py b/SběrDatRůzné/SudokuKiller/Testy/23_SizePreview.py deleted file mode 100644 index 1135176..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/23_SizePreview.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -Ořízne vzorový puzzle (ray-cast) a vygeneruje jedno PDF s 7 stránkami A4, -každá stránka ukazuje puzzle zmenšený o 10–70 % (krok 10 %). -""" - -import fitz -from pathlib import Path - -SRC = Path(r"U:/ordinaceprojekt/SběrDatRůzné/SudokuKiller/Testy/2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf") -DST = Path(r"U:/ordinaceprojekt/SběrDatRůzné/SudokuKiller/Testy/size_preview.pdf") - -A4_W = 595.276 -A4_H = 841.890 -MARGIN = 2 # pt bílý rámeček kolem puzzlu po ořezu - - -def detect_clip(page) -> fitz.Rect: - paths = page.get_drawings() - ph = page.mediabox.height - y_mid = ph / 2 - - hit_h = [(p["rect"], p.get("width") or 0) for p in paths - if p["rect"].y0 <= y_mid <= p["rect"].y1] - if not hit_h: - raise ValueError("Detekce hranic selhala") - - rects = [r for r, _ in hit_h] - x_left = min(r.x0 for r in rects) - x_right = max(r.x1 for r in rects) - top_cut = min(r.y0 for r in rects) - bot_cut = max(r.y1 for r in rects) - - lw_left = next((lw for r, lw in hit_h if r.x0 == x_left), 0) - lw_right = next((lw for r, lw in hit_h if r.x1 == x_right), 0) - - return fitz.Rect( - x_left - lw_left / 2 - MARGIN, - top_cut - MARGIN, - x_right + lw_right / 2 + MARGIN, - bot_cut + MARGIN, - ) - - -def main(): - doc_src = fitz.open(str(SRC)) - page_src = doc_src[0] - clip = detect_clip(page_src) - - puzzle_w = clip.width - puzzle_h = clip.height - print(f"Oříznutý puzzle: {puzzle_w:.1f} × {puzzle_h:.1f} pt") - - doc_out = fitz.open() - - scales = [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70] - - for scale in scales: - pw = puzzle_w * scale - ph = puzzle_h * scale - - # Vycentrovat na A4 - x0 = (A4_W - pw) / 2 - y0 = (A4_H - ph) / 2 - - page = doc_out.new_page(width=A4_W, height=A4_H) - page.show_pdf_page( - fitz.Rect(x0, y0, x0 + pw, y0 + ph), - doc_src, 0, - clip=clip, - ) - - pct = int(scale * 100) - label = f"{pct} % ({pw:.0f} × {ph:.0f} pt = {pw/72*25.4:.0f} × {ph/72*25.4:.0f} mm)" - page.insert_text((30, 30), label, fontsize=11, color=(0.4, 0.4, 0.4)) - print(f" Stránka {pct}%: puzzle {pw:.0f}×{ph:.0f} pt ({pw/72*25.4:.0f}×{ph/72*25.4:.0f} mm)") - - doc_out.save(str(DST)) - doc_src.close() - doc_out.close() - print(f"\nUloženo: {DST}") - - -if __name__ == "__main__": - main() diff --git a/SběrDatRůzné/SudokuKiller/Testy/24_TwoPuzzles.py b/SběrDatRůzné/SudokuKiller/Testy/24_TwoPuzzles.py deleted file mode 100644 index 465de90..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/24_TwoPuzzles.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -Ukázka 2 puzzle vedle sebe na A4 — varianty 93 % (mezera 10 pt) a 89 % (mezera 20 pt). -Výsledek: 2stránkové PDF. -""" - -import fitz -from pathlib import Path - -SRC = Path(r"U:/ordinaceprojekt/SběrDatRůzné/SudokuKiller/Testy/2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf") -DST = Path(r"U:/ordinaceprojekt/SběrDatRůzné/SudokuKiller/Testy/two_puzzles.pdf") - -A4_W = 595.276 -A4_H = 841.890 -CROP_MARGIN = 2 - - -def detect_clip(page) -> fitz.Rect: - paths = page.get_drawings() - y_mid = page.mediabox.height / 2 - hit_h = [(p["rect"], p.get("width") or 0) for p in paths - if p["rect"].y0 <= y_mid <= p["rect"].y1] - rects = [r for r, _ in hit_h] - x_left = min(r.x0 for r in rects) - x_right = max(r.x1 for r in rects) - top_cut = min(r.y0 for r in rects) - bot_cut = max(r.y1 for r in rects) - lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0) - lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0) - return fitz.Rect( - x_left - lw_l / 2 - CROP_MARGIN, - top_cut - CROP_MARGIN, - x_right + lw_r / 2 + CROP_MARGIN, - bot_cut + CROP_MARGIN, - ) - - -def add_page(doc_out, doc_src, clip, gap_pt): - scale = (A4_W - 3 * gap_pt) / 2 / clip.width - pw = clip.width * scale - ph = clip.height * scale - y0 = (A4_H - ph) / 2 # vertikálně vycentrovat - - page = doc_out.new_page(width=A4_W, height=A4_H) - - for i in range(2): - x0 = gap_pt + i * (pw + gap_pt) - page.show_pdf_page(fitz.Rect(x0, y0, x0 + pw, y0 + ph), doc_src, 0, clip=clip) - - pct = scale * 100 - label = (f"mezera {gap_pt:.0f} pt | měřítko {pct:.0f} % | " - f"puzzle {pw:.0f} × {ph:.0f} pt = {pw/72*25.4:.0f} × {ph/72*25.4:.0f} mm") - page.insert_text((30, 25), label, fontsize=9, color=(0.4, 0.4, 0.4)) - - -def main(): - doc_src = fitz.open(str(SRC)) - clip = detect_clip(doc_src[0]) - print(f"Oříznutý puzzle: {clip.width:.1f} × {clip.height:.1f} pt") - - doc_out = fitz.open() - for gap in (10, 20): - add_page(doc_out, doc_src, clip, gap) - scale = (A4_W - 3 * gap) / 2 / clip.width - print(f" gap={gap} pt -> meritko {scale*100:.0f} % puzzle {clip.width*scale:.0f}x{clip.height*scale:.0f} pt") - - doc_out.save(str(DST)) - doc_src.close() - doc_out.close() - print(f"\nUloženo: {DST}") - - -if __name__ == "__main__": - main() diff --git a/SběrDatRůzné/SudokuKiller/Testy/25_TwoVertical.py b/SběrDatRůzné/SudokuKiller/Testy/25_TwoVertical.py deleted file mode 100644 index 089c934..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/25_TwoVertical.py +++ /dev/null @@ -1,76 +0,0 @@ -""" -2 puzzle na A4 — 100 %, pod sebou, horizontálně vycentrované. -Místo vlevo/vpravo zůstává pro poznámky. -""" - -import fitz -from pathlib import Path - -SRC = Path(r"U:/ordinaceprojekt/SběrDatRůzné/SudokuKiller/Testy/2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf") -DST = Path(r"U:/ordinaceprojekt/SběrDatRůzné/SudokuKiller/Testy/two_vertical_110.pdf") - -A4_W = 595.276 -A4_H = 841.890 -CROP_MARGIN = 2 -SCALE = 1.10 - - -def detect_clip(page) -> fitz.Rect: - paths = page.get_drawings() - y_mid = page.mediabox.height / 2 - hit_h = [(p["rect"], p.get("width") or 0) for p in paths - if p["rect"].y0 <= y_mid <= p["rect"].y1] - rects = [r for r, _ in hit_h] - x_left = min(r.x0 for r in rects) - x_right = max(r.x1 for r in rects) - top_cut = min(r.y0 for r in rects) - bot_cut = max(r.y1 for r in rects) - lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0) - lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0) - return fitz.Rect( - x_left - lw_l / 2 - CROP_MARGIN, - top_cut - CROP_MARGIN, - x_right + lw_r / 2 + CROP_MARGIN, - bot_cut + CROP_MARGIN, - ) - - -def main(): - doc_src = fitz.open(str(SRC)) - clip = detect_clip(doc_src[0]) - pw = clip.width * SCALE - ph = clip.height * SCALE - - # Horizontální pozice — vycentrovat na A4 - x0 = (A4_W - pw) / 2 - - # Vertikální rozdělení: 3 mezery (nahoře, mezi, dole) - gap = (A4_H - 2 * ph) / 3 - y_top = gap - y_bot = gap + ph + gap - - side_space = x0 # místo vlevo/vpravo pro poznámky - - print(f"Puzzle: {pw:.1f} x {ph:.1f} pt ({pw/72*25.4:.0f} x {ph/72*25.4:.0f} mm)") - print(f"Meritko: {SCALE*100:.0f} %") - print(f"Misto vlevo/vpravo: {side_space:.1f} pt ({side_space/72*25.4:.0f} mm)") - print(f"Mezera mezi puzzle: {gap:.1f} pt ({gap/72*25.4:.0f} mm)") - - doc_out = fitz.open() - page = doc_out.new_page(width=A4_W, height=A4_H) - - for y0_pos in (y_top, y_bot): - page.show_pdf_page( - fitz.Rect(x0, y0_pos, x0 + pw, y0_pos + ph), - doc_src, 0, - clip=clip, - ) - - doc_out.save(str(DST)) - doc_src.close() - doc_out.close() - print(f"Ulozeno: {DST}") - - -if __name__ == "__main__": - main() diff --git a/SběrDatRůzné/SudokuKiller/Testy/26_SaveLayout.py b/SběrDatRůzné/SudokuKiller/Testy/26_SaveLayout.py deleted file mode 100644 index bced027..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/26_SaveLayout.py +++ /dev/null @@ -1,99 +0,0 @@ -""" -Změří finální puzzle, spočítá layout "2PuzzleOnA4" a uloží do layouts.json. -""" - -import json -import fitz -from pathlib import Path - -SRC = Path(r"U:/ordinaceprojekt/SběrDatRůzné/SudokuKiller/Testy/2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf") -JSON_PATH = Path(r"U:/ordinaceprojekt/SběrDatRůzné/SudokuKiller/layouts.json") - -A4_W_PT = 595.276 -A4_H_PT = 841.890 -CROP_MARGIN = 2 -TARGET_SCALE = 1.10 # 110 % — to co se nám líbilo - - -def pt_to_mm(pt): - return round(pt / 72 * 25.4, 2) - - -def detect_clip(page) -> fitz.Rect: - paths = page.get_drawings() - y_mid = page.mediabox.height / 2 - hit_h = [(p["rect"], p.get("width") or 0) for p in paths - if p["rect"].y0 <= y_mid <= p["rect"].y1] - rects = [r for r, _ in hit_h] - x_left = min(r.x0 for r in rects) - x_right = max(r.x1 for r in rects) - top_cut = min(r.y0 for r in rects) - bot_cut = max(r.y1 for r in rects) - lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0) - lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0) - return fitz.Rect( - x_left - lw_l / 2 - CROP_MARGIN, - top_cut - CROP_MARGIN, - x_right + lw_r / 2 + CROP_MARGIN, - bot_cut + CROP_MARGIN, - ) - - -def main(): - doc = fitz.open(str(SRC)) - clip = detect_clip(doc[0]) - doc.close() - - raw_w_mm = pt_to_mm(clip.width) - raw_h_mm = pt_to_mm(clip.height) - - target_w_mm = round(pt_to_mm(clip.width * TARGET_SCALE), 2) - target_h_mm = round(pt_to_mm(clip.height * TARGET_SCALE), 2) - - target_w_pt = clip.width * TARGET_SCALE - target_h_pt = clip.height * TARGET_SCALE - - gap_pt = (A4_H_PT - 2 * target_h_pt) / 3 - side_pt = (A4_W_PT - target_w_pt) / 2 - - layout = { - "2PuzzleOnA4": { - "description": "2 puzzle pod sebou, horizontalne vycentrovane, misto po stranach na vypocty", - "page": { - "format": "A4", - "width_pt": A4_W_PT, - "height_pt": A4_H_PT - }, - "count": 2, - "arrangement": "vertical", - "horizontal_align": "center", - "vertical_distribution": "equal_gaps", - "target_puzzle_width_mm": target_w_mm, - "target_puzzle_height_mm": target_h_mm, - "crop_margin_pt": CROP_MARGIN, - "info": { - "sample_raw_puzzle_mm": f"{raw_w_mm} x {raw_h_mm}", - "scale_used_for_sample": TARGET_SCALE, - "side_margin_mm": pt_to_mm(side_pt), - "gap_between_puzzles_mm": pt_to_mm(gap_pt) - } - } - } - - # Načíst existující JSON a přidat/přepsat klíč - if JSON_PATH.exists(): - existing = json.loads(JSON_PATH.read_text(encoding="utf-8")) - existing.update(layout) - layout = existing - - JSON_PATH.write_text(json.dumps(layout, indent=2, ensure_ascii=False), encoding="utf-8") - - print(f"Ulozeno: {JSON_PATH}") - print(f" Surove puzzle: {raw_w_mm} x {raw_h_mm} mm") - print(f" Cilova velikost: {target_w_mm} x {target_h_mm} mm") - print(f" Misto po stranach: {pt_to_mm(side_pt):.1f} mm") - print(f" Mezera mezi puzzle: {pt_to_mm(gap_pt):.1f} mm") - - -if __name__ == "__main__": - main() diff --git a/SběrDatRůzné/SudokuKiller/Testy/27_ApplyLayout.py b/SběrDatRůzné/SudokuKiller/Testy/27_ApplyLayout.py deleted file mode 100644 index 748795f..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/27_ApplyLayout.py +++ /dev/null @@ -1,133 +0,0 @@ -""" -Načte layout z layouts.json a aplikuje ho na 2 vstupní PDF soubory. - -Použití: - python 27_ApplyLayout.py [--layout 2PuzzleOnA4] - -Skript si sám detekuje hranice každého puzzle (ray-cast), spočítá -scale z aktuální velikosti vs. cílové velikosti v JSON a rozmístí je. -""" - -import sys -import json -import argparse -import fitz -from pathlib import Path - -LAYOUTS_JSON = Path(__file__).parent.parent / "layouts.json" -DEFAULT_LAYOUT = "2PuzzleOnA4" -CROP_MARGIN_FALLBACK = 2 - - -def detect_clip(page, crop_margin) -> fitz.Rect: - paths = page.get_drawings() - y_mid = page.mediabox.height / 2 - hit_h = [(p["rect"], p.get("width") or 0) for p in paths - if p["rect"].y0 <= y_mid <= p["rect"].y1] - if not hit_h: - raise ValueError("Ray-cast detekce selhala — zadne kresby na y_mid") - rects = [r for r, _ in hit_h] - x_left = min(r.x0 for r in rects) - x_right = max(r.x1 for r in rects) - top_cut = min(r.y0 for r in rects) - bot_cut = max(r.y1 for r in rects) - lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0) - lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0) - return fitz.Rect( - x_left - lw_l / 2 - crop_margin, - top_cut - crop_margin, - x_right + lw_r / 2 + crop_margin, - bot_cut + crop_margin, - ) - - -def mm_to_pt(mm): - return mm / 25.4 * 72 - - -def apply_2_vertical(doc_out, sources, layout): - page_w = layout["page"]["width_pt"] - page_h = layout["page"]["height_pt"] - target_w_pt = mm_to_pt(layout["target_puzzle_width_mm"]) - target_h_pt = mm_to_pt(layout["target_puzzle_height_mm"]) - crop_margin = layout.get("crop_margin_pt", CROP_MARGIN_FALLBACK) - - page = doc_out.new_page(width=page_w, height=page_h) - - clips = [] - for doc_src in sources: - clip = detect_clip(doc_src[0], crop_margin) - clips.append(clip) - actual_w_mm = clip.width / 72 * 25.4 - actual_h_mm = clip.height / 72 * 25.4 - scale_w = target_w_pt / clip.width - scale_h = target_h_pt / clip.height - print(f" Puzzle: {actual_w_mm:.1f} x {actual_h_mm:.1f} mm -> scale {scale_w:.3f} x {scale_h:.3f}") - - # Pro každý puzzle spočítej scale individuálně - positions = [] - for clip in clips: - pw = clip.width * (target_w_pt / clip.width) - ph = clip.height * (target_h_pt / clip.height) - positions.append((pw, ph)) - - # Vertikální rozmístění — equal gaps (předpokládáme stejnou výšku obou) - ph0 = positions[0][1] - ph1 = positions[1][1] - gap0 = (page_h - ph0 - ph1) / 3 - gap1 = gap0 - - y0 = gap0 - y1 = gap0 + ph0 + gap1 - - for i, (doc_src, clip, (pw, ph)) in enumerate(zip(sources, clips, positions)): - x0 = (page_w - pw) / 2 - y_pos = y0 if i == 0 else y1 - page.show_pdf_page( - fitz.Rect(x0, y_pos, x0 + pw, y_pos + ph), - doc_src, 0, - clip=clip, - ) - - side_mm = ((page_w - positions[0][0]) / 2) / 72 * 25.4 - gap_mm = gap0 / 72 * 25.4 - print(f" Misto po stranach: {side_mm:.1f} mm | Mezera: {gap_mm:.1f} mm") - - -def main(): - parser = argparse.ArgumentParser(description="Aplikuje layout na 2 puzzle PDF") - parser.add_argument("pdf1", help="Prvni puzzle PDF") - parser.add_argument("pdf2", help="Druhy puzzle PDF") - parser.add_argument("vystup", help="Vystupni PDF") - parser.add_argument("--layout", default=DEFAULT_LAYOUT, help=f"Nazev layoutu (default: {DEFAULT_LAYOUT})") - args = parser.parse_args() - - if not LAYOUTS_JSON.exists(): - print(f"CHYBA: {LAYOUTS_JSON} nenalezen. Spust nejdrive 26_SaveLayout.py.", file=sys.stderr) - sys.exit(1) - - layouts = json.loads(LAYOUTS_JSON.read_text(encoding="utf-8")) - if args.layout not in layouts: - print(f"CHYBA: layout '{args.layout}' nenalezen v {LAYOUTS_JSON}", file=sys.stderr) - print(f"Dostupne layouty: {list(layouts.keys())}", file=sys.stderr) - sys.exit(1) - - layout = layouts[args.layout] - print(f"Layout: {args.layout}") - print(f"Cilova velikost: {layout['target_puzzle_width_mm']} x {layout['target_puzzle_height_mm']} mm") - - doc1 = fitz.open(args.pdf1) - doc2 = fitz.open(args.pdf2) - doc_out = fitz.open() - - apply_2_vertical(doc_out, [doc1, doc2], layout) - - doc_out.save(args.vystup) - doc1.close() - doc2.close() - doc_out.close() - print(f"Ulozeno: {args.vystup}") - - -if __name__ == "__main__": - main() diff --git a/SběrDatRůzné/SudokuKiller/Testy/30_BatchCrop.py b/SběrDatRůzné/SudokuKiller/Testy/30_BatchCrop.py deleted file mode 100644 index 9c9a622..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/30_BatchCrop.py +++ /dev/null @@ -1,199 +0,0 @@ -""" -Batch ořez puzzle z MySQL. - -Pro každý řádek v sudoku_killer kde file_puzzle_cropped IS NULL: - - načte file_puzzle + crop_method - - ořízne podle metody - - uloží zpět do file_puzzle_cropped -""" - -# --------------------------------------------------------------------------- -# Nastavení — upravuj zde před spuštěním v PyCharm -# --------------------------------------------------------------------------- -WORKERS = 4 # počet paralelních procesů -LIMIT = None # None = vše; číslo (např. 20) = jen prvních N puzzle (pro testování) -BATCH = 200 # kolik oříznutých PDF uložit najednou do DB -DRY_RUN = False # True = jen ořez, nic se neuloží do DB -LOG_EVERY = 500 # vypiš stav do konzole každých N zpracovaných puzzle -# --------------------------------------------------------------------------- - -import sys -import json -import csv -from pathlib import Path -from concurrent.futures import ProcessPoolExecutor, as_completed - -import fitz -from tqdm import tqdm - -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny")) -from mysql_db import connect_mysql - -sys.stdout.reconfigure(encoding="utf-8") -sys.stderr.reconfigure(encoding="utf-8") - -ERRORS_CSV = Path(__file__).parent / "crop_errors.csv" - - -# --------------------------------------------------------------------------- -# Crop metody — přidat sem nové funkce pro nové metody -# --------------------------------------------------------------------------- - -def crop_raycast_auto(pdf_bytes: bytes, params: dict) -> bytes: - crop_margin = params.get("crop_margin_pt", 2) - - doc = fitz.open(stream=pdf_bytes, filetype="pdf") - page = doc[0] - paths = page.get_drawings() - y_mid = page.mediabox.height / 2 - - hit_h = [(p["rect"], p.get("width") or 0) for p in paths - if p["rect"].y0 <= y_mid <= p["rect"].y1] - if not hit_h: - raise ValueError("ray-cast: zadne kresby na y_mid") - - rects = [r for r, _ in hit_h] - x_left = min(r.x0 for r in rects) - x_right = max(r.x1 for r in rects) - top_cut = min(r.y0 for r in rects) - bot_cut = max(r.y1 for r in rects) - lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0) - lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0) - - clip = fitz.Rect( - x_left - lw_l / 2 - crop_margin, - top_cut - crop_margin, - x_right + lw_r / 2 + crop_margin, - bot_cut + crop_margin, - ) - - doc_new = fitz.open() - p = doc_new.new_page(width=clip.width, height=clip.height) - p.show_pdf_page(fitz.Rect(0, 0, clip.width, clip.height), doc, 0, clip=clip) - out = doc_new.tobytes() - doc.close() - doc_new.close() - return out - - -CROP_METHODS = { - "raycast_auto": crop_raycast_auto, -} - - -# --------------------------------------------------------------------------- -# Worker — spouští se v samostatném procesu -# --------------------------------------------------------------------------- - -def process_one(args): - puzzle_id, puzzle_number, pdf_bytes, method_name, params_json = args - try: - params = json.loads(params_json) if isinstance(params_json, str) else params_json - fn = CROP_METHODS.get(method_name) - if fn is None: - return puzzle_id, puzzle_number, None, f"neznama metoda: {method_name}" - cropped = fn(bytes(pdf_bytes), params) - return puzzle_id, puzzle_number, cropped, None - except Exception as e: - return puzzle_id, puzzle_number, None, str(e) - - -# --------------------------------------------------------------------------- -# Hlavní logika -# --------------------------------------------------------------------------- - -def fetch_todo(limit): - import pymysql.cursors - conn = connect_mysql(database="puzzle", cursorclass=pymysql.cursors.DictCursor) - cur = conn.cursor() - sql = """ - SELECT sk.id, sk.puzzle_number, sk.file_puzzle, - cm.name AS method_name, cm.params_json - FROM sudoku_killer sk - JOIN puzzle_crop_method cm ON sk.crop_method_id = cm.id - WHERE sk.file_puzzle_cropped IS NULL - ORDER BY sk.puzzle_number - """ - if limit: - sql += f" LIMIT {int(limit)}" - cur.execute(sql) - rows = cur.fetchall() - cur.close() - conn.close() - return rows - - -def save_cropped(updates: list[tuple]): - """updates = [(cropped_bytes, puzzle_id), ...]""" - import pymysql.cursors - conn = connect_mysql(database="puzzle", cursorclass=pymysql.cursors.DictCursor) - cur = conn.cursor() - cur.executemany( - "UPDATE sudoku_killer SET file_puzzle_cropped = %s WHERE id = %s", - updates, - ) - cur.close() - conn.close() - - -def main(): - print("Nacitam seznam puzzle k orizeni...") - rows = fetch_todo(LIMIT) - total = len(rows) - if total == 0: - print("Vsechny puzzle jsou jiz orizeny.") - return - print(f"Ke zpracovani: {total} puzzle | workers: {WORKERS} | batch: {BATCH} | dry-run: {DRY_RUN}") - - errors = [] - pending_saves = [] # [(cropped_bytes, puzzle_id)] - done = 0 - - tasks = [ - (r["id"], r["puzzle_number"], r["file_puzzle"], r["method_name"], r["params_json"]) - for r in rows - ] - - with ProcessPoolExecutor(max_workers=WORKERS) as executor: - futures = {executor.submit(process_one, t): t for t in tasks} - with tqdm(total=total, unit="puzzle") as bar: - for future in as_completed(futures): - puzzle_id, puzzle_number, cropped, err = future.result() - - if err: - errors.append({"puzzle_id": puzzle_id, "puzzle_number": puzzle_number, "chyba": err}) - tqdm.write(f" [CHYBA] puzzle #{puzzle_number}: {err}") - elif not DRY_RUN: - pending_saves.append((cropped, puzzle_id)) - if len(pending_saves) >= BATCH: - save_cropped(pending_saves) - pending_saves.clear() - - done += 1 - bar.update(1) - bar.set_postfix(chyby=len(errors), ulozeno=done - len(errors) - len(pending_saves)) - - if done % LOG_EVERY == 0: - zbyvá = total - done - pct = done / total * 100 - tqdm.write(f" >> {done}/{total} ({pct:.1f}%) | puzzle #{puzzle_number} | zbyvá: {zbyvá} | chyby: {len(errors)}") - - # Uložit zbývající - if pending_saves and not DRY_RUN: - save_cropped(pending_saves) - - if errors: - with open(ERRORS_CSV, "w", newline="", encoding="utf-8") as f: - w = csv.DictWriter(f, fieldnames=["puzzle_id", "puzzle_number", "chyba"]) - w.writeheader() - w.writerows(errors) - print(f"\nChyby: {len(errors)} — viz {ERRORS_CSV}") - else: - print("\nVse bez chyb.") - - ok = done - len(errors) - print(f"Hotovo: {ok} orizeno, {len(errors)} chyb, {total - done} preskoceno.") - - -if __name__ == "__main__": - main() diff --git a/SběrDatRůzné/SudokuKiller/Testy/export_original_pdf.py b/SběrDatRůzné/SudokuKiller/Testy/export_original_pdf.py deleted file mode 100644 index 439eed9..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/export_original_pdf.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Exportuje originální PDF puzzle z tabulky sudoku_killer pro porovnání. -""" - -import sys -from pathlib import Path - -sys.stdout.reconfigure(encoding="utf-8") -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny")) - -from mysql_db import connect_mysql - -OUTPUT_DIR = Path(__file__).parent - -conn = connect_mysql(database="puzzle") -cur = conn.cursor() -cur.execute( - "SELECT puzzle_number, file_puzzle, file_solution " - "FROM sudoku_killer WHERE puzzle_number = 31414" -) -row = cur.fetchone() -cur.close() -conn.close() - -if not row: - print("Puzzle 31414 nenalezen v sudoku_killer.") -else: - num, pdf_puzzle, pdf_solution = row - if pdf_puzzle: - path = OUTPUT_DIR / f"original_{num}_puzzle.pdf" - path.write_bytes(pdf_puzzle) - print(f"Uloženo: {path}") - if pdf_solution: - path = OUTPUT_DIR / f"original_{num}_solution.pdf" - path.write_bytes(pdf_solution) - print(f"Uloženo: {path}") diff --git a/SběrDatRůzné/SudokuKiller/Testy/import_do_mysql.py b/SběrDatRůzné/SudokuKiller/Testy/import_do_mysql.py deleted file mode 100644 index 7d072e7..0000000 --- a/SběrDatRůzné/SudokuKiller/Testy/import_do_mysql.py +++ /dev/null @@ -1,151 +0,0 @@ -""" -Naimportuje stažené PDF puzzle z DownloadedPuzzles/ do MySQL tabulky sudoku_killer. - -Spuštění: - python import_do_mysql.py # přeskočí již existující (podle puzzle_number) - python import_do_mysql.py --all # reimportuje vše (přepíše existující) -""" - -import re -import sys -import argparse -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny")) -from mysql_db import connect_mysql - -sys.stdout.reconfigure(encoding="utf-8") -sys.stderr.reconfigure(encoding="utf-8") - -SAVE_DIR = Path(__file__).parent / "DownloadedPuzzles" - -# 2009-01-01 Puzzle SudokuKiller 1 [difficulty 5 of 10] [average solving time 47 min].pdf -FILENAME_RE = re.compile( - r"^(?P\d{4}-\d{2}-\d{2}) Puzzle (?PSudokuKillerGreaterThan|SudokuKiller) (?P\d+) " - r"\[difficulty (?P\d+) of (?P\d+)\] " - r"\[average solving time (?P