notebookvb

This commit is contained in:
Vladimir Buzalka
2026-05-09 08:29:35 +02:00
parent 5bfd4176e4
commit 77d12c68d7
28 changed files with 3408 additions and 0 deletions
@@ -0,0 +1,58 @@
"""
Vybere 2 calcudoku z MySQL a vygeneruje PDF.
Použití: python tisk_calcudoku.py [YYYY-MM-DD] [obtiznost1] [obtiznost2]
Výchozí: dnešní datum, 5x5 a 6x6.
"""
import json
import sys
from datetime import date
from pathlib import Path
sys.stdout.reconfigure(encoding="utf-8")
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
sys.path.insert(0, str(Path(__file__).parent))
from mysql_db import connect_mysql
from vykresli_calcudoku import generate_pdf
# Puzzle date: first CLI argument if given, otherwise today's date in ISO format.
PUZZLE_DATE = sys.argv[1] if len(sys.argv) > 1 else date.today().isoformat()
# Difficulties come from the CLI only when BOTH are supplied (argv has more than
# three entries); a single difficulty argument is ignored and the 5x5/6x6
# defaults are used instead.
DIFFICULTIES = sys.argv[2:4] if len(sys.argv) > 3 else ["5x5", "6x6"]
# The PDF is written one directory above the directory containing this script.
OUTPUT = Path(__file__).parent.parent / f"calcudoku_{PUZZLE_DATE}.pdf"
def main():
    """Load the requested calcudoku puzzles from MySQL and render them to one PDF.

    Uses the module-level ``PUZZLE_DATE``, ``DIFFICULTIES`` and ``OUTPUT``.
    When the query returns no rows, a message is printed and no PDF is
    generated.
    """
    conn = connect_mysql(database="puzzle")
    try:
        cur = conn.cursor()
        try:
            # Only "%s" markers are interpolated into the SQL text; the actual
            # values are passed separately as query parameters.
            placeholders = ", ".join(["%s"] * len(DIFFICULTIES))
            cur.execute(
                f"SELECT difficulty, puzzle, solution, extra FROM puzzles "
                f"WHERE game_type='calcudoku' AND puzzle_date=%s AND difficulty IN ({placeholders}) "
                f"ORDER BY FIELD(difficulty, {placeholders})",
                [PUZZLE_DATE] + DIFFICULTIES + DIFFICULTIES,
            )
            rows = cur.fetchall()
        finally:
            # Release the cursor even when the query fails.
            cur.close()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    if not rows:
        print(f"Žádná calcudoku pro {PUZZLE_DATE} / {DIFFICULTIES}")
        return

    puzzles = []
    for difficulty, cages_str, solution_str, extra_json in rows:
        # `extra` is stored as JSON text; only its "grid_size" entry is used.
        extra = json.loads(extra_json)
        puzzles.append({
            "difficulty": difficulty,
            "cages_str": cages_str,
            "solution_str": solution_str,
            "grid_size": extra["grid_size"],
            "puzzle_date": PUZZLE_DATE,
        })

    generate_pdf(puzzles, OUTPUT)
    print(f"PDF uloženo: {OUTPUT}")
if __name__ == "__main__":
main()
@@ -0,0 +1,111 @@
"""
Jednorázový import: stáhne všechny Str8ts z gameLevels (celý rok) a uloží do MySQL.
Přeskočí záznamy, které už existují (INSERT IGNORE).
"""
import asyncio
import json
import sys
from datetime import date
from pathlib import Path
sys.stdout.reconfigure(encoding="utf-8")
sys.stderr.reconfigure(encoding="utf-8")
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
from mysql_db import connect_mysql
from playwright.async_api import async_playwright
URL = "https://www.solitaire.org/daily-str8ts/"
YEAR = date.today().year
DIFFICULTIES = ["easy", "medium", "hard"]
async def fetch_all_levels() -> dict:
    """Open the Str8ts page in headless Chromium and read its ``gameLevels`` data.

    The landing page is loaded first to discover the URL of the embedded game
    (a child frame, or failing that the ``src`` of the first ``<iframe>``).
    If neither is found, the landing page URL itself is used. The game page
    is then opened and ``gameLevels`` is read for the difficulties 'easy',
    'medium' and 'hard'.

    Returns:
        A dict keyed by difficulty containing whatever ``gameLevels[diff]``
        holds for each difficulty present on the page.
    """
    # Local import so the block does not depend on a file-level import change.
    from urllib.parse import urljoin

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(viewport={"width": 1280, "height": 900})
            page = await context.new_page()

            print(f"Načítám {URL} ...")
            await page.goto(URL, wait_until="networkidle", timeout=60_000)

            # Prefer an already-loaded child frame that points somewhere else
            # than the landing page.
            game_url = None
            for frame in page.frames:
                if frame.url != page.url and frame.url.strip() not in ("", "about:blank"):
                    game_url = frame.url
                    break

            if not game_url:
                # Check for the element first: asking for an attribute of a
                # missing selector waits and then times out, which would make
                # the fallback to URL below unreachable.
                iframes = page.locator("iframe")
                if await iframes.count():
                    iframe_src = await iframes.first.get_attribute("src")
                    if iframe_src:
                        # urljoin resolves absolute, root-relative,
                        # protocol-relative and plain relative values.
                        game_url = urljoin(page.url, iframe_src)

            await page.close()

            game_page = await context.new_page()
            target_url = game_url if game_url else URL
            print(f"Načítám hru: {target_url} ...")
            await game_page.goto(target_url, wait_until="networkidle", timeout=60_000)

            data = await game_page.evaluate("""() => {
                const result = {};
                for (const diff of ['easy', 'medium', 'hard']) {
                    if (gameLevels[diff]) {
                        result[diff] = gameLevels[diff];
                    }
                }
                return result;
            }""")
            return data
        finally:
            # Close the browser on failures as well as on success.
            await browser.close()
def save_all(data: dict, year: int) -> tuple[int, int]:
    """Insert all downloaded Str8ts puzzles into the ``puzzles`` table.

    Args:
        data: Mapping ``difficulty -> {key: entry}``. Each entry is read
            through its ``"puzzle"``, ``"solution"`` and ``"bw"`` keys.
        year: Year prepended to every key to build the puzzle date
            (``f"{year}-{key}"``).

    Returns:
        ``(inserted, skipped)``: rows actually inserted, and rows for which
        the ``INSERT IGNORE`` reported no affected row.

    All inserts are committed once at the end; if anything raises, nothing is
    committed and the cursor and connection are still closed.
    """
    conn = connect_mysql(database="puzzle")
    try:
        cur = conn.cursor()
        try:
            inserted = 0
            skipped = 0

            # Union of the date keys found under any difficulty.
            all_mmdd = set()
            for diff_data in data.values():
                all_mmdd.update(diff_data.keys())

            for mmdd in sorted(all_mmdd):
                puzzle_date = f"{year}-{mmdd}"
                for diff in DIFFICULTIES:
                    entry = data.get(diff, {}).get(mmdd)
                    if not entry:
                        continue
                    cur.execute(
                        "INSERT IGNORE INTO puzzles "
                        "(game_type, difficulty, puzzle_date, puzzle, solution, extra, source) "
                        "VALUES (%s, %s, %s, %s, %s, %s, %s)",
                        (
                            "str8ts", diff, puzzle_date,
                            entry["puzzle"], entry["solution"],
                            json.dumps({"bw": entry["bw"]}),
                            "solitaire.org",
                        ),
                    )
                    # rowcount is 0 when INSERT IGNORE skipped the row.
                    if cur.rowcount:
                        inserted += 1
                    else:
                        skipped += 1

            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
    return inserted, skipped
async def main():
    """Download all levels, store them in MySQL and print a short summary."""
    levels = await fetch_all_levels()

    # Count distinct date keys across all difficulties.
    distinct_days = {key for per_diff in levels.values() for key in per_diff}
    total_days = len(distinct_days)
    print(f"gameLevels obsahuje {total_days} dní, {len(levels)} obtížností")

    counts = save_all(levels, YEAR)
    inserted, skipped = counts
    print(f"Hotovo — vloženo: {inserted}, přeskočeno (existující): {skipped}")
if __name__ == "__main__":
asyncio.run(main())
+107
View File
@@ -0,0 +1,107 @@
# DailySudokuKiller — technické poznámky
## Hlavní skripty
| Skript | Popis |
|--------|-------|
| `stahni_killer_structured.py` | Stáhne strukturovaná data (cage definice + řešení) z dailykillersudoku.com do MySQL tabulky `puzzles`. Průběžně ukládá zálohu do `killer_structured_data.json` |
| `vykresli_killer_sudoku.py` | Vygeneruje PDF z dat v MySQL — Killer Sudoku zadání + řešení, vektorové, vzhledem identické s originálem z webu |
Ostatní (stará pipeline s PDF bloby, průzkumné skripty, testovací PDF) je v podadresáři `Testy/`.
## Zdroj dat
Web: https://www.dailykillersudoku.com/
Každý puzzle má stránku `/puzzle/{N}` s inline JSON daty v HTML:
```javascript
DKS.puzzle = new DKS.Puzzle({
"id": 376,
"date": "2009-05-04",
"difficulty": 4,
"board_base64": "AZoACQAE...",
"solution_base64": "AJoICQIG...",
"puzzle_type": 1
})
```
## Dekódování base64
### board_base64
- 2 bajty header (puzzle_type, flags)
- 81 × 2 bajty = 162 bajtů — cage ID pro každou buňku (uint16 big-endian)
- N bajtů — součet pro každou klec (1 bajt = max 255)
### solution_base64
- 2 bajty header
- 81 bajtů — čísla řešení (řádek po řádku)
## Typy puzzle
| puzzle_type | game_type v DB | Popis |
|-------------|----------------|-------|
| 1 | `killer_sudoku` | Killer Sudoku — klece se součty |
| 2 | `killer_sudoku_gt` | Greater-Than Killer Sudoku — klece + nerovnosti |
## Obtížnost
Škála 1–10 (z webu), uložena v `difficulty`.
## MySQL — sdílená tabulka `puzzles`
Strukturovaná data:
- `game_type` = `'killer_sudoku'` / `'killer_sudoku_gt'`
- `difficulty` = `'1'` – `'10'`
- `puzzle` = klece ve formátu `sum,r0c1r0c2|sum,r3c4r3c5|...` (`VARCHAR(1000)`)
- `solution` = flat string 81 číslic (`VARCHAR(1000)`)
- `extra` = `{"grid_size": 9, "puzzle_number": 376, "original_difficulty": 4}`
- `source` = `'dailykillersudoku.com'`
**Pozor:** `puzzle` a `solution` byly původně `VARCHAR(200)` — nedostačovalo, cage stringy mají až ~500 znaků. Sloupce rozšířeny na `VARCHAR(1000)`.
## Stav stažených dat
- ~28 700 puzzlů (1–31 416)
- Killer Sudoku: ~17 200, Greater-Than: ~11 500
- Zdrojová data v `killer_structured_data.json` (záloha pro případ MySQL chyby)
## PDF rendering — pořadí vrstev
Klíčové pro vzhled identický s originálem z webu (`vykresli_killer_sudoku.py`):
1. **Bílé pozadí**
2. **Čísla řešení** (jen pro řešovou variantu, šedě)
3. **Tečkované ohraničení klecí** — odsazené dovnitř buněk o `cell * 0.10`, slévání segmentů v rámci stejné klece (jeden `c.line()` přes víc buněk → pattern teček neresetuje)
4. **Tenká plná mřížka** — všechny řádky/sloupce, šedě (překryje přesahy tečkovaných v křížení)
5. **Tlusté čáry 3×3** + obvod, černě
6. **Popisky součtů** — bíle podsvícené, ArialBold
### Vnější vs vnitřní rohy klecí
Při slévání tečkovaných segmentů endpoints buďto **zkrátit** o inset (vnější roh) nebo **prodloužit** o inset (vnitřní roh — kde klec zahýbá L-tvarem).
Detekce: pro horizontální segment top borderu od sloupce `s` do `co` (exclusive):
- Levý konec vnitřní roh = `cage_map[r][s-1] == cid` → prodloužit
- Pravý konec vnitřní roh = `cage_map[r][co] == cid` → prodloužit
Bez tohoto fixu se na vnitřních rozích L-tvarů objevují viditelné mezery.
## Závislosti
- `requests` — HTTP fetch (bez Playwright, data jsou inline v HTML)
- `reportlab` — PDF generation (vektorová grafika)
- `tqdm` — progress bar
- `mysql_db` (lokální Knihovny) — DB připojení
## Použití
```bash
# Stažení dat (s pokračováním z JSON pokud existuje)
python stahni_killer_structured.py --run
# Pouze import už stažených JSON dat do MySQL
python stahni_killer_structured.py --import
# Vygenerování PDF pro puzzle 31414
python vykresli_killer_sudoku.py
```
@@ -0,0 +1,42 @@
from pathlib import Path
from pypdf import PdfReader, PdfWriter, Transformation, PageObject
# Script: place the first page of INPUT_PDF, scaled by SCALE, centred on a
# blank A4 page and write the result to OUTPUT_PDF. Both paths are relative,
# so they resolve against the current working directory.
INPUT_PDF = Path(r"2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf")
OUTPUT_PDF = Path(r"sudoku_50pct_A4.pdf")
# A4 in points (72 per inch)
A4_WIDTH = 595.2756
A4_HEIGHT = 841.8898
SCALE = 0.5
reader = PdfReader(str(INPUT_PDF))
source_page = reader.pages[0]
source_width = float(source_page.mediabox.width)
source_height = float(source_page.mediabox.height)
# New blank A4 page
new_page = PageObject.create_blank_page(
    width=A4_WIDTH,
    height=A4_HEIGHT
)
# Compute the offset that centres the scaled page on the A4 sheet
target_width = source_width * SCALE
target_height = source_height * SCALE
x = (A4_WIDTH - target_width) / 2
y = (A4_HEIGHT - target_height) / 2
# Merge the original page as vector content, scaled first and then translated
# to the centred position.
# NOTE(review): the centring assumes the source mediabox starts at (0, 0) — confirm.
transform = Transformation().scale(SCALE).translate(x, y)
new_page.merge_transformed_page(source_page, transform, expand=False)
writer = PdfWriter()
writer.add_page(new_page)
with OUTPUT_PDF.open("wb") as f:
    writer.write(f)
print(f"Hotovo: {OUTPUT_PDF}")
@@ -0,0 +1,137 @@
"""
Batch crop Killer Sudoku PDF souborů — odstraní nadpis nahoře a copyright dole.
Zachovává vektorový obsah (cairo-generované PDF).
Použití:
python 20_CropPuzzles.py <vstup_dir> <vystup_dir> [--workers N]
"""
import argparse
import csv
import sys
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path
import fitz # PyMuPDF
from tqdm import tqdm
def detect_cuts(paths):
    """Find the vertical cut positions that separate header and footer.

    The rounded top edges (``y0``) of all drawings are scanned from the top
    for the first gap larger than 10; the rounded bottom edges (``y1``) are
    scanned from the bottom for the first gap larger than 5. Each cut is the
    midpoint of its gap.

    Returns:
        ``(top_cut, bot_cut)``; either element is ``None`` when no such gap
        was found.
    """
    tops = sorted({round(item["rect"].y0) for item in paths})
    bottoms = sorted({round(item["rect"].y1) for item in paths})

    # Walk neighbouring pairs downwards from the top of the page.
    top_cut = None
    for upper, lower in zip(tops, tops[1:]):
        if lower - upper > 10:
            top_cut = (upper + lower) / 2
            break

    # Walk neighbouring pairs upwards from the bottom of the page.
    bot_cut = None
    for upper, lower in zip(reversed(bottoms[:-1]), reversed(bottoms[1:])):
        if lower - upper > 5:
            bot_cut = (upper + lower) / 2
            break

    return top_cut, bot_cut
def crop_one(args):
    """Crop one PDF to the band between the detected top and bottom cuts.

    Args:
        args: ``(src_path, dst_path)`` packed into one tuple so the function
            can be submitted to an executor with a single argument.

    Returns:
        ``(src_path as str, status, detail)``. ``status`` is ``"ok"``,
        ``"anomalie"`` (no drawings, or gap detection failed) or ``"chyba"``
        (any exception; ``detail`` then holds its message).

    Both documents are closed on every path, including when rendering or
    saving raises.
    """
    src_path, dst_path = args
    doc_src = None
    doc_new = None
    try:
        doc_src = fitz.open(str(src_path))
        page = doc_src[0]
        paths = page.get_drawings()
        if not paths:
            return str(src_path), "anomalie", "žádné kresby (get_drawings prázdný)"

        top_cut, bot_cut = detect_cuts(paths)
        if top_cut is None or bot_cut is None:
            return str(src_path), "anomalie", f"gap detekce selhala (top={top_cut}, bot={bot_cut})"

        # Keep the full page width; only trim vertically.
        page_w = page.mediabox.width
        clip = fitz.Rect(0, top_cut, page_w, bot_cut)

        doc_new = fitz.open()
        p = doc_new.new_page(width=clip.width, height=clip.height)
        p.show_pdf_page(fitz.Rect(0, 0, clip.width, clip.height), doc_src, 0, clip=clip)

        dst_path.parent.mkdir(parents=True, exist_ok=True)
        doc_new.save(str(dst_path))
        return str(src_path), "ok", ""
    except Exception as e:
        # Report the failure to the caller instead of raising in the worker.
        return str(src_path), "chyba", str(e)
    finally:
        for doc in (doc_new, doc_src):
            if doc is not None:
                doc.close()
def main():
    """Command-line entry point: crop every PDF under the input directory.

    Outputs mirror the input directory structure. Files whose output already
    exists are skipped. Failures and anomalies are collected and written to
    ``errors.csv`` in the output directory.
    """
    cli = argparse.ArgumentParser(description="Batch crop Killer Sudoku PDF")
    cli.add_argument("vstup", help="Vstupní adresář s PDF soubory")
    cli.add_argument("vystup", help="Výstupní adresář pro oříznuté PDF")
    cli.add_argument("--workers", type=int, default=4, help="Počet procesů (default: 4)")
    args = cli.parse_args()

    src_dir = Path(args.vstup)
    dst_dir = Path(args.vystup)

    if not src_dir.is_dir():
        print(f"Chyba: vstupní adresář neexistuje: {src_dir}", file=sys.stderr)
        sys.exit(1)
    dst_dir.mkdir(parents=True, exist_ok=True)

    all_pdfs = sorted(src_dir.rglob("*.pdf"))
    if not all_pdfs:
        print("Žádné PDF soubory nenalezeny.")
        sys.exit(0)

    # Pair every source with its mirrored destination; queue only those whose
    # destination does not exist yet.
    pairs = [(pdf, dst_dir / pdf.relative_to(src_dir)) for pdf in all_pdfs]
    tasks = [(pdf, target) for pdf, target in pairs if not target.exists()]
    skipped = len(pairs) - len(tasks)

    print(f"Celkem PDF: {len(all_pdfs)}, přeskočeno (existují): {skipped}, ke zpracování: {len(tasks)}")
    if not tasks:
        print("Vše již zpracováno.")
        return

    errors_csv = dst_dir / "errors.csv"
    errors = []
    with ProcessPoolExecutor(max_workers=args.workers) as executor:
        pending = [executor.submit(crop_one, task) for task in tasks]
        with tqdm(total=len(tasks), unit="soubor") as bar:
            for done in as_completed(pending):
                src_path, status, detail = done.result()
                if status != "ok":
                    errors.append({"soubor": src_path, "typ": status, "detail": detail})
                bar.update(1)
                bar.set_postfix(chyby=len(errors))

    if not errors:
        print("\nVšechny soubory zpracovány bez chyb.")
        return

    with open(errors_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["soubor", "typ", "detail"])
        writer.writeheader()
        writer.writerows(errors)
    print(f"\nChyby/anomálie: {len(errors)} — viz {errors_csv}")
if __name__ == "__main__":
main()
@@ -0,0 +1,61 @@
"""
Crop Killer Sudoku PDF ray-casting metodou:
1. Horizontální paprsek na y_mid → najde x_left, x_right mřížky
2. Vertikální paprsek podél x_left → najde top_cut, bot_cut mřížky
Výsledek: oříznuté PDF jen s mřížkou + malý bílý rámeček (MARGIN).
"""
import fitz
from pathlib import Path
MARGIN = 4 # pt bílého rámečku kolem mřížky
SRC = Path(r"U:/ordinaceprojekt/SběrDatRůzné/DailySudokuKiller/Testy/2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf")
DST = Path(r"U:/ordinaceprojekt/SběrDatRůzné/DailySudokuKiller/Testy/cropped_raycast.pdf")
def crop_raycast(src_path: Path, dst_path: Path, margin: float = MARGIN):
    """Crop the first page of a PDF to the drawings crossed by a mid-page ray.

    Every drawing whose bounding rectangle spans the horizontal line at half
    the page height is collected. The union of those rectangles, grown by
    ``margin`` on each side, becomes the clip region written to ``dst_path``.

    Args:
        src_path: Source PDF.
        dst_path: Destination PDF (overwritten).
        margin: Extra border around the detected region, in points.

    Raises:
        ValueError: No drawing crosses the mid-page line.

    The source document is closed on every path, including the error path.
    """
    doc = fitz.open(str(src_path))
    try:
        page = doc[0]
        paths = page.get_drawings()

        pw = page.mediabox.width
        ph = page.mediabox.height
        y_mid = ph / 2

        # Step 1: horizontal ray at y_mid -> every drawing it crosses.
        hit_h = [p["rect"] for p in paths if p["rect"].y0 <= y_mid <= p["rect"].y1]
        if not hit_h:
            raise ValueError("Horizontální paprsek nenašel žádné kresby na y_mid")

        # The extent of the crossed drawings is taken as the grid boundary in
        # both directions. This relies on the title and copyright drawings
        # being far enough from y_mid not to be hit.
        x_left = min(r.x0 for r in hit_h)
        x_right = max(r.x1 for r in hit_h)
        top_cut = min(r.y0 for r in hit_h)
        bot_cut = max(r.y1 for r in hit_h)

        print(f"x_left={x_left:.1f} x_right={x_right:.1f}")
        print(f"top_cut={top_cut:.1f} bot_cut={bot_cut:.1f}")
        print(f"stránka: {pw:.1f} x {ph:.1f} pt")

        clip = fitz.Rect(
            x_left - margin,
            top_cut - margin,
            x_right + margin,
            bot_cut + margin,
        )
        clip_w = clip.width
        clip_h = clip.height

        doc_new = fitz.open()
        try:
            p = doc_new.new_page(width=clip_w, height=clip_h)
            p.show_pdf_page(fitz.Rect(0, 0, clip_w, clip_h), doc, 0, clip=clip)
            doc_new.save(str(dst_path))
        finally:
            doc_new.close()
    finally:
        doc.close()
    print(f"Uloženo: {dst_path} ({clip_w:.1f} x {clip_h:.1f} pt)")
crop_raycast(SRC, DST)
@@ -0,0 +1,104 @@
"""
Stáhne 10 puzzle z MySQL (tabulka sudoku_killer), ořízne ray-cast metodou
a uloží do Testy/verify/ pro vizuální verifikaci.
"""
import sys
from pathlib import Path
import fitz
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "Knihovny"))
from mysql_db import connect_mysql
import pymysql.cursors
sys.stdout.reconfigure(encoding="utf-8")
sys.stderr.reconfigure(encoding="utf-8")
OUT_DIR = Path(__file__).parent / "verify"
OUT_DIR.mkdir(exist_ok=True)
MARGIN = 2 # pt — minimální rámeček
def crop_raycast(pdf_bytes: bytes) -> bytes:
    """Crop the first page of an in-memory PDF to the grid and return new PDF bytes.

    Drawings whose bounding rectangle spans the horizontal line at half the
    page height define the clip region. The left and right edges are widened
    by half the stroke width of the outermost drawings, and ``MARGIN`` is
    added on all four sides.

    Raises:
        ValueError: No drawing crosses the mid-page line.

    Both documents are closed on every path, including the error path.
    """
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        page = doc[0]
        paths = page.get_drawings()
        ph = page.mediabox.height
        y_mid = ph / 2

        # (rect, stroke width) of every drawing crossed by the ray.
        hit_h = [(p["rect"], p.get("width") or 0) for p in paths
                 if p["rect"].y0 <= y_mid <= p["rect"].y1]
        if not hit_h:
            raise ValueError("Horizontální paprsek nenašel žádné kresby")

        rects = [r for r, _ in hit_h]
        x_left = min(r.x0 for r in rects)
        x_right = max(r.x1 for r in rects)
        top_cut = min(r.y0 for r in rects)
        bot_cut = max(r.y1 for r in rects)

        # Stroke width of the outermost vertical lines: the rect coordinates
        # are line centres, not the visually painted edge.
        lw_left = next((lw for r, lw in hit_h if r.x0 == x_left), 0)
        lw_right = next((lw for r, lw in hit_h if r.x1 == x_right), 0)
        vis_x_left = x_left - lw_left / 2
        vis_x_right = x_right + lw_right / 2

        # top_cut / bot_cut are used as-is, without a stroke-width correction.
        clip = fitz.Rect(
            vis_x_left - MARGIN,
            top_cut - MARGIN,
            vis_x_right + MARGIN,
            bot_cut + MARGIN,
        )

        doc_new = fitz.open()
        try:
            p = doc_new.new_page(width=clip.width, height=clip.height)
            p.show_pdf_page(fitz.Rect(0, 0, clip.width, clip.height), doc, 0, clip=clip)
            return doc_new.tobytes()
        finally:
            doc_new.close()
    finally:
        doc.close()
def main():
    """Fetch the first 10 stored puzzle PDFs, crop each and save it for review.

    Rows are read from ``sudoku_killer`` (only those with ``file_puzzle``
    set, ordered by ``puzzle_number``). Each cropped PDF is written into
    ``OUT_DIR``. A failure on one puzzle is reported on stderr and does not
    stop the remaining ones.
    """
    # pymysql.cursors is already imported at module level in this file.
    conn = connect_mysql(database="puzzle", cursorclass=pymysql.cursors.DictCursor)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("""
                SELECT puzzle_number, puzzle_date, difficulty, file_puzzle
                FROM sudoku_killer
                WHERE file_puzzle IS NOT NULL
                ORDER BY puzzle_number
                LIMIT 10
            """)
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Close the connection even when the query fails.
        conn.close()

    print(f"Staženo {len(rows)} záznamů z DB.")

    for row in rows:
        num = row["puzzle_number"]
        puzzle_date = row["puzzle_date"]
        diff = row["difficulty"]
        pdf_bytes = bytes(row["file_puzzle"])
        try:
            cropped = crop_raycast(pdf_bytes)
            out_path = OUT_DIR / f"{puzzle_date} Puzzle SudokuKiller {num} [diff {diff}] cropped.pdf"
            out_path.write_bytes(cropped)
            # Separator restored: the number used to run straight into the file name.
            print(f" OK #{num} -> {out_path.name}")
        except Exception as e:
            # Best effort: report and continue with the next puzzle.
            print(f" CHYBA #{num}: {e}", file=sys.stderr)

    print(f"\nHotovo. Soubory v: {OUT_DIR}")
if __name__ == "__main__":
main()
@@ -0,0 +1,84 @@
"""
Ořízne vzorový puzzle (ray-cast) a vygeneruje jedno PDF s 7 stránkami A4,
každá stránka ukazuje puzzle zmenšený na 10–70 % (krok 10 %).
"""
import fitz
from pathlib import Path
SRC = Path(r"U:/ordinaceprojekt/SběrDatRůzné/DailySudokuKiller/Testy/2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf")
DST = Path(r"U:/ordinaceprojekt/SběrDatRůzné/DailySudokuKiller/Testy/size_preview.pdf")
A4_W = 595.276
A4_H = 841.890
MARGIN = 2 # pt bílý rámeček kolem puzzlu po ořezu
def detect_clip(page) -> fitz.Rect:
    """Return the clip rectangle around the drawings crossed by a mid-page ray.

    Drawings whose bounding rectangle spans the horizontal line at half the
    page height are collected. The result is their union, widened left and
    right by half the stroke width of the outermost drawing and grown by
    ``MARGIN`` on all sides.

    Raises:
        ValueError: No drawing crosses the mid-page line.
    """
    mid_y = page.mediabox.height / 2

    crossing = []
    for drawing in page.get_drawings():
        box = drawing["rect"]
        if box.y0 <= mid_y <= box.y1:
            crossing.append((box, drawing.get("width") or 0))
    if not crossing:
        raise ValueError("Detekce hranic selhala")

    boxes = [box for box, _ in crossing]
    left = min(box.x0 for box in boxes)
    right = max(box.x1 for box in boxes)
    top = min(box.y0 for box in boxes)
    bottom = max(box.y1 for box in boxes)

    # Stroke width of the first drawing that touches each outer edge.
    left_stroke = next((width for box, width in crossing if box.x0 == left), 0)
    right_stroke = next((width for box, width in crossing if box.x1 == right), 0)

    return fitz.Rect(
        left - left_stroke / 2 - MARGIN,
        top - MARGIN,
        right + right_stroke / 2 + MARGIN,
        bottom + MARGIN,
    )
def main():
    """Render the sample puzzle at 10 %–70 % scale, one A4 page per scale."""
    doc_src = fitz.open(str(SRC))
    clip = detect_clip(doc_src[0])

    puzzle_w, puzzle_h = clip.width, clip.height
    print(f"Oříznutý puzzle: {puzzle_w:.1f} × {puzzle_h:.1f} pt")

    doc_out = fitz.open()
    for scale in [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70]:
        pw = puzzle_w * scale
        ph = puzzle_h * scale

        # Centre the scaled puzzle on the A4 page.
        x0 = (A4_W - pw) / 2
        y0 = (A4_H - ph) / 2
        target = fitz.Rect(x0, y0, x0 + pw, y0 + ph)

        page = doc_out.new_page(width=A4_W, height=A4_H)
        page.show_pdf_page(target, doc_src, 0, clip=clip)

        # Caption in the top-left corner with size in points and millimetres.
        pct = int(scale * 100)
        w_mm = pw / 72 * 25.4
        h_mm = ph / 72 * 25.4
        label = f"{pct} % ({pw:.0f} × {ph:.0f} pt = {w_mm:.0f} × {h_mm:.0f} mm)"
        page.insert_text((30, 30), label, fontsize=11, color=(0.4, 0.4, 0.4))

        print(f" Stránka {pct}%: puzzle {pw:.0f}×{ph:.0f} pt ({w_mm:.0f}×{h_mm:.0f} mm)")

    doc_out.save(str(DST))
    doc_src.close()
    doc_out.close()
    print(f"\nUloženo: {DST}")
if __name__ == "__main__":
main()
@@ -0,0 +1,73 @@
"""
Ukázka 2 puzzle vedle sebe na A4 — varianty 93 % (mezera 10 pt) a 89 % (mezera 20 pt).
Výsledek: 2stránkové PDF.
"""
import fitz
from pathlib import Path
SRC = Path(r"U:/ordinaceprojekt/SběrDatRůzné/DailySudokuKiller/Testy/2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf")
DST = Path(r"U:/ordinaceprojekt/SběrDatRůzné/DailySudokuKiller/Testy/two_puzzles.pdf")
A4_W = 595.276
A4_H = 841.890
CROP_MARGIN = 2
def detect_clip(page) -> fitz.Rect:
    """Return the clip rectangle around the drawings crossed by a mid-page ray.

    Drawings whose bounding rectangle spans the horizontal line at half the
    page height are collected. The result is their union, widened left and
    right by half the stroke width of the outermost drawing and grown by
    ``CROP_MARGIN`` on all sides.

    Raises:
        ValueError: No drawing crosses the mid-page line. Previously this
            surfaced as an opaque ``min()`` error on an empty sequence; the
            exception type is unchanged.
    """
    paths = page.get_drawings()
    y_mid = page.mediabox.height / 2
    hit_h = [(p["rect"], p.get("width") or 0) for p in paths
             if p["rect"].y0 <= y_mid <= p["rect"].y1]
    if not hit_h:
        raise ValueError("Ray-cast detekce selhala — zadne kresby na y_mid")

    rects = [r for r, _ in hit_h]
    x_left = min(r.x0 for r in rects)
    x_right = max(r.x1 for r in rects)
    top_cut = min(r.y0 for r in rects)
    bot_cut = max(r.y1 for r in rects)

    # Stroke width of the first drawing touching each outer edge.
    lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0)
    lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0)
    return fitz.Rect(
        x_left - lw_l / 2 - CROP_MARGIN,
        top_cut - CROP_MARGIN,
        x_right + lw_r / 2 + CROP_MARGIN,
        bot_cut + CROP_MARGIN,
    )
def add_page(doc_out, doc_src, clip, gap_pt):
    """Append an A4 page showing the clipped puzzle twice, side by side.

    The scale is chosen so that two puzzles plus three gaps of ``gap_pt``
    (left, middle, right) exactly fill the page width. The puzzles are
    centred vertically and a grey caption with the gap, scale and size is
    placed at the top.
    """
    scale = (A4_W - 3 * gap_pt) / 2 / clip.width
    pw = clip.width * scale
    ph = clip.height * scale
    top = (A4_H - ph) / 2  # vertical centring

    page = doc_out.new_page(width=A4_W, height=A4_H)
    for slot in (0, 1):
        left = gap_pt + slot * (pw + gap_pt)
        target = fitz.Rect(left, top, left + pw, top + ph)
        page.show_pdf_page(target, doc_src, 0, clip=clip)

    pct = scale * 100
    w_mm = pw / 72 * 25.4
    h_mm = ph / 72 * 25.4
    label = (f"mezera {gap_pt:.0f} pt | měřítko {pct:.0f} % | "
             f"puzzle {pw:.0f} × {ph:.0f} pt = {w_mm:.0f} × {h_mm:.0f} mm")
    page.insert_text((30, 25), label, fontsize=9, color=(0.4, 0.4, 0.4))
def main():
    """Build the two-page preview: gaps of 10 pt and 20 pt, one page each."""
    doc_src = fitz.open(str(SRC))
    clip = detect_clip(doc_src[0])
    print(f"Oříznutý puzzle: {clip.width:.1f} × {clip.height:.1f} pt")

    doc_out = fitz.open()
    for gap in (10, 20):
        add_page(doc_out, doc_src, clip, gap)
        # Same scale formula as add_page, recomputed only for the console report.
        scale = (A4_W - 3 * gap) / 2 / clip.width
        scaled_w = clip.width * scale
        scaled_h = clip.height * scale
        print(f" gap={gap} pt -> meritko {scale*100:.0f} % puzzle {scaled_w:.0f}x{scaled_h:.0f} pt")

    doc_out.save(str(DST))
    doc_src.close()
    doc_out.close()
    print(f"\nUloženo: {DST}")
if __name__ == "__main__":
main()
@@ -0,0 +1,76 @@
"""
2 puzzle na A4 — 110 %, pod sebou, horizontálně vycentrované.
Místo vlevo/vpravo zůstává pro poznámky.
"""
import fitz
from pathlib import Path
SRC = Path(r"U:/ordinaceprojekt/SběrDatRůzné/DailySudokuKiller/Testy/2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf")
DST = Path(r"U:/ordinaceprojekt/SběrDatRůzné/DailySudokuKiller/Testy/two_vertical_110.pdf")
A4_W = 595.276
A4_H = 841.890
CROP_MARGIN = 2
SCALE = 1.10
def detect_clip(page) -> fitz.Rect:
    """Return the clip rectangle around the drawings crossed by a mid-page ray.

    Drawings whose bounding rectangle spans the horizontal line at half the
    page height are collected. The result is their union, widened left and
    right by half the stroke width of the outermost drawing and grown by
    ``CROP_MARGIN`` on all sides.

    Raises:
        ValueError: No drawing crosses the mid-page line. Previously this
            surfaced as an opaque ``min()`` error on an empty sequence; the
            exception type is unchanged.
    """
    paths = page.get_drawings()
    y_mid = page.mediabox.height / 2
    hit_h = [(p["rect"], p.get("width") or 0) for p in paths
             if p["rect"].y0 <= y_mid <= p["rect"].y1]
    if not hit_h:
        raise ValueError("Ray-cast detekce selhala — zadne kresby na y_mid")

    rects = [r for r, _ in hit_h]
    x_left = min(r.x0 for r in rects)
    x_right = max(r.x1 for r in rects)
    top_cut = min(r.y0 for r in rects)
    bot_cut = max(r.y1 for r in rects)

    # Stroke width of the first drawing touching each outer edge.
    lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0)
    lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0)
    return fitz.Rect(
        x_left - lw_l / 2 - CROP_MARGIN,
        top_cut - CROP_MARGIN,
        x_right + lw_r / 2 + CROP_MARGIN,
        bot_cut + CROP_MARGIN,
    )
def main():
    """Place the sample puzzle twice on one A4 page, stacked and centred.

    The puzzle is scaled by ``SCALE``. Vertical space is split into three
    equal gaps (above, between, below); the horizontal offset centres the
    puzzle and leaves the side margins free.
    """
    doc_src = fitz.open(str(SRC))
    clip = detect_clip(doc_src[0])

    pw = clip.width * SCALE
    ph = clip.height * SCALE

    # Horizontal centring; the same value is the free space on each side.
    x0 = (A4_W - pw) / 2
    side_space = x0

    # Three equal vertical gaps.
    gap = (A4_H - 2 * ph) / 3
    tops = (gap, gap + ph + gap)

    print(f"Puzzle: {pw:.1f} x {ph:.1f} pt ({pw/72*25.4:.0f} x {ph/72*25.4:.0f} mm)")
    print(f"Meritko: {SCALE*100:.0f} %")
    print(f"Misto vlevo/vpravo: {side_space:.1f} pt ({side_space/72*25.4:.0f} mm)")
    print(f"Mezera mezi puzzle: {gap:.1f} pt ({gap/72*25.4:.0f} mm)")

    doc_out = fitz.open()
    page = doc_out.new_page(width=A4_W, height=A4_H)
    for top in tops:
        target = fitz.Rect(x0, top, x0 + pw, top + ph)
        page.show_pdf_page(target, doc_src, 0, clip=clip)

    doc_out.save(str(DST))
    doc_src.close()
    doc_out.close()
    print(f"Ulozeno: {DST}")
if __name__ == "__main__":
main()
@@ -0,0 +1,99 @@
"""
Změří finální puzzle, spočítá layout "2PuzzleOnA4" a uloží do layouts.json.
"""
import json
import fitz
from pathlib import Path
SRC = Path(r"U:/ordinaceprojekt/SběrDatRůzné/DailySudokuKiller/Testy/2009-05-04 Puzzle SudokuKiller 376 [difficulty 4 of 10] [average solving time 30 min].pdf")
JSON_PATH = Path(r"U:/ordinaceprojekt/SběrDatRůzné/DailySudokuKiller/layouts.json")
A4_W_PT = 595.276
A4_H_PT = 841.890
CROP_MARGIN = 2
TARGET_SCALE = 1.10 # 110 % — to co se nám líbilo
def pt_to_mm(pt):
    """Convert points (1/72 inch) to millimetres, rounded to two decimals."""
    inches = pt / 72
    millimetres = inches * 25.4
    return round(millimetres, 2)
def detect_clip(page) -> fitz.Rect:
    """Return the clip rectangle around the drawings crossed by a mid-page ray.

    Drawings whose bounding rectangle spans the horizontal line at half the
    page height are collected. The result is their union, widened left and
    right by half the stroke width of the outermost drawing and grown by
    ``CROP_MARGIN`` on all sides.

    Raises:
        ValueError: No drawing crosses the mid-page line. Previously this
            surfaced as an opaque ``min()`` error on an empty sequence; the
            exception type is unchanged.
    """
    paths = page.get_drawings()
    y_mid = page.mediabox.height / 2
    hit_h = [(p["rect"], p.get("width") or 0) for p in paths
             if p["rect"].y0 <= y_mid <= p["rect"].y1]
    if not hit_h:
        raise ValueError("Ray-cast detekce selhala — zadne kresby na y_mid")

    rects = [r for r, _ in hit_h]
    x_left = min(r.x0 for r in rects)
    x_right = max(r.x1 for r in rects)
    top_cut = min(r.y0 for r in rects)
    bot_cut = max(r.y1 for r in rects)

    # Stroke width of the first drawing touching each outer edge.
    lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0)
    lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0)
    return fitz.Rect(
        x_left - lw_l / 2 - CROP_MARGIN,
        top_cut - CROP_MARGIN,
        x_right + lw_r / 2 + CROP_MARGIN,
        bot_cut + CROP_MARGIN,
    )
def main():
    """Measure the sample puzzle and store the "2PuzzleOnA4" layout in the JSON file.

    The clip size of the sample is scaled by ``TARGET_SCALE`` to get the
    target puzzle size. An existing JSON file is loaded and only the
    "2PuzzleOnA4" key is added or overwritten; other keys are kept.
    """
    doc = fitz.open(str(SRC))
    clip = detect_clip(doc[0])
    doc.close()

    raw_w_mm = pt_to_mm(clip.width)
    raw_h_mm = pt_to_mm(clip.height)

    target_w_pt = clip.width * TARGET_SCALE
    target_h_pt = clip.height * TARGET_SCALE
    target_w_mm = round(pt_to_mm(target_w_pt), 2)
    target_h_mm = round(pt_to_mm(target_h_pt), 2)

    # Three equal vertical gaps around two puzzles; equal margins left and right.
    gap_pt = (A4_H_PT - 2 * target_h_pt) / 3
    side_pt = (A4_W_PT - target_w_pt) / 2

    page_spec = {
        "format": "A4",
        "width_pt": A4_W_PT,
        "height_pt": A4_H_PT
    }
    info = {
        "sample_raw_puzzle_mm": f"{raw_w_mm} x {raw_h_mm}",
        "scale_used_for_sample": TARGET_SCALE,
        "side_margin_mm": pt_to_mm(side_pt),
        "gap_between_puzzles_mm": pt_to_mm(gap_pt)
    }
    layout = {
        "2PuzzleOnA4": {
            "description": "2 puzzle pod sebou, horizontalne vycentrovane, misto po stranach na vypocty",
            "page": page_spec,
            "count": 2,
            "arrangement": "vertical",
            "horizontal_align": "center",
            "vertical_distribution": "equal_gaps",
            "target_puzzle_width_mm": target_w_mm,
            "target_puzzle_height_mm": target_h_mm,
            "crop_margin_pt": CROP_MARGIN,
            "info": info
        }
    }

    # Merge into the existing file so other layouts are preserved.
    if JSON_PATH.exists():
        merged = json.loads(JSON_PATH.read_text(encoding="utf-8"))
        merged.update(layout)
        layout = merged

    serialized = json.dumps(layout, indent=2, ensure_ascii=False)
    JSON_PATH.write_text(serialized, encoding="utf-8")

    print(f"Ulozeno: {JSON_PATH}")
    print(f" Surove puzzle: {raw_w_mm} x {raw_h_mm} mm")
    print(f" Cilova velikost: {target_w_mm} x {target_h_mm} mm")
    print(f" Misto po stranach: {pt_to_mm(side_pt):.1f} mm")
    print(f" Mezera mezi puzzle: {pt_to_mm(gap_pt):.1f} mm")
if __name__ == "__main__":
main()
@@ -0,0 +1,133 @@
"""
Načte layout z layouts.json a aplikuje ho na 2 vstupní PDF soubory.
Použití:
python 27_ApplyLayout.py <pdf1> <pdf2> <vystup.pdf> [--layout 2PuzzleOnA4]
Skript si sám detekuje hranice každého puzzle (ray-cast), spočítá
scale z aktuální velikosti vs. cílové velikosti v JSON a rozmístí je.
"""
import sys
import json
import argparse
import fitz
from pathlib import Path
LAYOUTS_JSON = Path(__file__).parent.parent / "layouts.json"
DEFAULT_LAYOUT = "2PuzzleOnA4"
CROP_MARGIN_FALLBACK = 2
def detect_clip(page, crop_margin) -> fitz.Rect:
    """Return the clip rectangle around the drawings crossed by a mid-page ray.

    Drawings whose bounding rectangle spans the horizontal line at half the
    page height are collected. The result is their union, widened left and
    right by half the stroke width of the outermost drawing and grown by
    ``crop_margin`` on all sides.

    Raises:
        ValueError: No drawing crosses the mid-page line.
    """
    mid_y = page.mediabox.height / 2

    crossing = []
    for drawing in page.get_drawings():
        box = drawing["rect"]
        if box.y0 <= mid_y <= box.y1:
            crossing.append((box, drawing.get("width") or 0))
    if not crossing:
        raise ValueError("Ray-cast detekce selhala — zadne kresby na y_mid")

    boxes = [box for box, _ in crossing]
    left = min(box.x0 for box in boxes)
    right = max(box.x1 for box in boxes)
    top = min(box.y0 for box in boxes)
    bottom = max(box.y1 for box in boxes)

    # Stroke width of the first drawing that touches each outer edge.
    left_stroke = next((width for box, width in crossing if box.x0 == left), 0)
    right_stroke = next((width for box, width in crossing if box.x1 == right), 0)

    return fitz.Rect(
        left - left_stroke / 2 - crop_margin,
        top - crop_margin,
        right + right_stroke / 2 + crop_margin,
        bottom + crop_margin,
    )
def mm_to_pt(mm):
    """Convert millimetres to points (1/72 inch)."""
    inches = mm / 25.4
    return inches * 72
def apply_2_vertical(doc_out, sources, layout):
    """Add one page to ``doc_out`` with two puzzles stacked vertically.

    Args:
        doc_out: Output document that receives the new page.
        sources: Source documents; the first page of each is clipped with
            ``detect_clip``. The vertical layout reads the first two entries.
        layout: Layout dict providing ``page.width_pt``, ``page.height_pt``,
            ``target_puzzle_width_mm``, ``target_puzzle_height_mm`` and
            optionally ``crop_margin_pt``.

    Each puzzle is centred horizontally; the vertical space is split into
    three equal gaps (above, between, below).
    """
    page_spec = layout["page"]
    page_w = page_spec["width_pt"]
    page_h = page_spec["height_pt"]
    target_w_pt = mm_to_pt(layout["target_puzzle_width_mm"])
    target_h_pt = mm_to_pt(layout["target_puzzle_height_mm"])
    crop_margin = layout.get("crop_margin_pt", CROP_MARGIN_FALLBACK)

    page = doc_out.new_page(width=page_w, height=page_h)

    # Detect each puzzle's clip and compute its individually scaled size.
    clips = []
    positions = []
    for doc_src in sources:
        clip = detect_clip(doc_src[0], crop_margin)
        clips.append(clip)

        scale_w = target_w_pt / clip.width
        scale_h = target_h_pt / clip.height
        actual_w_mm = clip.width / 72 * 25.4
        actual_h_mm = clip.height / 72 * 25.4
        print(f" Puzzle: {actual_w_mm:.1f} x {actual_h_mm:.1f} mm -> scale {scale_w:.3f} x {scale_h:.3f}")

        positions.append((clip.width * scale_w, clip.height * scale_h))

    # Vertical placement with equal gaps (both puzzles are assumed to have
    # the same height).
    first_h = positions[0][1]
    second_h = positions[1][1]
    gap0 = (page_h - first_h - second_h) / 3
    y_first = gap0
    y_second = gap0 + first_h + gap0

    for idx, (doc_src, clip, (pw, ph)) in enumerate(zip(sources, clips, positions)):
        x0 = (page_w - pw) / 2
        y_pos = y_first if idx == 0 else y_second
        target = fitz.Rect(x0, y_pos, x0 + pw, y_pos + ph)
        page.show_pdf_page(target, doc_src, 0, clip=clip)

    side_mm = ((page_w - positions[0][0]) / 2) / 72 * 25.4
    gap_mm = gap0 / 72 * 25.4
    print(f" Misto po stranach: {side_mm:.1f} mm | Mezera: {gap_mm:.1f} mm")
def main():
    """Command-line entry point: apply a stored layout to two puzzle PDFs.

    Exits with status 1 when the layouts file is missing or when the
    requested layout name is not present in it.
    """
    cli = argparse.ArgumentParser(description="Aplikuje layout na 2 puzzle PDF")
    cli.add_argument("pdf1", help="Prvni puzzle PDF")
    cli.add_argument("pdf2", help="Druhy puzzle PDF")
    cli.add_argument("vystup", help="Vystupni PDF")
    cli.add_argument("--layout", default=DEFAULT_LAYOUT, help=f"Nazev layoutu (default: {DEFAULT_LAYOUT})")
    args = cli.parse_args()

    if not LAYOUTS_JSON.exists():
        print(f"CHYBA: {LAYOUTS_JSON} nenalezen. Spust nejdrive 26_SaveLayout.py.", file=sys.stderr)
        sys.exit(1)

    layouts = json.loads(LAYOUTS_JSON.read_text(encoding="utf-8"))
    if args.layout not in layouts:
        print(f"CHYBA: layout '{args.layout}' nenalezen v {LAYOUTS_JSON}", file=sys.stderr)
        print(f"Dostupne layouty: {list(layouts)}", file=sys.stderr)
        sys.exit(1)

    layout = layouts[args.layout]
    width_mm = layout['target_puzzle_width_mm']
    height_mm = layout['target_puzzle_height_mm']
    print(f"Layout: {args.layout}")
    print(f"Cilova velikost: {width_mm} x {height_mm} mm")

    doc1 = fitz.open(args.pdf1)
    doc2 = fitz.open(args.pdf2)
    doc_out = fitz.open()

    apply_2_vertical(doc_out, [doc1, doc2], layout)
    doc_out.save(args.vystup)

    doc1.close()
    doc2.close()
    doc_out.close()
    print(f"Ulozeno: {args.vystup}")
if __name__ == "__main__":
main()
@@ -0,0 +1,199 @@
"""
Batch ořez puzzle z MySQL.
Pro každý řádek v sudoku_killer kde file_puzzle_cropped IS NULL:
- načte file_puzzle + crop_method
- ořízne podle metody
- uloží zpět do file_puzzle_cropped
"""
# ---------------------------------------------------------------------------
# Nastavení — upravuj zde před spuštěním v PyCharm
# ---------------------------------------------------------------------------
WORKERS = 4 # počet paralelních procesů
LIMIT = None # None = vše; číslo (např. 20) = jen prvních N puzzle (pro testování)
BATCH = 200 # kolik oříznutých PDF uložit najednou do DB
DRY_RUN = False # True = jen ořez, nic se neuloží do DB
LOG_EVERY = 500 # vypiš stav do konzole každých N zpracovaných puzzle
# ---------------------------------------------------------------------------
import sys
import json
import csv
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor, as_completed
import fitz
from tqdm import tqdm
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
from mysql_db import connect_mysql
sys.stdout.reconfigure(encoding="utf-8")
sys.stderr.reconfigure(encoding="utf-8")
ERRORS_CSV = Path(__file__).parent / "crop_errors.csv"
# ---------------------------------------------------------------------------
# Crop metody — přidat sem nové funkce pro nové metody
# ---------------------------------------------------------------------------
def crop_raycast_auto(pdf_bytes: bytes, params: dict) -> bytes:
    """Crop the first page of a PDF to the drawings crossed by a mid-page ray.

    All vector drawings whose bounding rectangle spans the vertical middle of
    the page are collected. The crop box is the union of those rectangles,
    widened on the left/right by half of the ``width`` value reported for the
    outermost drawings (presumably their stroke width) and padded on every
    side by the crop margin.

    Args:
        pdf_bytes: Source PDF document as raw bytes.
        params: Method parameters; only ``crop_margin_pt`` (default 2) is
            read. ``None`` is treated like an empty dict.

    Returns:
        Bytes of a new single-page PDF showing only the clipped region.

    Raises:
        ValueError: If no drawing crosses the middle of the page.
    """
    crop_margin = (params or {}).get("crop_margin_pt", 2)

    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        page = doc[0]
        paths = page.get_drawings()
        y_mid = page.mediabox.height / 2

        # (bounding rect, line width) of every drawing hit by the horizontal ray.
        hit_h = [(p["rect"], p.get("width") or 0) for p in paths
                 if p["rect"].y0 <= y_mid <= p["rect"].y1]
        if not hit_h:
            raise ValueError("ray-cast: zadne kresby na y_mid")

        rects = [r for r, _ in hit_h]
        x_left = min(r.x0 for r in rects)
        x_right = max(r.x1 for r in rects)
        top_cut = min(r.y0 for r in rects)
        bot_cut = max(r.y1 for r in rects)

        # Line widths of the drawings that define the left/right extremes.
        lw_l = next((lw for r, lw in hit_h if r.x0 == x_left), 0)
        lw_r = next((lw for r, lw in hit_h if r.x1 == x_right), 0)

        clip = fitz.Rect(
            x_left - lw_l / 2 - crop_margin,
            top_cut - crop_margin,
            x_right + lw_r / 2 + crop_margin,
            bot_cut + crop_margin,
        )

        doc_new = fitz.open()
        try:
            new_page = doc_new.new_page(width=clip.width, height=clip.height)
            new_page.show_pdf_page(
                fitz.Rect(0, 0, clip.width, clip.height), doc, 0, clip=clip
            )
            return doc_new.tobytes()
        finally:
            doc_new.close()
    finally:
        # Always release the source document, including on the ValueError path;
        # a worker process handles many PDFs and must not accumulate open docs.
        doc.close()
CROP_METHODS = {
"raycast_auto": crop_raycast_auto,
}
# ---------------------------------------------------------------------------
# Worker — spouští se v samostatném procesu
# ---------------------------------------------------------------------------
def process_one(args):
    """Crop a single puzzle PDF; designed to run inside a worker process.

    Args:
        args: Tuple ``(puzzle_id, puzzle_number, pdf_bytes, method_name,
            params_json)``. ``params_json`` is decoded with ``json.loads`` when
            it is a string and passed through unchanged otherwise.

    Returns:
        ``(puzzle_id, puzzle_number, cropped_bytes, None)`` on success or
        ``(puzzle_id, puzzle_number, None, error_message)`` on failure. The
        error message is never empty, so callers can tell failure from success
        even when the underlying exception carries no text.
    """
    puzzle_id, puzzle_number, pdf_bytes, method_name, params_json = args
    try:
        params = json.loads(params_json) if isinstance(params_json, str) else params_json
        fn = CROP_METHODS.get(method_name)
        if fn is None:
            return puzzle_id, puzzle_number, None, f"neznama metoda: {method_name}"
        cropped = fn(bytes(pdf_bytes), params)
        return puzzle_id, puzzle_number, cropped, None
    except Exception as e:
        # str(e) is "" for exceptions raised without a message; fall back to
        # repr so the failure is not mistaken for a success downstream.
        return puzzle_id, puzzle_number, None, str(e) or repr(e)
# ---------------------------------------------------------------------------
# Hlavní logika
# ---------------------------------------------------------------------------
def fetch_todo(limit):
    """Load all ``sudoku_killer`` rows that still lack a cropped PDF.

    Args:
        limit: Maximum number of rows to fetch; a falsy value (``None``, 0)
            means no limit.

    Returns:
        The rows returned by the cursor (a DictCursor is requested), with keys
        ``id``, ``puzzle_number``, ``file_puzzle``, ``method_name`` and
        ``params_json``, ordered by ``puzzle_number``.
    """
    import pymysql.cursors

    sql = """
        SELECT sk.id, sk.puzzle_number, sk.file_puzzle,
        cm.name AS method_name, cm.params_json
        FROM sudoku_killer sk
        JOIN puzzle_crop_method cm ON sk.crop_method_id = cm.id
        WHERE sk.file_puzzle_cropped IS NULL
        ORDER BY sk.puzzle_number
        """
    if limit:
        # int() guarantees that only a number is interpolated into the SQL.
        sql += f" LIMIT {int(limit)}"

    conn = connect_mysql(database="puzzle", cursorclass=pymysql.cursors.DictCursor)
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql)
            return cur.fetchall()
        finally:
            cur.close()
    finally:
        # Release the connection even when the query fails.
        conn.close()
def save_cropped(updates: list[tuple]):
    """Write a batch of cropped PDFs into ``sudoku_killer.file_puzzle_cropped``.

    Args:
        updates: ``[(cropped_bytes, puzzle_id), ...]``; an empty list is a
            no-op and does not open a connection.
    """
    if not updates:
        return

    import pymysql.cursors

    conn = connect_mysql(database="puzzle", cursorclass=pymysql.cursors.DictCursor)
    try:
        cur = conn.cursor()
        try:
            cur.executemany(
                "UPDATE sudoku_killer SET file_puzzle_cropped = %s WHERE id = %s",
                updates,
            )
        finally:
            cur.close()
        # NOTE(review): connect_mysql may already enable autocommit (not visible
        # here). PyMySQL defaults to autocommit off, in which case the UPDATEs
        # would be rolled back on close; the explicit commit is harmless if
        # autocommit is on.
        conn.commit()
    finally:
        conn.close()
def main():
    """Crop every not-yet-cropped puzzle in parallel and store the results.

    Behaviour is driven by the module-level settings: ``LIMIT`` (rows to
    fetch), ``WORKERS`` (process pool size), ``BATCH`` (rows per DB write),
    ``DRY_RUN`` (skip DB writes) and ``LOG_EVERY`` (progress line interval).
    Failures are collected and written to ``ERRORS_CSV``.
    """
    print("Nacitam seznam puzzle k orizeni...")
    rows = fetch_todo(LIMIT)
    total = len(rows)
    if total == 0:
        print("Vsechny puzzle jsou jiz orizeny.")
        return

    print(f"Ke zpracovani: {total} puzzle | workers: {WORKERS} | batch: {BATCH} | dry-run: {DRY_RUN}")

    errors = []
    pending_saves = []  # [(cropped_bytes, puzzle_id)] waiting for the next DB batch
    done = 0

    tasks = [
        (r["id"], r["puzzle_number"], r["file_puzzle"], r["method_name"], r["params_json"])
        for r in rows
    ]

    with ProcessPoolExecutor(max_workers=WORKERS) as executor:
        futures = [executor.submit(process_one, t) for t in tasks]

        with tqdm(total=total, unit="puzzle") as bar:
            for future in as_completed(futures):
                puzzle_id, puzzle_number, cropped, err = future.result()

                # Compare against None instead of relying on truthiness: an
                # exception without a message yields err == "", which would
                # otherwise be counted (and saved) as a successful crop.
                if err is not None:
                    errors.append({"puzzle_id": puzzle_id, "puzzle_number": puzzle_number, "chyba": err})
                    tqdm.write(f" [CHYBA] puzzle #{puzzle_number}: {err}")
                elif not DRY_RUN:
                    pending_saves.append((cropped, puzzle_id))
                    if len(pending_saves) >= BATCH:
                        save_cropped(pending_saves)
                        pending_saves.clear()

                done += 1
                bar.update(1)
                bar.set_postfix(chyby=len(errors), ulozeno=done - len(errors) - len(pending_saves))

                if done % LOG_EVERY == 0:
                    remaining = total - done
                    pct = done / total * 100
                    tqdm.write(f" >> {done}/{total} ({pct:.1f}%) | puzzle #{puzzle_number} | zbyvá: {remaining} | chyby: {len(errors)}")

    # Flush whatever did not fill a complete batch.
    if pending_saves and not DRY_RUN:
        save_cropped(pending_saves)

    if errors:
        with open(ERRORS_CSV, "w", newline="", encoding="utf-8") as f:
            w = csv.DictWriter(f, fieldnames=["puzzle_id", "puzzle_number", "chyba"])
            w.writeheader()
            w.writerows(errors)
        print(f"\nChyby: {len(errors)} — viz {ERRORS_CSV}")
    else:
        print("\nVse bez chyb.")

    ok = done - len(errors)
    print(f"Hotovo: {ok} orizeno, {len(errors)} chyb, {total - done} preskoceno.")


if __name__ == "__main__":
    main()
@@ -0,0 +1,36 @@
"""
Exportuje originální PDF puzzle z tabulky sudoku_killer pro porovnání.
"""
import sys
from pathlib import Path
sys.stdout.reconfigure(encoding="utf-8")
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
from mysql_db import connect_mysql
OUTPUT_DIR = Path(__file__).parent

# Number of the puzzle to export; kept in one place so the query and the
# "not found" message cannot drift apart.
PUZZLE_NUMBER = 31414

conn = connect_mysql(database="puzzle")
try:
    cur = conn.cursor()
    try:
        cur.execute(
            "SELECT puzzle_number, file_puzzle, file_solution "
            "FROM sudoku_killer WHERE puzzle_number = %s",
            (PUZZLE_NUMBER,),
        )
        row = cur.fetchone()
    finally:
        cur.close()
finally:
    # Close the connection even if the query fails.
    conn.close()

if not row:
    print(f"Puzzle {PUZZLE_NUMBER} nenalezen v sudoku_killer.")
else:
    num, pdf_puzzle, pdf_solution = row
    # Write whichever of the two PDFs is present next to this script.
    for label, blob in (("puzzle", pdf_puzzle), ("solution", pdf_solution)):
        if blob:
            path = OUTPUT_DIR / f"original_{num}_{label}.pdf"
            path.write_bytes(blob)
            print(f"Uloženo: {path}")
@@ -0,0 +1,151 @@
"""
Naimportuje stažené PDF puzzle z DownloadedPuzzles/ do MySQL tabulky sudoku_killer.
Spuštění:
python import_do_mysql.py # přeskočí již existující (podle puzzle_number)
python import_do_mysql.py --all # reimportuje vše (přepíše existující)
"""
import re
import sys
import argparse
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
from mysql_db import connect_mysql
sys.stdout.reconfigure(encoding="utf-8")
sys.stderr.reconfigure(encoding="utf-8")
SAVE_DIR = Path(__file__).parent / "DownloadedPuzzles"
# 2009-01-01 Puzzle SudokuKiller 1 [difficulty 5 of 10] [average solving time 47 min].pdf
FILENAME_RE = re.compile(
r"^(?P<date>\d{4}-\d{2}-\d{2}) Puzzle (?P<type>SudokuKillerGreaterThan|SudokuKiller) (?P<num>\d+) "
r"\[difficulty (?P<diff>\d+) of (?P<maxdiff>\d+)\] "
r"\[average solving time (?P<time>[^\]]+)\]"
r"(?P<solution> \[solution\])?\.pdf$"
)
def parse_time_to_minutes(time_str):
    """Convert a solving-time label to a total number of minutes.

    Recognised forms are ``'<H>h <M>m'`` (e.g. ``'1h 7m'``, ``'17h 44m'``) and
    ``'<M> min'`` (e.g. ``'47 min'``); surrounding whitespace is ignored.

    Returns:
        The total minutes as ``int``, or ``None`` for any other format.
    """
    text = time_str.strip()

    hours_and_minutes = re.match(r"^(\d+)h\s+(\d+)m$", text)
    if hours_and_minutes is not None:
        hours, minutes = (int(part) for part in hours_and_minutes.groups())
        return 60 * hours + minutes

    minutes_only = re.match(r"^(\d+)\s+min$", text)
    return None if minutes_only is None else int(minutes_only.group(1))
def load_puzzle_types(cursor):
    """Return a ``{name: id}`` mapping of all rows in ``puzzle_type``.

    The cursor must yield rows addressable by column name (``row["name"]``).
    """
    cursor.execute("SELECT id, name FROM puzzle_type")
    ids_by_name = {}
    for record in cursor.fetchall():
        ids_by_name[record["name"]] = record["id"]
    return ids_by_name
def load_existing_numbers(cursor):
    """Return the set of ``puzzle_number`` values already in ``sudoku_killer``.

    The cursor must yield rows addressable by column name.
    """
    cursor.execute("SELECT puzzle_number FROM sudoku_killer")
    known = set()
    for record in cursor.fetchall():
        known.add(record["puzzle_number"])
    return known
def parse_files():
    """Scan ``SAVE_DIR`` and group the recognised PDFs by puzzle number.

    Returns:
        ``{puzzle_number: info}`` where ``info`` carries the metadata parsed
        from the file name plus ``file_puzzle`` / ``file_solution`` paths
        (``None`` when the respective file was not found). Files whose name
        does not match ``FILENAME_RE`` are reported on stderr and ignored.
    """
    by_number = {}
    for entry in SAVE_DIR.iterdir():
        match = FILENAME_RE.match(entry.name)
        if match is None:
            print(f"[SKIP] Nerozpoznaný název: {entry.name}", file=sys.stderr)
            continue

        number = int(match.group("num"))
        # Metadata comes from whichever file of the pair is seen first.
        if number not in by_number:
            by_number[number] = {
                "puzzle_number": number,
                "puzzle_date": match.group("date"),
                "puzzle_type": match.group("type"),
                "difficulty": int(match.group("diff")),
                "max_difficulty": int(match.group("maxdiff")),
                "avg_minutes": parse_time_to_minutes(match.group("time")),
                "file_puzzle": None,
                "file_solution": None,
            }

        slot = "file_solution" if match.group("solution") else "file_puzzle"
        by_number[number][slot] = entry
    return by_number
def import_puzzle(cursor, puzzle, type_ids):
    """Insert or update one puzzle row in ``sudoku_killer``.

    Args:
        cursor: DB cursor used to execute the upsert.
        puzzle: One entry as produced by ``parse_files``.
        type_ids: ``{puzzle type name: puzzle_type id}``.

    Returns:
        ``True`` when the statement was executed, ``False`` when the puzzle
        was skipped (missing puzzle PDF or unknown puzzle type).
    """
    puzzle_path = puzzle["file_puzzle"]
    if puzzle_path is None:
        print(f"[SKIP] puzzle_number={puzzle['puzzle_number']}: chybí PDF puzzlu")
        return False

    type_id = type_ids.get(puzzle["puzzle_type"])
    if type_id is None:
        print(f"[SKIP] Neznámý typ: {puzzle['puzzle_type']}")
        return False

    pdf_puzzle = puzzle_path.read_bytes()
    solution_path = puzzle["file_solution"]
    if solution_path:
        pdf_solution = solution_path.read_bytes()
    else:
        pdf_solution = None

    values = (
        puzzle["puzzle_number"],
        type_id,
        puzzle["puzzle_date"],
        puzzle["difficulty"],
        puzzle["max_difficulty"],
        puzzle["avg_minutes"],
        pdf_puzzle,
        pdf_solution,
    )
    # Upsert: an existing row is overwritten with the freshly parsed data.
    cursor.execute("""
        INSERT INTO sudoku_killer
        (puzzle_number, puzzle_type_id, puzzle_date, difficulty, max_difficulty,
        avg_solving_time_minutes, file_puzzle, file_solution)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        puzzle_type_id = VALUES(puzzle_type_id),
        puzzle_date = VALUES(puzzle_date),
        difficulty = VALUES(difficulty),
        max_difficulty = VALUES(max_difficulty),
        avg_solving_time_minutes = VALUES(avg_solving_time_minutes),
        file_puzzle = VALUES(file_puzzle),
        file_solution = VALUES(file_solution)
        """, values)
    return True
def main():
    """Import all recognised PDFs from the download directory into MySQL.

    Without ``--all`` puzzles whose number is already present in
    ``sudoku_killer`` are skipped; with ``--all`` every puzzle is upserted
    again. A progress line is printed whenever the running index of a
    processed (not skipped) puzzle is a multiple of 500.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--all", action="store_true", help="Reimportuje i existující záznamy")
    args = parser.parse_args()

    import pymysql.cursors

    conn = connect_mysql(database="puzzle", cursorclass=pymysql.cursors.DictCursor)
    try:
        cursor = conn.cursor()
        try:
            type_ids = load_puzzle_types(cursor)
            existing = load_existing_numbers(cursor) if not args.all else set()

            puzzles = parse_files()
            total = len(puzzles)
            print(f"Nalezeno {total} puzzle v adresáři.")

            imported = skipped = errors = 0
            for i, (num, puzzle) in enumerate(sorted(puzzles.items()), 1):
                if num in existing:
                    skipped += 1
                    continue
                try:
                    if import_puzzle(cursor, puzzle, type_ids):
                        imported += 1
                    else:
                        errors += 1
                except Exception as e:
                    print(f"[CHYBA] puzzle_number={num}: {e}", file=sys.stderr)
                    errors += 1

                if i % 500 == 0:
                    # NOTE(review): connect_mysql may already enable
                    # autocommit (not visible here); PyMySQL's default is off,
                    # so commit explicitly. Committing at the progress
                    # interval keeps an interrupted run from losing everything.
                    conn.commit()
                    print(f" {i}/{total} zpracováno ({imported} importováno, {skipped} přeskočeno, {errors} chyb)")

            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()

    print(f"\nHotovo: {imported} importováno, {skipped} přeskočeno, {errors} chyb.")


if __name__ == "__main__":
    main()
@@ -0,0 +1,23 @@
{
"2PuzzleOnA4": {
"description": "2 puzzle pod sebou, horizontalne vycentrovane, misto po stranach na vypocty",
"page": {
"format": "A4",
"width_pt": 595.276,
"height_pt": 841.89
},
"count": 2,
"arrangement": "vertical",
"horizontal_align": "center",
"vertical_distribution": "equal_gaps",
"target_puzzle_width_mm": 117.83,
"target_puzzle_height_mm": 117.83,
"crop_margin_pt": 2,
"info": {
"sample_raw_puzzle_mm": "107.12 x 107.12",
"scale_used_for_sample": 1.1,
"side_margin_mm": 46.09,
"gap_between_puzzles_mm": 20.45
}
}
}
@@ -0,0 +1,184 @@
"""
Průzkumný skript: zkouší najít strukturovaná data puzzle
na dailykillersudoku.com (cage definice, řešení).
"""
import asyncio
import json
import sys
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
URL = "https://www.dailykillersudoku.com/puzzle/70000"
async def main():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
context = await browser.new_context(viewport={"width": 1280, "height": 900})
page = await context.new_page()
# Zachytávat network requesty
api_responses = []
async def on_response(response):
    """Record responses whose URL looks data-related.

    A response is kept when its URL contains one of the keywords below. The
    stored body is truncated to 2000 characters; if the body cannot be read a
    placeholder is stored instead.
    """
    url = response.url
    if any(k in url for k in ["api", "puzzle", "data", "json", "cage", "grid"]):
        try:
            body = await response.text()
            api_responses.append({"url": url, "status": response.status, "body": body[:2000]})
        except Exception:
            # Only ordinary errors are swallowed; a bare ``except`` would also
            # eat task cancellation and KeyboardInterrupt.
            api_responses.append({"url": url, "status": response.status, "body": "(could not read)"})
page.on("response", on_response)
print(f"Načítám {URL} ...")
await page.goto(URL, wait_until="networkidle", timeout=60_000)
# 1) Network requesty
print("\n=== Zachycené API/data requesty ===")
for r in api_responses:
print(f"\n URL: {r['url']}")
print(f" Status: {r['status']}")
if r['body'] and len(r['body']) < 2000:
print(f" Body: {r['body'][:500]}")
# 2) Globální JS proměnné
print("\n=== Globální proměnné ===")
globals_check = await page.evaluate("""() => {
const names = ['puzzle', 'puzzleData', 'gameData', 'game', 'board',
'grid', 'cages', 'cells', 'solution', 'killerData',
'sudoku', 'level', 'data', 'config', 'state',
'app', 'store', 'vuex', '__NUXT__', '__NEXT_DATA__',
'initialData', 'pageData', 'props', 'serverData'];
const found = {};
for (const name of names) {
if (typeof window[name] !== 'undefined') {
const val = window[name];
found[name] = {
type: typeof val,
keys: typeof val === 'object' && val !== null ? Object.keys(val).slice(0, 20) : null
};
}
}
return found;
}""")
print(json.dumps(globals_check, indent=2))
# 3) SVG/Canvas analýza
print("\n=== SVG/Canvas elementy ===")
svg_info = await page.evaluate("""() => {
const svgs = document.querySelectorAll('svg');
const canvases = document.querySelectorAll('canvas');
return {
svg_count: svgs.length,
canvas_count: canvases.length,
svg_ids: Array.from(svgs).map(s => s.id || s.className || '(no id)').slice(0, 5),
canvas_ids: Array.from(canvases).map(c => c.id || c.className || '(no id)').slice(0, 5)
};
}""")
print(json.dumps(svg_info, indent=2))
# 4) Data atributy
print("\n=== Elementy s data- atributy ===")
data_attrs = await page.evaluate("""() => {
const all = document.querySelectorAll('[data-cage], [data-cell], [data-sum], [data-group], [data-value], [data-row], [data-col]');
return {
count: all.length,
samples: Array.from(all).slice(0, 5).map(el => ({
tag: el.tagName,
attrs: Object.fromEntries(Array.from(el.attributes).filter(a => a.name.startsWith('data-')).map(a => [a.name, a.value]))
}))
};
}""")
print(json.dumps(data_attrs, indent=2))
# 5) Tabulky a mřížky
print("\n=== Tabulky / grid struktury ===")
tables = await page.evaluate("""() => {
const tables = document.querySelectorAll('table');
const grids = document.querySelectorAll('[class*=grid], [class*=puzzle], [class*=board], [class*=cage], [class*=cell], [id*=grid], [id*=puzzle], [id*=board]');
return {
table_count: tables.length,
grid_elements: Array.from(grids).slice(0, 10).map(el => ({
tag: el.tagName,
id: el.id,
class: el.className.toString().substring(0, 100),
children: el.children.length
}))
};
}""")
print(json.dumps(tables, indent=2))
# 6) Script tagy s daty
print("\n=== Script tagy s daty ===")
scripts = await page.evaluate("""() => {
const scripts = document.querySelectorAll('script');
const results = [];
for (const s of scripts) {
const text = s.textContent || '';
if (text.length > 10 && text.length < 50000) {
const keywords = ['puzzle', 'cage', 'cell', 'grid', 'solution', 'board', 'sum'];
const found = keywords.filter(k => text.toLowerCase().includes(k));
if (found.length > 0) {
results.push({
keywords: found,
length: text.length,
snippet: text.substring(0, 500)
});
}
}
}
return results;
}""")
print(json.dumps(scripts, indent=2, ensure_ascii=False)[:5000])
# 7) Vue/React/Angular state
print("\n=== Framework state ===")
framework = await page.evaluate("""() => {
// Vue
const vueEl = document.querySelector('[data-v-app]') || document.querySelector('#app') || document.querySelector('#__nuxt');
let vueData = null;
if (vueEl && vueEl.__vue_app__) {
vueData = 'Vue 3 app found';
} else if (vueEl && vueEl.__vue__) {
vueData = 'Vue 2 app found';
try {
const d = vueEl.__vue__.$data;
vueData = {type: 'Vue 2', keys: Object.keys(d)};
} catch(e) {}
}
// __NUXT__
if (typeof __NUXT__ !== 'undefined') {
try { vueData = {type: 'Nuxt', keys: Object.keys(__NUXT__)}; } catch(e) {}
}
// React
let reactData = null;
const reactRoot = document.querySelector('#__next') || document.querySelector('#root');
if (reactRoot) {
const fiberKey = Object.keys(reactRoot).find(k => k.startsWith('__reactFiber') || k.startsWith('__reactInternalInstance'));
if (fiberKey) reactData = 'React app found';
}
return {vue: vueData, react: reactData};
}""")
print(json.dumps(framework, indent=2))
# 8) Všechny window properties (custom)
print("\n=== Custom window properties ===")
custom_props = await page.evaluate("""() => {
const iframe = document.createElement('iframe');
document.body.appendChild(iframe);
const defaultKeys = new Set(Object.keys(iframe.contentWindow));
document.body.removeChild(iframe);
const custom = Object.keys(window).filter(k => !defaultKeys.has(k) && !k.startsWith('__'));
return custom.slice(0, 50);
}""")
print(json.dumps(custom_props, indent=2))
await browser.close()
if __name__ == "__main__":
asyncio.run(main())
@@ -0,0 +1,174 @@
"""
Průzkumný skript v2: zkouší najít strukturovaná data puzzle
na dailykillersudoku.com — prozkoumá DKS objekt a platný puzzle.
"""
import asyncio
import json
import sys
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
URL = "https://www.dailykillersudoku.com/puzzle/376"
async def main():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
context = await browser.new_context(viewport={"width": 1280, "height": 900})
page = await context.new_page()
api_responses = []
async def on_response(response):
    """Record textual responses served from the dailykillersudoku domain.

    The page URL itself is ignored. Only responses whose content type mentions
    ``json``, ``javascript`` or ``text`` are read; bodies shorter than 5000
    characters are stored (truncated to 2000), longer ones are replaced by
    their length. Read errors are ignored (best effort).
    """
    url = response.url
    if "dailykillersudoku" in url and url != URL:
        try:
            ct = response.headers.get("content-type", "")
            if "json" in ct or "javascript" in ct or "text" in ct:
                body = await response.text()
                if len(body) < 5000:
                    api_responses.append({"url": url, "status": response.status, "body": body[:2000]})
                else:
                    api_responses.append({"url": url, "status": response.status, "body": f"({len(body)} chars)"})
        except Exception:
            # Deliberately best-effort, but do not swallow cancellation or
            # KeyboardInterrupt the way a bare ``except`` would.
            pass
page.on("response", on_response)
print(f"Načítám {URL} ...")
await page.goto(URL, wait_until="networkidle", timeout=60_000)
# 1) DKS objekt — klíče
print("\n=== DKS objekt — klíče ===")
dks = await page.evaluate("""() => {
if (typeof DKS === 'undefined') return null;
const result = {};
for (const key of Object.keys(DKS)) {
const val = DKS[key];
const t = typeof val;
if (t === 'function') {
result[key] = 'function';
} else if (t === 'object' && val !== null) {
result[key] = {type: 'object', keys: Object.keys(val).slice(0, 15)};
} else {
result[key] = val;
}
}
return result;
}""")
if dks:
print(json.dumps(dks, indent=2, ensure_ascii=False)[:5000])
# 2) DKS.board nebo podobné puzzle objekty
print("\n=== DKS puzzle-related data ===")
puzzle_data = await page.evaluate("""() => {
if (typeof DKS === 'undefined') return null;
const result = {};
const interesting = ['board', 'puzzle', 'game', 'grid', 'cages', 'cells',
'solution', 'currentPuzzle', 'puzzleData', 'data',
'sudoku', 'killer', 'state'];
for (const key of Object.keys(DKS)) {
if (interesting.some(i => key.toLowerCase().includes(i))) {
try {
result[key] = JSON.parse(JSON.stringify(DKS[key]));
} catch(e) {
result[key] = String(DKS[key]).substring(0, 200);
}
}
}
return result;
}""")
if puzzle_data:
print(json.dumps(puzzle_data, indent=2, ensure_ascii=False)[:8000])
else:
print(" žádné puzzle data")
# 3) Script tagy s puzzle daty
print("\n=== Script tagy s puzzle daty ===")
scripts = await page.evaluate("""() => {
const scripts = document.querySelectorAll('script');
const results = [];
for (const s of scripts) {
const text = s.textContent || '';
if (text.includes('cage') || text.includes('cell') || text.includes('solution')
|| text.includes('group') || text.includes('sum') || text.includes('Board')
|| text.includes('Puzzle')) {
results.push({
length: text.length,
snippet: text.substring(0, 1000)
});
}
}
return results;
}""")
print(json.dumps(scripts, indent=2, ensure_ascii=False)[:8000])
# 4) Zachycené requesty
print("\n=== Zachycené requesty (dailykillersudoku) ===")
for r in api_responses:
print(f"\n URL: {r['url']}")
print(f" Status: {r['status']}")
print(f" Body: {r['body'][:500]}")
# 5) SVG obsah — puzzle mřížka
print("\n=== SVG puzzle mřížka ===")
svg_data = await page.evaluate("""() => {
const svgs = document.querySelectorAll('svg');
const results = [];
for (const svg of svgs) {
const html = svg.outerHTML;
if (html.length > 1000) {
// Pravděpodobně puzzle mřížka
const texts = svg.querySelectorAll('text');
const textContent = Array.from(texts).map(t => ({
text: t.textContent,
x: t.getAttribute('x'),
y: t.getAttribute('y'),
class: t.getAttribute('class')
}));
const paths = svg.querySelectorAll('path');
results.push({
size: html.length,
width: svg.getAttribute('width'),
height: svg.getAttribute('height'),
viewBox: svg.getAttribute('viewBox'),
text_count: texts.length,
path_count: paths.length,
texts: textContent.slice(0, 30)
});
}
}
return results;
}""")
print(json.dumps(svg_data, indent=2, ensure_ascii=False)[:5000])
# 6) Hledej inline JS s daty puzzle
print("\n=== Inline JS s puzzle daty ===")
inline_data = await page.evaluate("""() => {
const html = document.documentElement.innerHTML;
// Hledej vzory jako JSON pole, cage definice apod.
const patterns = [
/DKS\.\w+\s*=\s*(\{[^}]{20,}\})/g,
/DKS\.\w+\s*=\s*(\[[^\]]{20,}\])/g,
/var\s+\w+\s*=\s*(\{[^}]{50,}\})/g,
/puzzl\w*\s*[:=]\s*["'{[]/gi
];
const found = [];
for (const p of patterns) {
let m;
while ((m = p.exec(html)) !== null) {
found.push(m[0].substring(0, 300));
}
}
return found;
}""")
print(json.dumps(inline_data, indent=2, ensure_ascii=False)[:3000])
await browser.close()
if __name__ == "__main__":
asyncio.run(main())
@@ -0,0 +1,156 @@
"""
Průzkumný skript v3: dekóduje board_base64 a solution_base64
z dailykillersudoku.com — zjistí formát dat.
"""
import asyncio
import json
import sys
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
URL = "https://www.dailykillersudoku.com/puzzle/376"
async def main():
    """Load one puzzle page and dump the raw structures behind ``DKS.puzzle``.

    Prints, in order: the puzzle JSON, the decoded ``board_base64`` and
    ``solution_base64`` byte arrays, basic board info, the first cells, the
    board's property names, a summary of every board property and the
    solution object.
    """
    async with async_playwright() as pw:
        browser = await pw.chromium.launch(headless=True)
        ctx = await browser.new_context(viewport={"width": 1280, "height": 900})
        page = await ctx.new_page()

        print(f"Načítám {URL} ...")
        await page.goto(URL, wait_until="networkidle", timeout=60_000)

        # 1) Pull the puzzle JSON
        print("\n=== Puzzle JSON ===")
        raw_json = await page.evaluate("""() => {
            return DKS.puzzle._json;
        }""")
        print(json.dumps(raw_json, indent=2))

        # 2) Decode base64 -> raw bytes
        print("\n=== board_base64 dekódováno ===")
        board_raw = await page.evaluate("""() => {
            const b64 = DKS.puzzle._json.board_base64;
            const bytes = DKS.base64ToByteArray(b64);
            return Array.from(bytes);
        }""")
        board_len = len(board_raw)
        print(f" Délka: {board_len} bytes")
        print(f" Raw: {board_raw}")

        print("\n=== solution_base64 dekódováno ===")
        solution_raw = await page.evaluate("""() => {
            const b64 = DKS.puzzle._json.solution_base64;
            const bytes = DKS.base64ToByteArray(b64);
            return Array.from(bytes);
        }""")
        solution_len = len(solution_raw)
        print(f" Délka: {solution_len} bytes")
        print(f" Raw: {solution_raw}")

        # 3) How Board parses the data
        print("\n=== Board po rozbalení ===")
        board_info = await page.evaluate("""() => {
            const board = DKS.puzzle.board;
            return {
                size: board.size,
                cell_count: board._canvas ? 'has canvas' : 'no canvas',
            };
        }""")
        print(json.dumps(board_info, indent=2))

        # 4) Cells and cages read from the board
        print("\n=== Board cells ===")
        cells = await page.evaluate("""() => {
            const board = DKS.puzzle.board;
            if (!board._cells) return 'no _cells';
            const result = [];
            for (let r = 0; r < board.size; r++) {
                for (let c = 0; c < board.size; c++) {
                    const cell = board._cells[r][c];
                    result.push({
                        row: r, col: c,
                        value: cell._value || cell.value,
                        cage: cell._cage ? {
                            sum: cell._cage._sum || cell._cage.sum,
                            id: cell._cage._id || cell._cage.id
                        } : null
                    });
                }
            }
            return result;
        }""")
        if not isinstance(cells, list):
            # The JS side returns a plain string when the board has no _cells.
            print(f" {cells}")
        else:
            print(f" Celkem buněk: {len(cells)}")
            for c in cells[:20]:
                print(f" [{c['row']},{c['col']}] value={c.get('value')} cage={c.get('cage')}")

        # 5) Try to reach the cages via board properties
        print("\n=== Cages ===")
        board_props = await page.evaluate("""() => {
            const board = DKS.puzzle.board;
            // Zkus najít cages
            const props = Object.keys(board).filter(k => !k.startsWith('_') || k.includes('cage') || k.includes('Cage'));
            const allProps = Object.keys(board);
            return {all_props: allProps, filtered: props};
        }""")
        print(json.dumps(board_props, indent=2))

        # 6) Every property of the board
        print("\n=== Board — všechny vlastnosti ===")
        board_dump = await page.evaluate("""() => {
            const board = DKS.puzzle.board;
            const result = {};
            for (const key of Object.keys(board)) {
                const val = board[key];
                const t = typeof val;
                if (t === 'function') continue;
                if (t === 'object' && val !== null) {
                    if (Array.isArray(val)) {
                        result[key] = `Array(${val.length})`;
                        if (val.length > 0 && val.length < 100) {
                            try {
                                const sample = val[0];
                                result[key + '_sample'] = typeof sample === 'object' ? Object.keys(sample || {}).slice(0,10) : sample;
                            } catch(e) {}
                        }
                    } else {
                        result[key] = Object.keys(val).slice(0, 10);
                    }
                } else {
                    result[key] = val;
                }
            }
            return result;
        }""")
        board_text = json.dumps(board_dump, indent=2, ensure_ascii=False)
        print(board_text[:5000])

        # 7) Solution data
        print("\n=== Solution ===")
        solution_dump = await page.evaluate("""() => {
            const sol = DKS.puzzle.solution;
            if (!sol) return 'no solution';
            const props = Object.keys(sol);
            const result = {props: props};
            for (const p of props) {
                const v = sol[p];
                if (typeof v !== 'function') {
                    if (Array.isArray(v)) {
                        result[p] = v.slice(0, 20);
                    } else {
                        result[p] = v;
                    }
                }
            }
            return result;
        }""")
        solution_text = json.dumps(solution_dump, indent=2, ensure_ascii=False)
        print(solution_text[:3000])

        await browser.close()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,77 @@
"""
Průzkumný skript v4: vytáhne klece (cages) z DKS.puzzle.board.
"""
import asyncio
import json
import sys
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
URL = "https://www.dailykillersudoku.com/puzzle/376"
async def main():
    """Extract cages and the solution from ``DKS.puzzle`` and cross-check them.

    Prints every cage (sum and member cells), the solution rows, a grid that
    maps each cell to its cage index and finally, per cage, whether the
    solution values add up to the declared sum.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(viewport={"width": 1280, "height": 900})
        page = await context.new_page()

        print(f"Načítám {URL} ...")
        await page.goto(URL, wait_until="networkidle", timeout=60_000)

        # Cages
        print("\n=== Cages ===")
        cages = await page.evaluate("""() => {
            const board = DKS.puzzle.board;
            return board._cages.map((cage, i) => ({
                id: i,
                sum: cage.sum,
                cells: cage.cells.map(c => ({row: c._row, col: c._col}))
            }));
        }""")
        for cage in cages:
            cells_str = ", ".join(f"({c['row']},{c['col']})" for c in cage['cells'])
            print(f" Klec {cage['id']:2d}: sum={cage['sum']:2d}, buňky=[{cells_str}]")

        # Solution
        print("\n=== Řešení ===")
        solution = await page.evaluate("""() => {
            return DKS.puzzle.solution._values;
        }""")
        for r, row in enumerate(solution):
            print(f" Řádek {r}: {row}")

        # Cage map, used as a visual check
        print("\n=== Cage map (ověření) ===")
        cage_map = await page.evaluate("""() => {
            const board = DKS.puzzle.board;
            const map = [];
            for (let r = 0; r < board.size; r++) {
                const row = [];
                for (let c = 0; c < board.size; c++) {
                    const cell = board._cells[r][c];
                    const cageIdx = board._cages.indexOf(cell._cage);
                    row.push(cageIdx);
                }
                map.push(row);
            }
            return map;
        }""")
        for row in cage_map:
            print(f" {row}")

        # Verify that each cage's solution values add up to its sum
        print("\n=== Ověření součtů ===")
        for cage in cages:
            total = sum(solution[c['row']][c['col']] for c in cage['cells'])
            # Both branches used to be empty strings, so a mismatch was
            # invisible in the output; print an explicit verdict instead.
            ok = "OK" if total == cage['sum'] else "CHYBA"
            print(f" Klec {cage['id']:2d}: sum={cage['sum']:2d}, actual={total:2d} {ok}")

        await browser.close()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,113 @@
"""
Průzkumný skript v5: najde správné property names pro cell row/col.
"""
import asyncio
import json
import sys
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
URL = "https://www.dailykillersudoku.com/puzzle/376"
async def main():
    """Discover how cage cells expose their position and verify cage sums.

    Prints the property names and primitive values of one cage cell, the
    first cages with row/column looked up through name-based key detection,
    a fallback cage list built by locating each cage cell inside
    ``board._cells`` and, per cage, whether the solution values add up to the
    declared sum.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(viewport={"width": 1280, "height": 900})
        page = await context.new_page()

        print(f"Načítám {URL} ...")
        await page.goto(URL, wait_until="networkidle", timeout=60_000)

        # Property names of the cells stored in cages
        print("\n=== Cell properties ===")
        cell_props = await page.evaluate("""() => {
            const cage = DKS.puzzle.board._cages[0];
            const cell = cage.cells[0];
            return Object.keys(cell);
        }""")
        print(json.dumps(cell_props, indent=2))

        # Dump every primitive property to spot the row/col candidates
        print("\n=== Cell row/col lookup ===")
        cell_data = await page.evaluate("""() => {
            const cage = DKS.puzzle.board._cages[0];
            const cell = cage.cells[0];
            const result = {};
            for (const key of Object.keys(cell)) {
                const val = cell[key];
                if (typeof val !== 'function' && typeof val !== 'object') {
                    result[key] = val;
                }
            }
            return result;
        }""")
        print(json.dumps(cell_data, indent=2))

        # Cages with cells, using whichever property names carry row/col
        print("\n=== Cages s buňkami ===")
        cages = await page.evaluate("""() => {
            const board = DKS.puzzle.board;
            return board._cages.map((cage, i) => {
                const cells = cage.cells.map(c => {
                    // Najdi row/col property
                    const keys = Object.keys(c);
                    const rowKey = keys.find(k => k.toLowerCase().includes('row') && typeof c[k] === 'number');
                    const colKey = keys.find(k => (k.toLowerCase().includes('col') || k.toLowerCase().includes('column')) && typeof c[k] === 'number');
                    return {
                        row: rowKey ? c[rowKey] : null,
                        col: colKey ? c[colKey] : null,
                        rowKey: rowKey,
                        colKey: colKey
                    };
                });
                return {id: i, sum: cage.sum, cells: cells};
            });
        }""")
        for cage in cages[:5]:
            cells_str = ", ".join(f"({c['row']},{c['col']})" for c in cage['cells'])
            print(f" Klec {cage['id']:2d}: sum={cage['sum']:2d}, buňky=[{cells_str}]")
            if cage['id'] == 0:
                print(f" rowKey={cage['cells'][0]['rowKey']}, colKey={cage['cells'][0]['colKey']}")

        # Fallback in case row/col are still missing: locate cells by identity
        print("\n=== Fallback: cage map z _cells ===")
        cage_map = await page.evaluate("""() => {
            const board = DKS.puzzle.board;
            const result = [];
            for (const cage of board._cages) {
                const cellPositions = [];
                for (const cageCell of cage.cells) {
                    // Najdi pozici buňky v _cells mřížce
                    for (let r = 0; r < board.size; r++) {
                        for (let c = 0; c < board.size; c++) {
                            if (board._cells[r][c] === cageCell) {
                                cellPositions.push({row: r, col: c});
                            }
                        }
                    }
                }
                result.push({sum: cage.sum, cells: cellPositions});
            }
            return result;
        }""")
        for i, cage in enumerate(cage_map):
            cells_str = ", ".join(f"({c['row']},{c['col']})" for c in cage['cells'])
            print(f" Klec {i:2d}: sum={cage['sum']:2d}, buňky=[{cells_str}]")

        # Verify that each cage's solution values add up to its sum
        print("\n=== Ověření součtů ===")
        solution = await page.evaluate("() => DKS.puzzle.solution._values")
        for i, cage in enumerate(cage_map):
            total = sum(solution[c['row']][c['col']] for c in cage['cells'])
            # Both branches used to be empty strings, so a mismatch was
            # invisible in the output; print an explicit verdict instead.
            ok = "OK" if total == cage['sum'] else "CHYBA"
            print(f" Klec {i:2d}: sum={cage['sum']:2d}, actual={total:2d} {ok}")

        await browser.close()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,35 @@
"""
Zjistí rozsah puzzle v sudoku_killer tabulce a počet.
"""
import sys
from pathlib import Path
sys.stdout.reconfigure(encoding="utf-8")
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
from mysql_db import connect_mysql
conn = connect_mysql(database="puzzle")
cur = conn.cursor()

# Per puzzle type: row count plus the range of puzzle numbers and dates.
cur.execute("""
    SELECT puzzle_type_id, COUNT(*), MIN(puzzle_number), MAX(puzzle_number),
    MIN(puzzle_date), MAX(puzzle_date)
    FROM sudoku_killer
    GROUP BY puzzle_type_id
    """)
for type_id, count, num_min, num_max, date_min, date_max in cur.fetchall():
    print(f" type_id={type_id}, count={count}, nums={num_min}-{num_max}, dates={date_min}-{date_max}")

# Names of the known puzzle types.
cur.execute("SELECT id, name FROM puzzle_type")
for type_id, type_name in cur.fetchall():
    print(f" puzzle_type: id={type_id}, name={type_name}")

# How many killer sudoku rows already exist in the generic puzzles table.
cur.execute("""
    SELECT COUNT(*) FROM puzzles WHERE game_type = 'killer_sudoku'
    """)
already_present = cur.fetchone()[0]
print(f" Už v puzzles tabulce: {already_present}")

cur.close()
conn.close()
@@ -0,0 +1,279 @@
"""
Stáhne / přejmenuje Greater-Than Killer Sudoku puzzle + solutions z dailykillersudoku.com.
Název souboru: YYYY-MM-DD Puzzle SudokuKillerGreaterThan {n} [difficulty {d} of 10] [average solving time {t}].pdf
Logika:
1. Načte všechna čísla GT puzzlů ze search (t=4, d=2..10, všechny stránky)
2. Pro každé číslo:
- existuje SudokuKillerGreaterThan {n} → přeskočit
- existuje SudokuKiller {n} → přejmenovat na SudokuKillerGreaterThan
- jinak → stáhnout z /pdfs/{n}.pdf
Spuštění:
python stahni_greater_than.py
"""
import re
import sys
import time
import threading
from datetime import datetime
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
from bs4 import BeautifulSoup
sys.stdout.reconfigure(encoding="utf-8")
sys.stderr.reconfigure(encoding="utf-8")
BASE_URL = "https://www.dailykillersudoku.com"
SAVE_DIR = Path(__file__).parent / "DownloadedPuzzles"
SAVE_DIR.mkdir(exist_ok=True)
DELAY = 0.1 # sekundy mezi requesty v rámci jednoho vlákna
NUM_THREADS = 6
SESSION = requests.Session()
SESSION.headers.update({"User-Agent": "Mozilla/5.0 (compatible; puzzle-downloader/1.0)"})
_print_lock = threading.Lock()
def tname() -> str:
    """Return a short bracketed tag identifying the calling thread.

    The main thread yields ``[Hlavní]``. A thread whose name ends in
    ``_<int>`` yields ``[T<int + 1>]``. Any other name is truncated to its
    first eight characters and wrapped in brackets.
    """
    current = threading.current_thread().name
    if current == "MainThread":
        return "[Hlavní]"
    # Text after the last underscore (the whole name if there is none).
    suffix = current.rpartition("_")[2]
    try:
        worker_no = int(suffix) + 1
    except ValueError:
        return f"[{current[:8]}]"
    return f"[T{worker_no}]"
def tprint(*args, **kwargs):
    """Print like ``print()``, prefixed with the caller's thread tag.

    The print itself happens under ``_print_lock`` so that output from
    concurrent threads is not interleaved.
    """
    tag = tname()
    with _print_lock:
        print(tag, *args, **kwargs)
# ---------------------------------------------------------------------------
# Získání čísel GT puzzlů ze search
# ---------------------------------------------------------------------------
def get_page_puzzle_ids(d: int, page: int) -> list[int]:
    """Return the puzzle ids found on one search-results page.

    Requests ``/search?d=<d>&t=4&p=<page>`` and collects every number that
    appears in an ``id="board<number>"`` attribute. A failed request is
    logged and yields an empty list.
    """
    url = f"{BASE_URL}/search?d={d}&t=4&p={page}"
    try:
        resp = SESSION.get(url, timeout=15)
    except requests.RequestException as e:
        tprint(f" Chyba načítání search d={d} p={page}: {e}")
        return []
    return list(map(int, re.findall(r'id="board(\d+)"', resp.text)))
def get_max_page(d: int) -> int:
    """Return the highest page number linked from the search page for difficulty ``d``.

    Returns 0 when the request fails, and 1 when the page contains no
    ``p=<number>`` pagination links.
    """
    url = f"{BASE_URL}/search?d={d}&t=4&s=0"
    try:
        resp = SESSION.get(url, timeout=15)
    except requests.RequestException:
        return 0
    page_numbers = re.findall(r'href="/search\?[^"]*p=(\d+)"', resp.text)
    # default=1 covers the "no pagination links" case.
    return max(map(int, page_numbers), default=1)
def collect_all_gt_numbers() -> list[int]:
    """Walk the search pages (d=2..10, t=4) and return all puzzle ids, sorted.

    Difficulties whose page count comes back as 0 are skipped. The function
    sleeps ``DELAY`` seconds after every page request.
    """
    found: set[int] = set()
    for d in range(2, 11):
        max_p = get_max_page(d)
        if max_p != 0:
            tprint(f" Difficulty {d}: {max_p} stránek")
            for page in range(1, max_p + 1):
                found.update(get_page_puzzle_ids(d, page))
                time.sleep(DELAY)
    return sorted(found)
# ---------------------------------------------------------------------------
# Čtení existujících souborů
# ---------------------------------------------------------------------------
def find_downloaded_killer() -> dict[int, Path]:
    """Map puzzle number -> path for plain SudokuKiller puzzle PDFs in SAVE_DIR.

    Files whose name contains ``[solution]`` or ``GreaterThan`` are ignored,
    as are files from which no puzzle number can be extracted.
    """
    killer_files = {}
    for pdf in SAVE_DIR.glob("*Puzzle SudokuKiller *.pdf"):
        fname = pdf.name
        is_plain_puzzle = "[solution]" not in fname and "GreaterThan" not in fname
        number = re.search(r"SudokuKiller (\d+)", fname) if is_plain_puzzle else None
        if number:
            killer_files[int(number.group(1))] = pdf
    return killer_files
def find_downloaded_gt() -> set[int]:
    """Return the puzzle numbers of SudokuKillerGreaterThan puzzle PDFs in SAVE_DIR.

    Solution files (name contains ``[solution]``) are not counted.
    """
    return {
        int(m.group(1))
        for f in SAVE_DIR.glob("*Puzzle SudokuKillerGreaterThan *.pdf")
        if "[solution]" not in f.name
        and (m := re.search(r"SudokuKillerGreaterThan (\d+)", f.name))
    }
# ---------------------------------------------------------------------------
# Přejmenování / stažení
# ---------------------------------------------------------------------------
def killer_to_gt_filename(path: Path) -> str:
    """Return ``path``'s file name with ``SudokuKiller `` swapped for ``SudokuKillerGreaterThan ``."""
    old_tag, new_tag = "SudokuKiller ", "SudokuKillerGreaterThan "
    return new_tag.join(path.name.split(old_tag))
def rename_pair(n: int, killer_path: Path) -> bool:
    """Rename a SudokuKiller puzzle PDF and its solution PDF to SudokuKillerGreaterThan.

    The solution file is looked up next to ``killer_path`` as
    ``<stem> [solution].pdf`` and is optional. A missing puzzle file, or a
    rename that fails, is logged and makes the function return False.

    Args:
        n: Puzzle number. Unused here; kept so the signature stays stable.
        killer_path: Path of the existing SudokuKiller puzzle PDF.

    Returns:
        True when every file that had to be renamed was renamed.
    """
    solution_path = killer_path.with_name(killer_path.stem + " [solution].pdf")
    ok = True
    for src in (killer_path, solution_path):
        if not src.exists():
            if "[solution]" in src.name:
                continue  # the solution file does not have to exist
            tprint(f" Soubor nenalezen pro přejmenování: {src.name}")
            ok = False
            continue
        new_name = killer_to_gt_filename(src)
        try:
            src.rename(SAVE_DIR / new_name)
        except OSError as e:
            # E.g. the target already exists (Windows) or the file is locked.
            # Report it instead of letting the exception abort the whole run.
            tprint(f" Chyba přejmenování {src.name}: {e}")
            ok = False
            continue
        tprint(f" Přejmenováno: {src.name} → {new_name}")
    return ok
def get_puzzle_info(n: int) -> dict | None:
    """Scrape date, difficulty and average solving time for puzzle ``n``.

    Fetches ``/search?n=<n>`` and reads the values from the
    ``section.puzzle`` element. Returns None when the request fails, the
    section or any date part is missing, or the date cannot be parsed.

    Returns:
        A dict with keys ``date`` (YYYY-MM-DD), ``number``, ``difficulty``
        and ``avg_time``; the last two are ``"?"`` when not present.
    """
    url = f"{BASE_URL}/search?n={n}"
    try:
        resp = SESSION.get(url, timeout=15)
    except requests.RequestException as e:
        tprint(f" Chyba info puzzle {n}: {e}")
        return None

    section = BeautifulSoup(resp.text, "html.parser").select_one("section.puzzle")
    if not section:
        return None

    date_parts = [
        section.select_one(selector)
        for selector in ("span.short-month", "span.day", "span.year")
    ]
    if not all(date_parts):
        return None

    raw_date = " ".join(part.text.strip() for part in date_parts)
    try:
        date_iso = datetime.strptime(raw_date, "%b %d %Y").strftime("%Y-%m-%d")
    except ValueError:
        return None

    def text_or_unknown(selector: str) -> str:
        # "?" stands in for a value the page does not provide.
        element = section.select_one(selector)
        return element.text.strip() if element else "?"

    return {
        "date": date_iso,
        "number": n,
        "difficulty": text_or_unknown("span.puzzle-difficulty-value"),
        "avg_time": text_or_unknown("span.puzzle-timing-value"),
    }
def make_filename(info: dict, solution: bool = False) -> str:
    """Build the PDF file name for a Greater-Than Killer Sudoku puzzle.

    Characters in ``info["avg_time"]`` that are not allowed in file names
    (``\\ / : * ? " < > |``) are replaced with ``-``. With ``solution=True``
    the name gets a `` [solution]`` marker before the extension.
    """
    safe_time = re.sub(r'[\\/:*?"<>|]', "-", info["avg_time"])
    pieces = [
        f"{info['date']} Puzzle SudokuKillerGreaterThan {info['number']}",
        f"[difficulty {info['difficulty']} of 10]",
        f"[average solving time {safe_time}]",
    ]
    name = " ".join(pieces)
    if solution:
        name += " [solution]"
    return name + ".pdf"
def download_pdf(n: int, info: dict, solution: bool = False) -> bool:
    """Download the puzzle (or solution) PDF for puzzle ``n`` into SAVE_DIR.

    Returns True when the target file already exists or was written, and
    False when the request fails, the status is not 200, or the response
    is HTML rather than a PDF.
    """
    filename = make_filename(info, solution)
    filepath = SAVE_DIR / filename
    if filepath.exists():
        return True

    url_suffix = ".solution" if solution else ""
    pdf_url = f"{BASE_URL}/pdfs/{n}{url_suffix}.pdf"
    try:
        resp = SESSION.get(pdf_url, timeout=30)
    except requests.RequestException as e:
        tprint(f" Chyba stahování {pdf_url}: {e}")
        return False

    if resp.status_code != 200:
        tprint(f" PDF nedostupné (HTTP {resp.status_code}): {pdf_url}")
        return False

    content_type = resp.headers.get("content-type", "")
    if content_type.startswith("text/html"):
        tprint(f" PDF vrátilo HTML: {pdf_url}")
        return False

    filepath.write_bytes(resp.content)
    tprint(f" Staženo: {filename}")
    return True
def process_puzzle(n: int, idx: int, total: int,
                   killer_map: dict[int, Path]) -> bool:
    """Handle one Greater-Than puzzle: rename an existing file or download it.

    If ``n`` is in ``killer_map`` the existing SudokuKiller files are
    renamed. Otherwise the info page is scraped and both the puzzle and the
    solution PDF are downloaded (both attempts are always made).

    Returns:
        True when the rename, or both downloads, succeeded.
    """
    tprint(f"[{idx}/{total}] Puzzle #{n}")
    if n in killer_map:
        return rename_pair(n, killer_map[n])

    # Not present as a SudokuKiller file -> download it.
    info = get_puzzle_info(n)
    time.sleep(DELAY)
    if not info:
        tprint(f" Puzzle {n}: info stránka nenalezena")
        return False

    puzzle_ok = download_pdf(n, info, solution=False)
    time.sleep(DELAY)
    solution_ok = download_pdf(n, info, solution=True)
    return puzzle_ok and solution_ok
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """Collect all Greater-Than puzzle numbers and rename or download each one.

    Numbers that already exist as SudokuKillerGreaterThan files are skipped.
    The rest are processed by a pool of ``NUM_THREADS`` worker threads, and a
    success / error summary is printed at the end.
    """
    tprint("Sbírám čísla GT puzzlů ze search (d=2..10, t=4)...")
    gt_numbers = collect_all_gt_numbers()
    tprint(f"Celkem GT puzzlů nalezeno: {len(gt_numbers)}")

    already_gt = find_downloaded_gt()
    killer_map = find_downloaded_killer()
    to_process = [n for n in gt_numbers if n not in already_gt]
    tprint(f"Již hotovo (GreaterThan): {len(already_gt)}")
    tprint(f"Ke zpracování: {len(to_process)}")
    if not to_process:
        tprint("Vše již zpracováno.")
        return

    rename_count = len([n for n in to_process if n in killer_map])
    download_count = len(to_process) - rename_count
    tprint(f" → přejmenovat: {rename_count}, stáhnout: {download_count}")

    total = len(to_process)
    tprint(f"Spouštím {NUM_THREADS} vláken...")
    with ThreadPoolExecutor(max_workers=NUM_THREADS,
                            thread_name_prefix="ThreadPoolExecutor-0") as executor:
        pending = [
            executor.submit(process_puzzle, n, idx, total, killer_map)
            for idx, n in enumerate(to_process, 1)
        ]
        outcomes = [bool(future.result()) for future in as_completed(pending)]

    ok_count = outcomes.count(True)
    err_count = outcomes.count(False)
    tprint(f"\nHotovo. Úspěšně: {ok_count}, chyby: {err_count}")


if __name__ == "__main__":
    main()
@@ -0,0 +1,230 @@
"""
Stáhne všechna Killer Sudoku puzzle + solutions z dailykillersudoku.com jako PDF.
Název souboru: YYYY-MM-DD Puzzle SudokuKiller {n} [difficulty {d} of 10] [average solving time {t}].pdf
Spuštění:
python stahni_killer_sudoku.py # stáhne vše nové od posledního stažení
python stahni_killer_sudoku.py --all # projde všechna čísla znovu (přeskočí existující)
"""
import re
import sys
import time
import threading
import argparse
from datetime import datetime
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
from bs4 import BeautifulSoup
sys.stdout.reconfigure(encoding="utf-8")
sys.stderr.reconfigure(encoding="utf-8")
# Root URL of the site the puzzles are fetched from.
BASE_URL = "https://www.dailykillersudoku.com"
# PDFs are stored in a "DownloadedPuzzles" folder next to this script;
# the folder is created at import time if it does not exist yet.
SAVE_DIR = Path(__file__).parent / "DownloadedPuzzles"
SAVE_DIR.mkdir(exist_ok=True)
DELAY = 0.1  # seconds between requests within a single thread
NUM_THREADS = 6  # number of concurrent worker threads
# How many puzzles to download (starting from the lowest missing number).
# 0 = download every missing puzzle up to the current one.
AMOUNT_TO_DOWNLOAD = 0
# Single HTTP session, with a custom User-Agent, used for every request below.
SESSION = requests.Session()
SESSION.headers.update({"User-Agent": "Mozilla/5.0 (compatible; puzzle-downloader/1.0)"})
# Held around print() in tprint() so lines from different threads do not interleave.
_print_lock = threading.Lock()
def tname() -> str:
    """Return a short bracketed tag identifying the calling thread.

    The main thread yields ``[Hlavní]``. A thread whose name ends in
    ``_<int>`` yields ``[T<int + 1>]``. Any other name is truncated to its
    first eight characters and wrapped in brackets.
    """
    current = threading.current_thread().name
    if current == "MainThread":
        return "[Hlavní]"
    # Text after the last underscore (the whole name if there is none).
    suffix = current.rpartition("_")[2]
    try:
        worker_no = int(suffix) + 1
    except ValueError:
        return f"[{current[:8]}]"
    return f"[T{worker_no}]"
def tprint(*args, **kwargs):
    """Print like ``print()``, prefixed with the caller's thread tag.

    The print itself happens under ``_print_lock`` so that output from
    concurrent threads is not interleaved.
    """
    tag = tname()
    with _print_lock:
        print(tag, *args, **kwargs)
def puzzle_exists(n: int) -> bool:
    """Return True if the search page for puzzle ``n`` contains a puzzle section.

    A failed request counts as "does not exist".
    """
    try:
        html = SESSION.get(f"{BASE_URL}/search?n={n}", timeout=15).text
    except requests.RequestException:
        return False
    return 'section class="puzzle' in html
def get_max_puzzle_number() -> int:
    """Find the number of the newest puzzle by binary search over 1..99999.

    Each probe calls ``puzzle_exists`` and is followed by a 0.5 s pause.
    """
    low, high = 1, 99999
    while low < high:
        # Upper midpoint, so the loop terminates when low + 1 == high.
        probe = (low + high + 1) // 2
        if puzzle_exists(probe):
            low = probe
        else:
            high = probe - 1
        time.sleep(0.5)
    return low
def get_puzzle_info(n: int) -> dict | None:
    """Scrape date, difficulty and average solving time for puzzle ``n``.

    Fetches ``/search?n=<n>`` and reads the values from the
    ``section.puzzle`` element. Every failure (request error, non-200
    status, missing section or date parts, unparsable date) is logged and
    results in None.

    Returns:
        A dict with keys ``date`` (YYYY-MM-DD), ``number``, ``difficulty``
        and ``avg_time``; the last two are ``"?"`` when not present.
    """
    url = f"{BASE_URL}/search?n={n}"
    try:
        resp = SESSION.get(url, timeout=15)
    except requests.RequestException as e:
        tprint(f" Chyba při načítání info puzzle {n}: {e}")
        return None
    if resp.status_code != 200:
        tprint(f" Puzzle {n}: info stránka nedostupná (HTTP {resp.status_code})")
        return None

    section = BeautifulSoup(resp.text, "html.parser").select_one("section.puzzle")
    if not section:
        tprint(f" Puzzle {n}: nenalezena sekce section.puzzle")
        return None

    date_parts = [
        section.select_one(selector)
        for selector in ("span.short-month", "span.day", "span.year")
    ]
    if not all(date_parts):
        tprint(f" Puzzle {n}: datum nenalezeno (chybí span.short-month / .day / .year)")
        return None

    raw_date = " ".join(part.text.strip() for part in date_parts)
    try:
        date_iso = datetime.strptime(raw_date, "%b %d %Y").strftime("%Y-%m-%d")
    except ValueError as e:
        tprint(f" Puzzle {n}: chyba parsování data ({e})")
        return None

    def text_or_unknown(selector: str) -> str:
        # "?" stands in for a value the page does not provide.
        element = section.select_one(selector)
        return element.text.strip() if element else "?"

    difficulty = text_or_unknown("span.puzzle-difficulty-value")
    avg_time = text_or_unknown("span.puzzle-timing-value")
    return {"date": date_iso, "number": n, "difficulty": difficulty, "avg_time": avg_time}
def make_filename(info: dict, solution: bool = False) -> str:
    """Build the PDF file name for a Killer Sudoku puzzle.

    Characters in ``info["avg_time"]`` that are not allowed in file names
    (``\\ / : * ? " < > |``) are replaced with ``-``. With ``solution=True``
    the name gets a `` [solution]`` marker before the extension.
    """
    safe_time = re.sub(r'[\\/:*?"<>|]', '-', info["avg_time"])
    pieces = [
        f"{info['date']} Puzzle SudokuKiller {info['number']}",
        f"[difficulty {info['difficulty']} of 10]",
        f"[average solving time {safe_time}]",
    ]
    name = " ".join(pieces)
    if solution:
        name += " [solution]"
    return name + ".pdf"
def download_pdf(n: int, info: dict, solution: bool = False) -> bool:
    """Download the puzzle (or solution) PDF for puzzle ``n`` into SAVE_DIR.

    Returns True when the target file already exists or was written, and
    False when the request fails, the status is not 200, or the response
    is HTML rather than a PDF. Every outcome is logged.
    """
    filename = make_filename(info, solution)
    filepath = SAVE_DIR / filename
    if filepath.exists():
        tprint(f" Přeskočeno (existuje): {filename}")
        return True

    url_suffix = ".solution" if solution else ""
    pdf_url = f"{BASE_URL}/pdfs/{n}{url_suffix}.pdf"
    try:
        resp = SESSION.get(pdf_url, timeout=30)
    except requests.RequestException as e:
        tprint(f" Chyba stahování {pdf_url}: {e}")
        return False

    if resp.status_code != 200:
        tprint(f" PDF nedostupné (HTTP {resp.status_code}): {pdf_url}")
        return False

    content_type = resp.headers.get("content-type", "")
    if content_type.startswith("text/html"):
        tprint(f" PDF vrátilo HTML místo binárního obsahu: {pdf_url}")
        return False

    filepath.write_bytes(resp.content)
    tprint(f" Uloženo: {filename}")
    return True
def process_puzzle(n: int, idx: int, total: int) -> bool:
    """Scrape the info page for puzzle ``n`` and download its puzzle and solution PDFs.

    Both downloads are always attempted, with a ``DELAY`` pause between
    requests. Returns True only when both succeeded.
    """
    tprint(f"[{idx}/{total}] Puzzle #{n}...")
    info = get_puzzle_info(n)
    time.sleep(DELAY)
    if not info:
        return False

    results = []
    for want_solution in (False, True):
        if want_solution:
            time.sleep(DELAY)
        results.append(download_pdf(n, info, solution=want_solution))
    return all(results)
def find_already_downloaded() -> set[int]:
    """Return the puzzle numbers found in ``SudokuKiller <n>`` PDF names in SAVE_DIR."""
    return {
        int(m.group(1))
        for f in SAVE_DIR.glob("*Puzzle SudokuKiller*.pdf")
        if (m := re.search(r'SudokuKiller (\d+)', f.name))
    }
def main():
    """Download missing Killer Sudoku PDFs in a given number range.

    Command-line options:
        --all    process every number in the range (existing files are
                 skipped inside ``download_pdf``)
        --start  first puzzle number (default 1)
        --end    last puzzle number (default: the newest puzzle found online)

    Without ``--all`` only numbers not yet present in SAVE_DIR are
    processed; ``AMOUNT_TO_DOWNLOAD`` (> 0) additionally caps how many.
    Work is spread over ``NUM_THREADS`` threads and a summary is printed.
    """
    parser = argparse.ArgumentParser(description="Stáhne Killer Sudoku PDF")
    parser.add_argument("--all", action="store_true", help="Projde všechna čísla od 1 (přeskočí existující)")
    parser.add_argument("--start", type=int, default=1, help="Začáteční číslo puzzle (výchozí: 1)")
    parser.add_argument("--end", type=int, default=None, help="Koncové číslo puzzle (výchozí: aktuální)")
    args = parser.parse_args()

    tprint("Zjišťuji aktuální číslo puzzle...")
    max_n = get_max_puzzle_number()
    tprint(f"Aktuální nejvyšší puzzle: #{max_n}")

    # Compare against None so an explicit "--end 0" is honored, not ignored.
    end_n = args.end if args.end is not None else max_n
    start_n = args.start
    downloaded = find_already_downloaded()

    if args.all:
        to_download = list(range(start_n, end_n + 1))
        tprint(f"Projdu všechna puzzle #{start_n}–#{end_n} (přeskočím existující soubory)")
    else:
        to_download = [n for n in range(start_n, end_n + 1) if n not in downloaded]
        tprint(f"Již staženo: {len(downloaded)} puzzle, zbývá stáhnout: {len(to_download)}")
        if AMOUNT_TO_DOWNLOAD > 0:
            to_download = to_download[:AMOUNT_TO_DOWNLOAD]
            tprint(f"AMOUNT_TO_DOWNLOAD={AMOUNT_TO_DOWNLOAD} → stáhnu prvních {len(to_download)} chybějících")

    if not to_download:
        tprint("Vše je již staženo.")
        return

    total = len(to_download)
    ok_count = 0
    err_count = 0
    tprint(f"Spouštím {NUM_THREADS} vláken...")
    with ThreadPoolExecutor(max_workers=NUM_THREADS, thread_name_prefix="ThreadPoolExecutor-0") as executor:
        futures = {
            executor.submit(process_puzzle, n, idx, total): n
            for idx, n in enumerate(to_download, 1)
        }
        for future in as_completed(futures):
            if future.result():
                ok_count += 1
            else:
                err_count += 1
    tprint(f"\nHotovo. Úspěšně: {ok_count}, chyby: {err_count}")


if __name__ == "__main__":
    main()
File diff suppressed because one or more lines are too long
@@ -0,0 +1,254 @@
"""
Stáhne strukturovaná data (cage definice + řešení) z dailykillersudoku.com
a uloží do sdílené tabulky puzzles.
Funguje bez Playwright — data jsou inline v HTML jako JSON, dekóduje se base64 v Pythonu.
"""
import base64
import json
import re
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
import requests
from tqdm import tqdm
sys.stdout.reconfigure(encoding="utf-8")
sys.stderr.reconfigure(encoding="utf-8")
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
from mysql_db import connect_mysql
# Maps the "puzzle_type" value from the page JSON to the game_type stored in the puzzles table.
PUZZLE_TYPE_MAP = {1: "killer_sudoku", 2: "killer_sudoku_gt"}
# URL template for a single puzzle page; "{}" is filled with the puzzle number.
BASE_URL = "https://www.dailykillersudoku.com/puzzle/{}"
def fetch_puzzle_json(puzzle_number: int) -> dict | None:
    """Fetch a puzzle page and return the JSON object passed to ``new DKS.Puzzle(...)``.

    Best effort: a non-200 status, a page without the expected snippet, or
    any exception raised while fetching or parsing all yield None.
    """
    url = BASE_URL.format(puzzle_number)
    try:
        response = requests.get(url, timeout=15)
        if response.status_code == 200:
            found = re.search(r'new DKS\.Puzzle\((\{.*?\})\)', response.text)
            if found:
                return json.loads(found.group(1))
    except Exception:
        return None
    return None
def decode_board(board_b64: str) -> tuple[list[list[int]], list[int]]:
    """Decode ``board_base64`` into ``(cage_map, cage_sums)``.

    Layout read by this function: 2 leading bytes are skipped, the next
    81 x 2 bytes are big-endian 16-bit cage ids in row-major order, and all
    remaining bytes are taken as one cage sum each.

    Returns:
        cage_map: 9x9 list of cage ids.
        cage_sums: list of the trailing byte values.
    """
    raw = base64.b64decode(board_b64)
    header_len, cell_bytes = 2, 81 * 2
    cell_data = raw[header_len:header_len + cell_bytes]
    sum_data = raw[header_len + cell_bytes:]

    # Combine each high/low byte pair into one id, in reading order.
    cage_ids = [
        (cell_data[pos] << 8) | cell_data[pos + 1]
        for pos in range(0, cell_bytes, 2)
    ]
    cage_map = [cage_ids[r * 9:(r + 1) * 9] for r in range(9)]
    return cage_map, list(sum_data)
def decode_solution(solution_b64: str) -> list[list[int]]:
    """Decode ``solution_base64`` into nine rows of byte values.

    The first two bytes are skipped; the rest is cut into consecutive
    slices of nine values, one slice per row.
    """
    payload = base64.b64decode(solution_b64)[2:]  # drop the 2-byte header
    return [list(payload[start:start + 9]) for start in range(0, 81, 9)]
def build_cages_string(cage_map: list[list[int]], cage_sums: list[int]) -> str:
    """Serialize cages as ``sum,r0c0r0c1|sum,r1c2r1c3|...``.

    Cells are grouped by cage id in row-major order and the cages are
    emitted in ascending id order. A cage whose id has no entry in
    ``cage_sums`` gets the sum 0.
    """
    members: dict[int, list[str]] = {}
    for r in range(9):
        for c in range(9):
            members.setdefault(cage_map[r][c], []).append(f"r{r}c{c}")

    return "|".join(
        f"{cage_sums[cid] if cid < len(cage_sums) else 0},{''.join(members[cid])}"
        for cid in sorted(members)
    )
def build_solution_string(solution: list[list[int]]) -> str:
    """Concatenate all values of the grid, row by row, into one string."""
    return "".join("".join(map(str, row)) for row in solution)
def process_puzzle(puzzle_number: int) -> dict | None:
    """Fetch one puzzle and convert it into a record for the puzzles table.

    Returns None when the page JSON cannot be fetched, or when anything
    goes wrong while decoding it.

    Returns:
        A dict with keys ``puzzle_number``, ``game_type``, ``difficulty``,
        ``puzzle_date``, ``puzzle`` (cage string), ``solution`` (digit
        string), ``extra`` (JSON text) and ``source``.
    """
    pj = fetch_puzzle_json(puzzle_number)
    if not pj:
        return None
    try:
        cage_map, cage_sums = decode_board(pj["board_base64"])
        solution = decode_solution(pj["solution_base64"])
        cage_str = build_cages_string(cage_map, cage_sums)
        sol_str = build_solution_string(solution)
        # Unknown or missing puzzle_type falls back to plain killer sudoku.
        game_type = PUZZLE_TYPE_MAP.get(pj.get("puzzle_type", 1), "killer_sudoku")
        extra = json.dumps({
            "grid_size": 9,
            "puzzle_number": pj["id"],
            "original_difficulty": pj.get("difficulty"),
        })
        record = {
            "puzzle_number": pj["id"],
            "game_type": game_type,
            "difficulty": str(pj.get("difficulty", 0)),
            "puzzle_date": pj.get("date"),
            "puzzle": cage_str,
            "solution": sol_str,
            "extra": extra,
            "source": "dailykillersudoku.com",
        }
    except Exception:
        return None
    return record
def save_batch(results: list[dict]) -> int:
    """Upsert a batch of puzzle records into the ``puzzles`` table.

    Each record is inserted; on a duplicate key the ``puzzle``,
    ``solution`` and ``extra`` columns are overwritten. The cursor and the
    connection are closed even when a statement raises.

    Args:
        results: Records as produced by ``process_puzzle``.

    Returns:
        Number of statements that reported a positive ``rowcount``.
    """
    # NOTE(review): there is no explicit commit here, so this presumably
    # relies on connect_mysql() returning an autocommit connection — confirm.
    conn = connect_mysql(database="puzzle")
    try:
        cur = conn.cursor()
        try:
            inserted = 0
            for r in results:
                cur.execute(
                    "INSERT INTO puzzles "
                    "(game_type, difficulty, puzzle_date, puzzle, solution, extra, source) "
                    "VALUES (%s, %s, %s, %s, %s, %s, %s) "
                    "ON DUPLICATE KEY UPDATE puzzle=VALUES(puzzle), solution=VALUES(solution), "
                    "extra=VALUES(extra)",
                    (r["game_type"], r["difficulty"], r["puzzle_date"],
                     r["puzzle"], r["solution"], r["extra"], r["source"]),
                )
                if cur.rowcount > 0:
                    inserted += 1
        finally:
            cur.close()
    finally:
        conn.close()
    return inserted
def get_puzzle_numbers() -> list[int]:
    """Return all ``puzzle_number`` values from ``sudoku_killer``, ascending.

    The cursor and the connection are closed even when the query raises.
    """
    conn = connect_mysql(database="puzzle")
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT puzzle_number FROM sudoku_killer ORDER BY puzzle_number")
            return [row[0] for row in cur.fetchall()]
        finally:
            cur.close()
    finally:
        conn.close()
# Local JSON file next to this script: written by download_all(), read by import_from_json().
JSON_FILE = Path(__file__).parent / "killer_structured_data.json"
def download_all(puzzle_numbers: list[int]) -> list[dict]:
    """Download every puzzle not yet in the JSON file, saving progress as it goes.

    Records already stored in ``JSON_FILE`` are loaded first and their
    puzzle numbers skipped. The remaining numbers are fetched in batches of
    100 on six threads, and the full result list is rewritten to
    ``JSON_FILE`` after each batch.

    Returns:
        All records: the previously stored ones plus the new ones.
    """
    all_results = []
    if JSON_FILE.exists():
        all_results = json.loads(JSON_FILE.read_text(encoding="utf-8"))
        print(f"Načteno {len(all_results)} existujících záznamů z JSON")

    done_numbers = {r["puzzle_number"] for r in all_results}
    remaining = [n for n in puzzle_numbers if n not in done_numbers]
    print(f"Zbývá stáhnout: {len(remaining)} z {len(puzzle_numbers)}")
    if not remaining:
        return all_results

    batch_size = 100
    errors = 0
    batch_starts = range(0, len(remaining), batch_size)
    with ThreadPoolExecutor(max_workers=6) as executor:
        for start in tqdm(batch_starts, desc="Stahování", unit="batch"):
            pending = [
                executor.submit(process_puzzle, n)
                for n in remaining[start:start + batch_size]
            ]
            for future in as_completed(pending):
                result = future.result()
                if not result:
                    errors += 1
                    continue
                all_results.append(result)
            # Checkpoint after every batch so an interrupted run can resume.
            JSON_FILE.write_text(
                json.dumps(all_results, ensure_ascii=False), encoding="utf-8"
            )

    print(f"Staženo celkem: {len(all_results)}, chyb: {errors}")
    return all_results
def import_from_json():
    """Load the records stored in ``JSON_FILE`` and write them to MySQL in batches of 500."""
    if not JSON_FILE.exists():
        print("JSON soubor neexistuje, nejdřív spusť stahování.")
        return

    all_results = json.loads(JSON_FILE.read_text(encoding="utf-8"))
    print(f"Importuji {len(all_results)} záznamů z JSON do MySQL...")

    batch_size = 500
    total_inserted = 0
    batch_starts = range(0, len(all_results), batch_size)
    for start in tqdm(batch_starts, desc="Import", unit="batch"):
        total_inserted += save_batch(all_results[start:start + batch_size])
    print(f"Import hotov: aktualizováno {total_inserted} záznamů")
def main():
    """Entry point: smoke-test one puzzle, then import or download depending on flags.

    Puzzle 376 is always fetched first as a self-test; if that fails the
    script stops. ``--import`` loads the existing JSON file into MySQL,
    ``--run`` downloads all puzzles and then imports them. With neither
    flag only a usage hint is printed.
    """
    # Test on a single puzzle
    print("=== Test: puzzle 376 ===")
    result = process_puzzle(376)
    if not result:
        print(" Selhalo!")
        return
    print(f" game_type: {result['game_type']}")
    print(f" difficulty: {result['difficulty']}")
    print(f" date: {result['puzzle_date']}")
    print(f" cages ({len(result['puzzle'].split('|'))} klecí): {result['puzzle'][:100]}...")
    print(f" solution: {result['solution']}")

    cli_flags = sys.argv
    if "--import" in cli_flags:
        import_from_json()
        return
    if "--run" not in cli_flags:
        print("\nPro stažení spusť s --run, pro import z JSON s --import")
        return

    puzzle_numbers = get_puzzle_numbers()
    print(f"\nCelkem puzzle k zpracování: {len(puzzle_numbers)}")
    all_results = download_all(puzzle_numbers)

    print("\nImportuji do MySQL...")
    batch_size = 500
    total_inserted = 0
    batch_starts = range(0, len(all_results), batch_size)
    for start in tqdm(batch_starts, desc="Import", unit="batch"):
        total_inserted += save_batch(all_results[start:start + batch_size])
    print(f"\nHotovo: aktualizováno {total_inserted} záznamů")


if __name__ == "__main__":
    main()
@@ -0,0 +1,293 @@
"""
Vykreslí Killer Sudoku puzzle do PDF z dat v MySQL tabulce puzzles.
"""
import json
import os
import re
import sys
from pathlib import Path
sys.stdout.reconfigure(encoding="utf-8")
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import cm
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen.canvas import Canvas
from mysql_db import connect_mysql
# Fonts are loaded from the "Fonts" folder under %WINDIR% (C:\Windows when the
# variable is not set) and registered under the names "Arial" and "ArialBold",
# which the drawing code below passes to setFont().
_fonts_dir = os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts")
pdfmetrics.registerFont(TTFont("Arial", os.path.join(_fonts_dir, "arial.ttf")))
pdfmetrics.registerFont(TTFont("ArialBold", os.path.join(_fonts_dir, "arialbd.ttf")))
# Output file written by main() when this module is run as a script.
OUTPUT = Path(__file__).parent / "killer_sudoku_31414.pdf"
def parse_cages(puzzle_str: str) -> list[dict]:
    """Parse a cage string of the form ``sum,r0c0r0c1|sum,r1c2...``.

    Returns:
        One dict per ``|``-separated part, with ``"sum"`` (int) and
        ``"cells"`` (list of ``(row, col)`` tuples, single-digit indices).
    """
    cell_pattern = re.compile(r"r(\d)c(\d)")
    parsed = []
    for chunk in puzzle_str.split("|"):
        target, cells_str = chunk.split(",", 1)
        coords = [(int(r), int(c)) for r, c in cell_pattern.findall(cells_str)]
        parsed.append({"sum": int(target), "cells": coords})
    return parsed
def build_cage_map(cages: list[dict]) -> list[list[int]]:
    """Return a 9x9 grid where each cell holds the index of its cage in ``cages``.

    Cells not covered by any cage keep the value -1.
    """
    grid = [[-1 for _ in range(9)] for _ in range(9)]
    for cage_index, cage in enumerate(cages):
        for r, c in cage["cells"]:
            grid[r][c] = cage_index
    return grid
def cage_label_cell(cage: dict) -> tuple[int, int]:
    """Return the cage's top-most cell, left-most among ties (where the sum label goes)."""
    def reading_order(cell):
        return (cell[0], cell[1])

    return min(cage["cells"], key=reading_order)
def parse_solution(solution_str: str) -> list[list[int]]:
    """Turn the first 81 characters of a digit string into a 9x9 grid of ints."""
    digits = [int(solution_str[pos]) for pos in range(81)]
    return [digits[r * 9:(r + 1) * 9] for r in range(9)]
def draw_killer_sudoku(c: Canvas, x0: float, y0: float, cell: float,
                       cages: list[dict], cage_map: list[list[int]],
                       title: str = "", solution: list[list[int]] | None = None):
    """Draw one 9x9 Killer Sudoku board onto the canvas.

    Drawing order (later layers paint over earlier ones): optional title,
    white background, optional solution digits, dashed cage outlines, thin
    full grid, thick 3x3 lines with outer border, and finally the cage sum
    labels on small white patches.

    Args:
        c: Canvas to draw on.
        x0: X coordinate of the board's left edge.
        y0: Y coordinate of the board's TOP edge; rows are drawn downward
            from it (row r spans y0 - r*cell .. y0 - (r+1)*cell).
        cell: Side length of one cell.
        cages: Cage dicts with "sum" and "cells" (list of (row, col)).
        cage_map: 9x9 grid giving the cage id of every cell.
        title: Optional text drawn just above the board.
        solution: Optional 9x9 grid of digits to print inside the cells.
    """
    label_font = max(cell * 0.22, 5)
    num_font = max(cell * 0.45, 7)
    thin = 0.3
    cage_line = 1.0
    thick = 2.2
    if title:
        c.setFont("ArialBold", 12)
        c.drawString(x0, y0 + 5, title)
    # White background
    c.setFillColor(colors.white)
    c.rect(x0, y0 - 9 * cell, 9 * cell, 9 * cell, fill=1, stroke=0)
    # Solution
    if solution:
        c.setFillColor(colors.Color(0.25, 0.25, 0.25))
        c.setFont("Arial", num_font)
        for r in range(9):
            for co in range(9):
                cx = x0 + co * cell + cell / 2
                cy = y0 - (r + 1) * cell + cell * 0.28
                c.drawCentredString(cx, cy, str(solution[r][co]))
    # --- Layer 1: dashed cage outlines (DRAWN FIRST) ---
    # The dashed lines go UNDERNEATH and the grid is drawn over them. This keeps
    # the look clean — the thin grid covers the spots where a dashed line
    # crosses a grid line.
    inset = cell * 0.10
    c.setStrokeColor(colors.Color(0.2, 0.2, 0.2))
    c.setLineWidth(cage_line * 0.5)
    c.setDash(3, 2)
    # For every cell: does it need a cage border on its top / bottom / left /
    # right side? True at the board edge or when the neighbour is in another cage.
    has_top = [[r == 0 or cage_map[r - 1][co] != cage_map[r][co]
                for co in range(9)] for r in range(9)]
    has_bot = [[r == 8 or cage_map[r + 1][co] != cage_map[r][co]
                for co in range(9)] for r in range(9)]
    has_lft = [[co == 0 or cage_map[r][co - 1] != cage_map[r][co]
                for co in range(9)] for r in range(9)]
    has_rgt = [[co == 8 or cage_map[r][co + 1] != cage_map[r][co]
                for co in range(9)] for r in range(9)]

    def in_cg(rr, cc, cid):
        # True when (rr, cc) is on the board and belongs to cage `cid`.
        return 0 <= rr <= 8 and 0 <= cc <= 8 and cage_map[rr][cc] == cid

    # Top borders — consecutive cells of a row are merged into one segment
    # Outer corner: shortened by inset. Inner corner: extended by inset.
    for r in range(9):
        co = 0
        while co < 9:
            if not has_top[r][co]:
                co += 1
                continue
            cid = cage_map[r][co]
            s = co
            while co < 9 and cage_map[r][co] == cid and has_top[r][co]:
                co += 1
            # Left end: inner corner when (r, s-1) is in the cage (the cage continues upward there)
            x_s = x0 + s * cell + (-inset if in_cg(r, s - 1, cid) else inset)
            # Right end: inner corner when (r, co) is in the cage
            x_e = x0 + co * cell + (inset if in_cg(r, co, cid) else -inset)
            c.line(x_s, y0 - r * cell - inset, x_e, y0 - r * cell - inset)
    # Bottom borders
    for r in range(9):
        co = 0
        while co < 9:
            if not has_bot[r][co]:
                co += 1
                continue
            cid = cage_map[r][co]
            s = co
            while co < 9 and cage_map[r][co] == cid and has_bot[r][co]:
                co += 1
            x_s = x0 + s * cell + (-inset if in_cg(r, s - 1, cid) else inset)
            x_e = x0 + co * cell + (inset if in_cg(r, co, cid) else -inset)
            c.line(x_s, y0 - (r + 1) * cell + inset, x_e, y0 - (r + 1) * cell + inset)
    # Left borders — consecutive cells of a column are merged into one segment
    for co in range(9):
        r = 0
        while r < 9:
            if not has_lft[r][co]:
                r += 1
                continue
            cid = cage_map[r][co]
            s = r
            while r < 9 and cage_map[r][co] == cid and has_lft[r][co]:
                r += 1
            # Upper end: inner corner when (s-1, co) is in the cage
            y_s = y0 - s * cell + (inset if in_cg(s - 1, co, cid) else -inset)
            # Lower end: inner corner when (r, co) is in the cage
            y_e = y0 - r * cell + (-inset if in_cg(r, co, cid) else inset)
            c.line(x0 + co * cell + inset, y_s, x0 + co * cell + inset, y_e)
    # Right borders
    for co in range(9):
        r = 0
        while r < 9:
            if not has_rgt[r][co]:
                r += 1
                continue
            cid = cage_map[r][co]
            s = r
            while r < 9 and cage_map[r][co] == cid and has_rgt[r][co]:
                r += 1
            y_s = y0 - s * cell + (inset if in_cg(s - 1, co, cid) else -inset)
            y_e = y0 - r * cell + (-inset if in_cg(r, co, cid) else inset)
            c.line(x0 + (co + 1) * cell - inset, y_s, x0 + (co + 1) * cell - inset, y_e)
    # Back to solid lines for the grid layers.
    c.setDash()
    # --- Layer 2: complete sudoku grid (thin solid lines over the dashed ones) ---
    c.setStrokeColor(colors.Color(0.55, 0.55, 0.55))
    c.setLineWidth(thin)
    for i in range(1, 9):
        c.line(x0, y0 - i * cell, x0 + 9 * cell, y0 - i * cell)
        c.line(x0 + i * cell, y0, x0 + i * cell, y0 - 9 * cell)
    # --- Layer 3: thick 3x3 lines + outer border ---
    c.setStrokeColor(colors.black)
    c.setLineWidth(thick)
    for i in range(0, 10, 3):
        c.line(x0, y0 - i * cell, x0 + 9 * cell, y0 - i * cell)
        c.line(x0 + i * cell, y0, x0 + i * cell, y0 - 9 * cell)
    # Cage labels (sums) — drawn last so the lines do not cover them.
    # First pass: a white patch behind each label.
    c.setFillColor(colors.white)
    c.setFont("ArialBold", label_font)
    for cage in cages:
        if not cage["cells"]:
            continue
        row, col = cage_label_cell(cage)
        lx = x0 + col * cell + cell * 0.05
        ly = y0 - row * cell - label_font * 1.05
        txt = str(cage["sum"])
        tw = c.stringWidth(txt, "ArialBold", label_font)
        c.rect(lx - 0.5, ly - 0.5, tw + 1, label_font + 1, fill=1, stroke=0)
    # Second pass: the label text itself, in black, at the same position.
    c.setFillColor(colors.black)
    c.setFont("ArialBold", label_font)
    for cage in cages:
        if not cage["cells"]:
            continue
        row, col = cage_label_cell(cage)
        lx = x0 + col * cell + cell * 0.05
        ly = y0 - row * cell - label_font * 1.05
        c.drawString(lx, ly, str(cage["sum"]))
def generate_pdf(puzzles: list[dict], output_path: Path):
    """Render puzzles and their solutions into one A4 PDF.

    Puzzle boards (11 cm wide) are laid out two per page. A final page
    titled "Řešení" then shows every solution as a 6 cm board, stacked
    from the top of the page downward.

    Args:
        puzzles: Dicts with keys ``puzzle_str``, ``solution_str``,
            ``difficulty`` and ``puzzle_date``; an optional ``game_type``
            of ``killer_sudoku_gt`` switches the title to "Killer GT".
        output_path: Where the PDF is written.
    """
    board_cm, solution_cm = 11, 6
    gap = 1.5 * cm
    page_w, page_h = A4

    # Parse everything up front, before the canvas is created.
    prepared = []
    for p in puzzles:
        cages = parse_cages(p["puzzle_str"])
        cage_map = build_cage_map(cages)
        solution = parse_solution(p["solution_str"])
        cell = board_cm * cm / 9
        prepared.append((p, cages, cage_map, solution, cell))

    c = Canvas(str(output_path), pagesize=A4)

    # Puzzle pages: two boards per page, the second one below the first.
    for first in range(0, len(prepared), 2):
        page_items = prepared[first:first + 2]
        for slot, (p, cages, cage_map, _, cell) in enumerate(page_items):
            board = 9 * cell
            x0 = (page_w - board) / 2
            y0 = page_h - 2 * cm - slot * (board_cm * cm + 3 * cm)
            if p.get("game_type") == "killer_sudoku_gt":
                label = "Killer GT"
            else:
                label = "Killer Sudoku"
            draw_killer_sudoku(c, x0, y0, cell, cages, cage_map,
                               f"{label} (diff {p['difficulty']}) — {p['puzzle_date']}")
        c.showPage()

    # Solutions page.
    c.setFont("ArialBold", 14)
    c.drawCentredString(page_w / 2, page_h - 2 * cm, "Řešení")
    y_cursor = page_h - 3.5 * cm
    for p, cages, cage_map, solution, _ in prepared:
        sol_cell = solution_cm * cm / 9
        sol_board = 9 * sol_cell
        x0 = (page_w - sol_board) / 2
        draw_killer_sudoku(c, x0, y_cursor, sol_cell, cages, cage_map,
                           f"diff {p['difficulty']}", solution=solution)
        y_cursor -= sol_board + gap
    c.showPage()
    c.save()
def main():
    """Render killer sudoku #31414 from MySQL: puzzle on top, solution below, into OUTPUT."""
    conn = connect_mysql(database="puzzle")
    cur = conn.cursor()
    cur.execute(
        "SELECT difficulty, puzzle, solution, extra FROM puzzles "
        "WHERE game_type='killer_sudoku' AND extra LIKE '%%\"puzzle_number\": 31414%%' "
        "LIMIT 1"
    )
    row = cur.fetchone()
    cur.close()
    conn.close()
    if not row:
        print("Žádná data.")
        return

    difficulty, puzzle_str, solution_str, extra_json = row
    extra = json.loads(extra_json)
    cages = parse_cages(puzzle_str)
    cage_map = build_cage_map(cages)
    solution = parse_solution(solution_str)

    page_w, page_h = A4
    board_cm = 11
    cell = board_cm * cm / 9
    board_px = 9 * cell
    x0 = (page_w - board_px) / 2
    y0 = page_h - 2 * cm
    # The solution board starts 3 cm below the bottom of the puzzle board.
    y0_sol = y0 - board_px - 3 * cm

    c = Canvas(str(OUTPUT), pagesize=A4)
    # Puzzle
    puzzle_title = f"Killer Sudoku (difficulty {difficulty}) — {extra.get('puzzle_number', '')}"
    draw_killer_sudoku(c, x0, y0, cell, cages, cage_map, puzzle_title)
    # Solution
    draw_killer_sudoku(c, x0, y0_sol, cell, cages, cage_map,
                       "Řešení", solution=solution)
    c.save()
    print(f"PDF uloženo: {OUTPUT}")


if __name__ == "__main__":
    main()
+118
View File
@@ -0,0 +1,118 @@
"""
Vybere 2 puzzle každého typu z MySQL a vygeneruje PDF (jedno na typ).
Použití: python PuzzleSelection.py [YYYY-MM-DD]
Výchozí datum: dnes.
"""
import json
import sys
from datetime import date
from pathlib import Path
sys.stdout.reconfigure(encoding="utf-8")
BASE = Path(__file__).parent
sys.path.insert(0, str(BASE.parent / "Knihovny"))
sys.path.insert(0, str(BASE / "DailyCalcudoku"))
sys.path.insert(0, str(BASE / "DailyKakuro"))
sys.path.insert(0, str(BASE / "DailySudoku"))
sys.path.insert(0, str(BASE / "DailySuguru"))
sys.path.insert(0, str(BASE / "DailyStr8ts"))
sys.path.insert(0, str(BASE / "DailySudokuKiller"))
from mysql_db import connect_mysql
from vykresli_calcudoku import generate_pdf as gen_calcudoku
from vykresli_kakuro import generate_pdf as gen_kakuro
from vykresli_sudoku import generate_pdf as gen_sudoku
from vykresli_suguru import generate_pdf as gen_suguru
from vykresli_puzzle import generate_pdf as gen_str8ts
from vykresli_killer_sudoku import generate_pdf as gen_killer
# Date used for the queries and output file names: the first command-line
# argument if given, otherwise today's date in ISO format.
PUZZLE_DATE = sys.argv[1] if len(sys.argv) > 1 else date.today().isoformat()
def query_by_date(cur, game_type):
    """Return up to two randomly chosen rows of ``game_type`` dated exactly PUZZLE_DATE.

    Each row is (game_type, difficulty, puzzle, solution, extra).
    """
    sql = (
        "SELECT game_type, difficulty, puzzle, solution, extra FROM puzzles "
        "WHERE game_type=%s AND puzzle_date=%s "
        "ORDER BY RAND() LIMIT 2"
    )
    params = [game_type, PUZZLE_DATE]
    cur.execute(sql, params)
    return cur.fetchall()
def query_killer(cur, game_type):
    """Return up to two randomly chosen rows of ``game_type`` dated on or before PUZZLE_DATE.

    Each row is (game_type, difficulty, puzzle, solution, extra, puzzle_date).
    """
    sql = (
        "SELECT game_type, difficulty, puzzle, solution, extra, puzzle_date FROM puzzles "
        "WHERE game_type=%s AND puzzle_date <= %s "
        "ORDER BY RAND() LIMIT 2"
    )
    params = [game_type, PUZZLE_DATE]
    cur.execute(sql, params)
    return cur.fetchall()
def run(label, puzzles, gen_fn, filename):
    """Generate one PDF via ``gen_fn(puzzles, BASE / filename)`` and report it.

    When ``puzzles`` is empty nothing is generated and a "no data" line is
    printed instead.
    """
    if puzzles:
        out = BASE / filename
        gen_fn(puzzles, out)
        print(f"{label}: {out.name}")
    else:
        print(f"{label}: žádná data pro {PUZZLE_DATE}")
def main():
    """Pick up to two puzzles of every type and generate one PDF per type.

    For each game type the rows are fetched, reshaped into the dict layout
    its generator expects, and handed to ``run``. The cursor and connection
    are closed even when a query or a generator raises.
    """
    conn = connect_mysql(database="puzzle")
    try:
        cur = conn.cursor()
        try:
            # Calcudoku
            rows = query_by_date(cur, "calcudoku")
            run("calcudoku", [
                {"difficulty": r[1], "cages_str": r[2], "solution_str": r[3],
                 "grid_size": json.loads(r[4])["grid_size"], "puzzle_date": PUZZLE_DATE}
                for r in rows
            ], gen_calcudoku, f"calcudoku_{PUZZLE_DATE}.pdf")

            # Kakuro
            rows = query_by_date(cur, "kakuro")
            run("kakuro", [
                {"difficulty": r[1], "puzzle_str": r[2], "puzzle_date": PUZZLE_DATE}
                for r in rows
            ], gen_kakuro, f"kakuro_{PUZZLE_DATE}.pdf")

            # Sudoku
            rows = query_by_date(cur, "sudoku")
            run("sudoku", [
                {"difficulty": r[1], "puzzle": r[2], "solution": r[3], "puzzle_date": PUZZLE_DATE}
                for r in rows
            ], gen_sudoku, f"sudoku_{PUZZLE_DATE}.pdf")

            # Suguru
            rows = query_by_date(cur, "suguru")
            run("suguru", [
                {"difficulty": r[1], "puzzle_str": r[2], "solution_str": r[3],
                 "grid_size": json.loads(r[4])["grid_size"], "puzzle_date": PUZZLE_DATE}
                for r in rows
            ], gen_suguru, f"suguru_{PUZZLE_DATE}.pdf")

            # Str8ts
            rows = query_by_date(cur, "str8ts")
            run("str8ts", [
                {"difficulty": r[1], "puzzle": r[2], "solution": r[3],
                 "bw": json.loads(r[4])["bw"], "puzzle_date": PUZZLE_DATE}
                for r in rows
            ], gen_str8ts, f"str8ts_{PUZZLE_DATE}.pdf")

            # Killer Sudoku — two random puzzles dated on or before PUZZLE_DATE
            # (query_killer orders by RAND(), not by date).
            rows = query_killer(cur, "killer_sudoku")
            run("killer_sudoku", [
                {"game_type": r[0], "difficulty": r[1], "puzzle_str": r[2],
                 "solution_str": r[3], "puzzle_date": str(r[5])}
                for r in rows
            ], gen_killer, f"killer_sudoku_{PUZZLE_DATE}.pdf")
        finally:
            cur.close()
    finally:
        conn.close()


if __name__ == "__main__":
    main()