notebookvb

This commit is contained in:
Vladimir Buzalka
2026-05-08 22:06:57 +02:00
parent c9903646f1
commit c4c0d1d435
14 changed files with 1666 additions and 0 deletions
+88
View File
@@ -0,0 +1,88 @@
# SudokuKiller — technické poznámky
## Přehled skriptů
### Stahování PDF (původní pipeline)
| Skript | Popis |
|--------|-------|
| `stahni_killer_sudoku.py` | Stáhne puzzle + solution PDF z dailykillersudoku.com |
| `stahni_greater_than.py` | Stáhne Greater-Than variantu, přejmenuje existující |
| `import_do_mysql.py` | Importuje PDF soubory do MySQL tabulky `sudoku_killer` (binární bloby) |
| `30_BatchCrop.py` | Ořeže PDF (odstraní hlavičky/patičky), uloží zpět do DB |
### Strukturovaná data (nový pipeline)
| Skript | Popis |
|--------|-------|
| `stahni_killer_structured.py` | Stáhne strukturovaná data (cage definice + řešení) z webu do sdílené tabulky `puzzles` |
| `preskumaj_killer_data*.py` | Průzkumné skripty pro reverzní inženýrství datového formátu |
## Zdroj dat
Web: https://www.dailykillersudoku.com/
Každý puzzle má stránku `/puzzle/{N}` s inline JSON daty v HTML:
```javascript
DKS.puzzle = new DKS.Puzzle({
"id": 376,
"date": "2009-05-04",
"difficulty": 4,
"board_base64": "AZoACQAE...",
"solution_base64": "AJoICQIG...",
"puzzle_type": 1
})
```
## Dekódování base64
### board_base64
- 2 bajty header (puzzle_type, flags)
- 81 × 2 bajty = 162 bajtů — cage ID pro každou buňku (uint16 big-endian)
- N bajtů — součet pro každou klec (1 bajt = max 255)
### solution_base64
- 2 bajty header
- 81 bajtů — čísla řešení (řádek po řádku)
## Typy puzzle
| puzzle_type | game_type v DB | Popis |
|-------------|----------------|-------|
| 1 | `killer_sudoku` | Killer Sudoku — klece se součty |
| 2 | `killer_sudoku_gt` | Greater-Than Killer Sudoku — klece + nerovnosti |
## Obtížnost
Škála 1–10 (z webu), uložena v `difficulty`.
## MySQL — původní tabulka `sudoku_killer`
Obsahuje binární PDF v `file_puzzle` / `file_solution` / `file_puzzle_cropped`.
- 19 106 KillerSudoku (puzzle 1–31414, 2009–2026)
- 11 405 GreaterThan (puzzle 1730–31416, 2010–2026)
## MySQL — sdílená tabulka `puzzles`
Strukturovaná data (cage definice + řešení):
- `game_type` = `'killer_sudoku'` / `'killer_sudoku_gt'`
- `difficulty` = `'1'`–`'10'`
- `puzzle` = klece ve formátu `sum,r0c1r0c2|sum,r3c4r3c5|...`
- `solution` = flat string 81 číslic
- `extra` = `{"grid_size": 9, "puzzle_number": 376, "original_difficulty": 4}`
- `source` = `'dailykillersudoku.com'`
## Layout a tisk
V podadresáři `Testy/` jsou experimentální skripty pro:
- Ořezávání PDF (ray-cast detekce mřížky)
- Škálování a umístění 2 puzzle na A4
- Layout konfigurace (`layouts.json`)
## Závislosti
- `requests`, `beautifulsoup4` — HTTP + HTML parsing
- `fitz` (PyMuPDF) — PDF manipulace, ray-cast cropping
- `pypdf` — PDF čtení/zápis
- `playwright` — průzkumné skripty (není potřeba pro produkční stahování)
- `tqdm` — progress bar
@@ -0,0 +1,36 @@
"""
Exportuje originální PDF puzzle z tabulky sudoku_killer pro porovnání.
"""
import sys
from pathlib import Path
sys.stdout.reconfigure(encoding="utf-8")
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
from mysql_db import connect_mysql
# Exported PDFs are written next to this script.
OUTPUT_DIR = Path(__file__).parent
# Puzzle whose original PDFs are exported; used by both the query and the messages.
PUZZLE_NUMBER = 31414

conn = connect_mysql(database="puzzle")
try:
    cur = conn.cursor()
    try:
        # Parameterized query (same %s placeholder style as the other scripts
        # that use connect_mysql) instead of a number baked into the SQL text.
        cur.execute(
            "SELECT puzzle_number, file_puzzle, file_solution "
            "FROM sudoku_killer WHERE puzzle_number = %s",
            (PUZZLE_NUMBER,),
        )
        row = cur.fetchone()
    finally:
        cur.close()
finally:
    # Release the connection even when the query fails.
    conn.close()

if not row:
    print(f"Puzzle {PUZZLE_NUMBER} nenalezen v sudoku_killer.")
else:
    num, pdf_puzzle, pdf_solution = row
    # Each blob is optional; write only the ones that are present.
    for label, blob in (("puzzle", pdf_puzzle), ("solution", pdf_solution)):
        if blob:
            path = OUTPUT_DIR / f"original_{num}_{label}.pdf"
            path.write_bytes(blob)
            print(f"Uloženo: {path}")
File diff suppressed because one or more lines are too long
@@ -0,0 +1,184 @@
"""
Průzkumný skript: zkouší najít strukturovaná data puzzle
na dailykillersudoku.com (cage definice, řešení).
"""
import asyncio
import json
import sys
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
URL = "https://www.dailykillersudoku.com/puzzle/70000"
# JavaScript probes executed in the page through page.evaluate(); the text is
# passed to the browser unchanged.
_JS_GLOBALS = """() => {
const names = ['puzzle', 'puzzleData', 'gameData', 'game', 'board',
'grid', 'cages', 'cells', 'solution', 'killerData',
'sudoku', 'level', 'data', 'config', 'state',
'app', 'store', 'vuex', '__NUXT__', '__NEXT_DATA__',
'initialData', 'pageData', 'props', 'serverData'];
const found = {};
for (const name of names) {
if (typeof window[name] !== 'undefined') {
const val = window[name];
found[name] = {
type: typeof val,
keys: typeof val === 'object' && val !== null ? Object.keys(val).slice(0, 20) : null
};
}
}
return found;
}"""

_JS_SVG_CANVAS = """() => {
const svgs = document.querySelectorAll('svg');
const canvases = document.querySelectorAll('canvas');
return {
svg_count: svgs.length,
canvas_count: canvases.length,
svg_ids: Array.from(svgs).map(s => s.id || s.className || '(no id)').slice(0, 5),
canvas_ids: Array.from(canvases).map(c => c.id || c.className || '(no id)').slice(0, 5)
};
}"""

_JS_DATA_ATTRS = """() => {
const all = document.querySelectorAll('[data-cage], [data-cell], [data-sum], [data-group], [data-value], [data-row], [data-col]');
return {
count: all.length,
samples: Array.from(all).slice(0, 5).map(el => ({
tag: el.tagName,
attrs: Object.fromEntries(Array.from(el.attributes).filter(a => a.name.startsWith('data-')).map(a => [a.name, a.value]))
}))
};
}"""

_JS_GRID_ELEMENTS = """() => {
const tables = document.querySelectorAll('table');
const grids = document.querySelectorAll('[class*=grid], [class*=puzzle], [class*=board], [class*=cage], [class*=cell], [id*=grid], [id*=puzzle], [id*=board]');
return {
table_count: tables.length,
grid_elements: Array.from(grids).slice(0, 10).map(el => ({
tag: el.tagName,
id: el.id,
class: el.className.toString().substring(0, 100),
children: el.children.length
}))
};
}"""

_JS_SCRIPT_TAGS = """() => {
const scripts = document.querySelectorAll('script');
const results = [];
for (const s of scripts) {
const text = s.textContent || '';
if (text.length > 10 && text.length < 50000) {
const keywords = ['puzzle', 'cage', 'cell', 'grid', 'solution', 'board', 'sum'];
const found = keywords.filter(k => text.toLowerCase().includes(k));
if (found.length > 0) {
results.push({
keywords: found,
length: text.length,
snippet: text.substring(0, 500)
});
}
}
}
return results;
}"""

_JS_FRAMEWORK = """() => {
// Vue
const vueEl = document.querySelector('[data-v-app]') || document.querySelector('#app') || document.querySelector('#__nuxt');
let vueData = null;
if (vueEl && vueEl.__vue_app__) {
vueData = 'Vue 3 app found';
} else if (vueEl && vueEl.__vue__) {
vueData = 'Vue 2 app found';
try {
const d = vueEl.__vue__.$data;
vueData = {type: 'Vue 2', keys: Object.keys(d)};
} catch(e) {}
}
// __NUXT__
if (typeof __NUXT__ !== 'undefined') {
try { vueData = {type: 'Nuxt', keys: Object.keys(__NUXT__)}; } catch(e) {}
}
// React
let reactData = null;
const reactRoot = document.querySelector('#__next') || document.querySelector('#root');
if (reactRoot) {
const fiberKey = Object.keys(reactRoot).find(k => k.startsWith('__reactFiber') || k.startsWith('__reactInternalInstance'));
if (fiberKey) reactData = 'React app found';
}
return {vue: vueData, react: reactData};
}"""

_JS_CUSTOM_PROPS = """() => {
const iframe = document.createElement('iframe');
document.body.appendChild(iframe);
const defaultKeys = new Set(Object.keys(iframe.contentWindow));
document.body.removeChild(iframe);
const custom = Object.keys(window).filter(k => !defaultKeys.has(k) && !k.startsWith('__'));
return custom.slice(0, 50);
}"""


async def main():
    """Probe the puzzle page for structured data and print everything found.

    Loads URL in headless Chromium while recording responses whose URL
    contains a data-related keyword, then prints eight sections: captured
    responses, well-known global variables, SVG/canvas elements, elements
    with data-* attributes, table/grid-like elements, inline scripts that
    mention puzzle keywords, Vue/React detection and custom window keys.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(viewport={"width": 1280, "height": 900})
            page = await context.new_page()

            # Record network responses as they arrive.
            api_responses = []

            async def on_response(response):
                url = response.url
                if any(k in url for k in ["api", "puzzle", "data", "json", "cage", "grid"]):
                    try:
                        body = await response.text()
                        api_responses.append({"url": url, "status": response.status, "body": body[:2000]})
                    except Exception:
                        # Reading the body failed; still record the request.
                        # A bare `except:` here would also swallow task
                        # cancellation and KeyboardInterrupt.
                        api_responses.append({"url": url, "status": response.status, "body": "(could not read)"})

            page.on("response", on_response)
            print(f"Načítám {URL} ...")
            await page.goto(URL, wait_until="networkidle", timeout=60_000)

            # 1) Network requests
            print("\n=== Zachycené API/data requesty ===")
            for r in api_responses:
                print(f"\n URL: {r['url']}")
                print(f" Status: {r['status']}")
                if r['body'] and len(r['body']) < 2000:
                    print(f" Body: {r['body'][:500]}")

            # 2) Global JS variables
            print("\n=== Globální proměnné ===")
            globals_check = await page.evaluate(_JS_GLOBALS)
            print(json.dumps(globals_check, indent=2))

            # 3) SVG/canvas analysis
            print("\n=== SVG/Canvas elementy ===")
            svg_info = await page.evaluate(_JS_SVG_CANVAS)
            print(json.dumps(svg_info, indent=2))

            # 4) data-* attributes
            print("\n=== Elementy s data- atributy ===")
            data_attrs = await page.evaluate(_JS_DATA_ATTRS)
            print(json.dumps(data_attrs, indent=2))

            # 5) Tables and grid-like structures
            print("\n=== Tabulky / grid struktury ===")
            tables = await page.evaluate(_JS_GRID_ELEMENTS)
            print(json.dumps(tables, indent=2))

            # 6) Script tags that mention puzzle keywords
            print("\n=== Script tagy s daty ===")
            scripts = await page.evaluate(_JS_SCRIPT_TAGS)
            print(json.dumps(scripts, indent=2, ensure_ascii=False)[:5000])

            # 7) Vue/React/Angular state
            print("\n=== Framework state ===")
            framework = await page.evaluate(_JS_FRAMEWORK)
            print(json.dumps(framework, indent=2))

            # 8) Window properties that a fresh iframe window does not have
            print("\n=== Custom window properties ===")
            custom_props = await page.evaluate(_JS_CUSTOM_PROPS)
            print(json.dumps(custom_props, indent=2))
        finally:
            # Close the browser even when navigation or a probe fails.
            await browser.close()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,174 @@
"""
Průzkumný skript v2: zkouší najít strukturovaná data puzzle
na dailykillersudoku.com — prozkoumá DKS objekt a platný puzzle.
"""
import asyncio
import json
import sys
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
URL = "https://www.dailykillersudoku.com/puzzle/376"
# JavaScript probes executed in the page through page.evaluate(); the text is
# passed to the browser unchanged.
_JS_DKS_KEYS = """() => {
if (typeof DKS === 'undefined') return null;
const result = {};
for (const key of Object.keys(DKS)) {
const val = DKS[key];
const t = typeof val;
if (t === 'function') {
result[key] = 'function';
} else if (t === 'object' && val !== null) {
result[key] = {type: 'object', keys: Object.keys(val).slice(0, 15)};
} else {
result[key] = val;
}
}
return result;
}"""

_JS_DKS_PUZZLE_DATA = """() => {
if (typeof DKS === 'undefined') return null;
const result = {};
const interesting = ['board', 'puzzle', 'game', 'grid', 'cages', 'cells',
'solution', 'currentPuzzle', 'puzzleData', 'data',
'sudoku', 'killer', 'state'];
for (const key of Object.keys(DKS)) {
if (interesting.some(i => key.toLowerCase().includes(i))) {
try {
result[key] = JSON.parse(JSON.stringify(DKS[key]));
} catch(e) {
result[key] = String(DKS[key]).substring(0, 200);
}
}
}
return result;
}"""

_JS_SCRIPT_TAGS = """() => {
const scripts = document.querySelectorAll('script');
const results = [];
for (const s of scripts) {
const text = s.textContent || '';
if (text.includes('cage') || text.includes('cell') || text.includes('solution')
|| text.includes('group') || text.includes('sum') || text.includes('Board')
|| text.includes('Puzzle')) {
results.push({
length: text.length,
snippet: text.substring(0, 1000)
});
}
}
return results;
}"""

_JS_SVG_GRID = """() => {
const svgs = document.querySelectorAll('svg');
const results = [];
for (const svg of svgs) {
const html = svg.outerHTML;
if (html.length > 1000) {
// Pravděpodobně puzzle mřížka
const texts = svg.querySelectorAll('text');
const textContent = Array.from(texts).map(t => ({
text: t.textContent,
x: t.getAttribute('x'),
y: t.getAttribute('y'),
class: t.getAttribute('class')
}));
const paths = svg.querySelectorAll('path');
results.push({
size: html.length,
width: svg.getAttribute('width'),
height: svg.getAttribute('height'),
viewBox: svg.getAttribute('viewBox'),
text_count: texts.length,
path_count: paths.length,
texts: textContent.slice(0, 30)
});
}
}
return results;
}"""

# Raw string: the JS regex literals contain backslashes (\., \w, \s, ...) that
# are invalid escape sequences in a normal Python string. The resulting text
# is the same; only the Python escape warning goes away.
_JS_INLINE_DATA = r"""() => {
const html = document.documentElement.innerHTML;
// Hledej vzory jako JSON pole, cage definice apod.
const patterns = [
/DKS\.\w+\s*=\s*(\{[^}]{20,}\})/g,
/DKS\.\w+\s*=\s*(\[[^\]]{20,}\])/g,
/var\s+\w+\s*=\s*(\{[^}]{50,}\})/g,
/puzzl\w*\s*[:=]\s*["'{[]/gi
];
const found = [];
for (const p of patterns) {
let m;
while ((m = p.exec(html)) !== null) {
found.push(m[0].substring(0, 300));
}
}
return found;
}"""


async def main():
    """Explore the DKS object of a valid puzzle page and print what it exposes.

    Loads URL in headless Chromium while recording text-like responses from
    the site, then prints: the keys of the global DKS object, DKS members
    with puzzle-related names, inline scripts that mention puzzle keywords,
    the recorded responses, large SVG elements and regex hits for inline
    data assignments.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(viewport={"width": 1280, "height": 900})
            page = await context.new_page()
            api_responses = []

            async def on_response(response):
                url = response.url
                # Only same-site responses other than the page itself.
                if "dailykillersudoku" not in url or url == URL:
                    return
                try:
                    ct = response.headers.get("content-type", "")
                    if "json" in ct or "javascript" in ct or "text" in ct:
                        body = await response.text()
                        if len(body) < 5000:
                            api_responses.append({"url": url, "status": response.status, "body": body[:2000]})
                        else:
                            # Large bodies are summarized by length only.
                            api_responses.append({"url": url, "status": response.status, "body": f"({len(body)} chars)"})
                except Exception:
                    # Best effort: keep a trace of the request instead of
                    # dropping it silently. A bare `except:` would also hide
                    # cancellation and KeyboardInterrupt.
                    api_responses.append({"url": url, "status": response.status, "body": "(could not read)"})

            page.on("response", on_response)
            print(f"Načítám {URL} ...")
            await page.goto(URL, wait_until="networkidle", timeout=60_000)

            # 1) DKS object: keys
            print("\n=== DKS objekt — klíče ===")
            dks = await page.evaluate(_JS_DKS_KEYS)
            if dks:
                print(json.dumps(dks, indent=2, ensure_ascii=False)[:5000])

            # 2) DKS.board or similar puzzle objects
            print("\n=== DKS puzzle-related data ===")
            puzzle_data = await page.evaluate(_JS_DKS_PUZZLE_DATA)
            if puzzle_data:
                print(json.dumps(puzzle_data, indent=2, ensure_ascii=False)[:8000])
            else:
                print(" žádné puzzle data")

            # 3) Script tags with puzzle data
            print("\n=== Script tagy s puzzle daty ===")
            scripts = await page.evaluate(_JS_SCRIPT_TAGS)
            print(json.dumps(scripts, indent=2, ensure_ascii=False)[:8000])

            # 4) Recorded requests
            print("\n=== Zachycené requesty (dailykillersudoku) ===")
            for r in api_responses:
                print(f"\n URL: {r['url']}")
                print(f" Status: {r['status']}")
                print(f" Body: {r['body'][:500]}")

            # 5) SVG content: the puzzle grid
            print("\n=== SVG puzzle mřížka ===")
            svg_data = await page.evaluate(_JS_SVG_GRID)
            print(json.dumps(svg_data, indent=2, ensure_ascii=False)[:5000])

            # 6) Inline JS that assigns puzzle data
            print("\n=== Inline JS s puzzle daty ===")
            inline_data = await page.evaluate(_JS_INLINE_DATA)
            print(json.dumps(inline_data, indent=2, ensure_ascii=False)[:3000])
        finally:
            # Close the browser even when navigation or a probe fails.
            await browser.close()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,156 @@
"""
Průzkumný skript v3: dekóduje board_base64 a solution_base64
z dailykillersudoku.com — zjistí formát dat.
"""
import asyncio
import json
import sys
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
URL = "https://www.dailykillersudoku.com/puzzle/376"
# JavaScript probes executed in the page through page.evaluate(); the text is
# passed to the browser unchanged.
_JS_PUZZLE_JSON = """() => {
return DKS.puzzle._json;
}"""

_JS_BOARD_BYTES = """() => {
const b64 = DKS.puzzle._json.board_base64;
const bytes = DKS.base64ToByteArray(b64);
return Array.from(bytes);
}"""

_JS_SOLUTION_BYTES = """() => {
const b64 = DKS.puzzle._json.solution_base64;
const bytes = DKS.base64ToByteArray(b64);
return Array.from(bytes);
}"""

_JS_BOARD_SUMMARY = """() => {
const board = DKS.puzzle.board;
return {
size: board.size,
cell_count: board._canvas ? 'has canvas' : 'no canvas',
};
}"""

_JS_BOARD_CELLS = """() => {
const board = DKS.puzzle.board;
if (!board._cells) return 'no _cells';
const result = [];
for (let r = 0; r < board.size; r++) {
for (let c = 0; c < board.size; c++) {
const cell = board._cells[r][c];
result.push({
row: r, col: c,
value: cell._value || cell.value,
cage: cell._cage ? {
sum: cell._cage._sum || cell._cage.sum,
id: cell._cage._id || cell._cage.id
} : null
});
}
}
return result;
}"""

_JS_BOARD_PROP_NAMES = """() => {
const board = DKS.puzzle.board;
// Zkus najít cages
const props = Object.keys(board).filter(k => !k.startsWith('_') || k.includes('cage') || k.includes('Cage'));
const allProps = Object.keys(board);
return {all_props: allProps, filtered: props};
}"""

_JS_BOARD_FULL = """() => {
const board = DKS.puzzle.board;
const result = {};
for (const key of Object.keys(board)) {
const val = board[key];
const t = typeof val;
if (t === 'function') continue;
if (t === 'object' && val !== null) {
if (Array.isArray(val)) {
result[key] = `Array(${val.length})`;
if (val.length > 0 && val.length < 100) {
try {
const sample = val[0];
result[key + '_sample'] = typeof sample === 'object' ? Object.keys(sample || {}).slice(0,10) : sample;
} catch(e) {}
}
} else {
result[key] = Object.keys(val).slice(0, 10);
}
} else {
result[key] = val;
}
}
return result;
}"""

_JS_SOLUTION = """() => {
const sol = DKS.puzzle.solution;
if (!sol) return 'no solution';
const props = Object.keys(sol);
const result = {props: props};
for (const p of props) {
const v = sol[p];
if (typeof v !== 'function') {
if (Array.isArray(v)) {
result[p] = v.slice(0, 20);
} else {
result[p] = v;
}
}
}
return result;
}"""


async def main():
    """Dump the raw and parsed puzzle data of the page at URL.

    Prints the puzzle JSON, the decoded bytes of board_base64 and
    solution_base64, and several views of the in-page board and solution
    objects, to help work out the data format.
    """
    async with async_playwright() as pw:
        chromium = await pw.chromium.launch(headless=True)
        ctx = await chromium.new_context(viewport={"width": 1280, "height": 900})
        tab = await ctx.new_page()
        print(f"Načítám {URL} ...")
        await tab.goto(URL, wait_until="networkidle", timeout=60_000)

        # 1) Puzzle JSON as embedded in the page
        print("\n=== Puzzle JSON ===")
        raw_json = await tab.evaluate(_JS_PUZZLE_JSON)
        print(json.dumps(raw_json, indent=2))

        # 2) base64 payloads decoded to byte lists (board first, then solution)
        byte_probes = (
            ("\n=== board_base64 dekódováno ===", _JS_BOARD_BYTES),
            ("\n=== solution_base64 dekódováno ===", _JS_SOLUTION_BYTES),
        )
        for heading, script in byte_probes:
            print(heading)
            decoded = await tab.evaluate(script)
            print(f" Délka: {len(decoded)} bytes")
            print(f" Raw: {decoded}")

        # 3) How Board unpacks the data
        print("\n=== Board po rozbalení ===")
        summary = await tab.evaluate(_JS_BOARD_SUMMARY)
        print(json.dumps(summary, indent=2))

        # 4) Cells and their cages from the board
        print("\n=== Board cells ===")
        cells = await tab.evaluate(_JS_BOARD_CELLS)
        if not isinstance(cells, list):
            # The probe returned its 'no _cells' marker instead of a list.
            print(f" {cells}")
        else:
            print(f" Celkem buněk: {len(cells)}")
            for entry in cells[:20]:
                print(f" [{entry['row']},{entry['col']}] value={entry.get('value')} cage={entry.get('cage')}")

        # 5) Property names that might lead to the cages
        print("\n=== Cages ===")
        prop_names = await tab.evaluate(_JS_BOARD_PROP_NAMES)
        print(json.dumps(prop_names, indent=2))

        # 6) Every non-function board property
        print("\n=== Board — všechny vlastnosti ===")
        everything = await tab.evaluate(_JS_BOARD_FULL)
        print(json.dumps(everything, indent=2, ensure_ascii=False)[:5000])

        # 7) Solution object
        print("\n=== Solution ===")
        solved = await tab.evaluate(_JS_SOLUTION)
        print(json.dumps(solved, indent=2, ensure_ascii=False)[:3000])

        await chromium.close()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,77 @@
"""
Průzkumný skript v4: vytáhne klece (cages) z DKS.puzzle.board.
"""
import asyncio
import json
import sys
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
URL = "https://www.dailykillersudoku.com/puzzle/376"
# JavaScript probes executed in the page through page.evaluate(); the text is
# passed to the browser unchanged.
_JS_CAGES = """() => {
const board = DKS.puzzle.board;
return board._cages.map((cage, i) => ({
id: i,
sum: cage.sum,
cells: cage.cells.map(c => ({row: c._row, col: c._col}))
}));
}"""

_JS_SOLUTION_VALUES = """() => {
return DKS.puzzle.solution._values;
}"""

_JS_CAGE_MAP = """() => {
const board = DKS.puzzle.board;
const map = [];
for (let r = 0; r < board.size; r++) {
const row = [];
for (let c = 0; c < board.size; c++) {
const cell = board._cells[r][c];
const cageIdx = board._cages.indexOf(cell._cage);
row.push(cageIdx);
}
map.push(row);
}
return map;
}"""


async def main():
    """Extract cages and the solution from DKS.puzzle and cross-check the sums.

    Prints every cage with its cells, the solution rows, the per-cell cage
    index map and, for each cage, whether the solution digits add up to the
    declared sum.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(viewport={"width": 1280, "height": 900})
            page = await context.new_page()
            print(f"Načítám {URL} ...")
            await page.goto(URL, wait_until="networkidle", timeout=60_000)

            # Cages
            print("\n=== Cages ===")
            cages = await page.evaluate(_JS_CAGES)
            for cage in cages:
                cells_str = ", ".join(f"({c['row']},{c['col']})" for c in cage['cells'])
                print(f" Klec {cage['id']:2d}: sum={cage['sum']:2d}, buňky=[{cells_str}]")

            # Solution
            print("\n=== Řešení ===")
            solution = await page.evaluate(_JS_SOLUTION_VALUES)
            for r, row in enumerate(solution):
                print(f" Řádek {r}: {row}")

            # Cage map, for verification
            print("\n=== Cage map (ověření) ===")
            cage_map = await page.evaluate(_JS_CAGE_MAP)
            for row in cage_map:
                print(f" {row}")

            # Sum verification
            print("\n=== Ověření součtů ===")
            for cage in cages:
                total = sum(solution[c['row']][c['col']] for c in cage['cells'])
                # Both branches used to be the empty string, so the check
                # printed no verdict. ASCII markers make the result visible.
                ok = "OK" if total == cage['sum'] else "CHYBA"
                print(f" Klec {cage['id']:2d}: sum={cage['sum']:2d}, actual={total:2d} {ok}")
        finally:
            # Close the browser even when navigation or a probe fails.
            await browser.close()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,113 @@
"""
Průzkumný skript v5: najde správné property names pro cell row/col.
"""
import asyncio
import json
import sys
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
URL = "https://www.dailykillersudoku.com/puzzle/376"
# JavaScript probes executed in the page through page.evaluate(); the text is
# passed to the browser unchanged.
_JS_CELL_KEYS = """() => {
const cage = DKS.puzzle.board._cages[0];
const cell = cage.cells[0];
return Object.keys(cell);
}"""

_JS_CELL_SCALARS = """() => {
const cage = DKS.puzzle.board._cages[0];
const cell = cage.cells[0];
const result = {};
for (const key of Object.keys(cell)) {
const val = cell[key];
if (typeof val !== 'function' && typeof val !== 'object') {
result[key] = val;
}
}
return result;
}"""

_JS_CAGES_BY_KEY = """() => {
const board = DKS.puzzle.board;
return board._cages.map((cage, i) => {
const cells = cage.cells.map(c => {
// Najdi row/col property
const keys = Object.keys(c);
const rowKey = keys.find(k => k.toLowerCase().includes('row') && typeof c[k] === 'number');
const colKey = keys.find(k => (k.toLowerCase().includes('col') || k.toLowerCase().includes('column')) && typeof c[k] === 'number');
return {
row: rowKey ? c[rowKey] : null,
col: colKey ? c[colKey] : null,
rowKey: rowKey,
colKey: colKey
};
});
return {id: i, sum: cage.sum, cells: cells};
});
}"""

_JS_CAGES_BY_IDENTITY = """() => {
const board = DKS.puzzle.board;
const result = [];
for (const cage of board._cages) {
const cellPositions = [];
for (const cageCell of cage.cells) {
// Najdi pozici buňky v _cells mřížce
for (let r = 0; r < board.size; r++) {
for (let c = 0; c < board.size; c++) {
if (board._cells[r][c] === cageCell) {
cellPositions.push({row: r, col: c});
}
}
}
}
result.push({sum: cage.sum, cells: cellPositions});
}
return result;
}"""


async def main():
    """Find the row/column property names of cage cells and verify cage sums.

    Prints the keys and scalar values of the first cell of the first cage,
    the first five cages resolved through auto-detected row/col keys, a
    fallback cage list built by matching cell objects against board._cells,
    and a per-cage comparison of the solution digits with the declared sum.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(viewport={"width": 1280, "height": 900})
            page = await context.new_page()
            print(f"Načítám {URL} ...")
            await page.goto(URL, wait_until="networkidle", timeout=60_000)

            # Property names of cells inside cages
            print("\n=== Cell properties ===")
            cell_props = await page.evaluate(_JS_CELL_KEYS)
            print(json.dumps(cell_props, indent=2))

            # Try all row/col variants
            print("\n=== Cell row/col lookup ===")
            cell_data = await page.evaluate(_JS_CELL_SCALARS)
            print(json.dumps(cell_data, indent=2))

            # Cages with cells, using the detected property names
            print("\n=== Cages s buňkami ===")
            cages = await page.evaluate(_JS_CAGES_BY_KEY)
            for cage in cages[:5]:
                cells_str = ", ".join(f"({c['row']},{c['col']})" for c in cage['cells'])
                print(f" Klec {cage['id']:2d}: sum={cage['sum']:2d}, buňky=[{cells_str}]")
                if cage['id'] == 0:
                    # Show once which keys were detected.
                    print(f" rowKey={cage['cells'][0]['rowKey']}, colKey={cage['cells'][0]['colKey']}")

            # If row/col are still null, fall back to an index-based lookup
            print("\n=== Fallback: cage map z _cells ===")
            cage_map = await page.evaluate(_JS_CAGES_BY_IDENTITY)
            for i, cage in enumerate(cage_map):
                cells_str = ", ".join(f"({c['row']},{c['col']})" for c in cage['cells'])
                print(f" Klec {i:2d}: sum={cage['sum']:2d}, buňky=[{cells_str}]")

            # Sum verification
            print("\n=== Ověření součtů ===")
            solution = await page.evaluate("() => DKS.puzzle.solution._values")
            for i, cage in enumerate(cage_map):
                total = sum(solution[c['row']][c['col']] for c in cage['cells'])
                # Both branches used to be the empty string, so the check
                # printed no verdict. ASCII markers make the result visible.
                ok = "OK" if total == cage['sum'] else "CHYBA"
                print(f" Klec {i:2d}: sum={cage['sum']:2d}, actual={total:2d} {ok}")
        finally:
            # Close the browser even when navigation or a probe fails.
            await browser.close()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,35 @@
"""
Zjistí rozsah puzzle v sudoku_killer tabulce a počet.
"""
import sys
from pathlib import Path
sys.stdout.reconfigure(encoding="utf-8")
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
from mysql_db import connect_mysql
# Queries are kept as module constants so the control flow below stays readable.
_SQL_RANGE_BY_TYPE = """
SELECT puzzle_type_id, COUNT(*), MIN(puzzle_number), MAX(puzzle_number),
MIN(puzzle_date), MAX(puzzle_date)
FROM sudoku_killer
GROUP BY puzzle_type_id
"""

_SQL_STRUCTURED_COUNT = """
SELECT COUNT(*) FROM puzzles WHERE game_type = 'killer_sudoku'
"""

conn = connect_mysql(database="puzzle")
try:
    cur = conn.cursor()
    try:
        # Per puzzle type: row count, number range and date range.
        cur.execute(_SQL_RANGE_BY_TYPE)
        for row in cur.fetchall():
            print(f" type_id={row[0]}, count={row[1]}, nums={row[2]}-{row[3]}, dates={row[4]}-{row[5]}")

        # Names of the puzzle types.
        cur.execute("SELECT id, name FROM puzzle_type")
        for row in cur.fetchall():
            print(f" puzzle_type: id={row[0]}, name={row[1]}")

        # How many killer_sudoku rows are already in the shared table.
        cur.execute(_SQL_STRUCTURED_COUNT)
        print(f" Už v puzzles tabulce: {cur.fetchone()[0]}")
    finally:
        cur.close()
finally:
    # Release the connection even when one of the queries fails.
    conn.close()
@@ -0,0 +1,254 @@
"""
Stáhne strukturovaná data (cage definice + řešení) z dailykillersudoku.com
a uloží do sdílené tabulky puzzles.
Funguje bez Playwright — data jsou inline v HTML jako JSON, dekóduje se base64 v Pythonu.
"""
import base64
import json
import re
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
import requests
from tqdm import tqdm
sys.stdout.reconfigure(encoding="utf-8")
sys.stderr.reconfigure(encoding="utf-8")
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
from mysql_db import connect_mysql
PUZZLE_TYPE_MAP = {1: "killer_sudoku", 2: "killer_sudoku_gt"}
BASE_URL = "https://www.dailykillersudoku.com/puzzle/{}"
# Inline ``new DKS.Puzzle({...})`` call in the page HTML. DOTALL lets the JSON
# literal span several lines; the non-greedy body stops at the first ``})``.
_PUZZLE_JSON_RE = re.compile(r'new DKS\.Puzzle\((\{.*?\})\)', re.DOTALL)


def fetch_puzzle_json(puzzle_number: int) -> dict | None:
    """Download one puzzle page and return its embedded JSON object.

    Args:
        puzzle_number: Number substituted into BASE_URL.

    Returns:
        The parsed JSON passed to ``new DKS.Puzzle(...)``, or None when the
        request fails, the status is not 200, the call is not found in the
        HTML, or the captured text is not valid JSON.
    """
    url = BASE_URL.format(puzzle_number)
    try:
        r = requests.get(url, timeout=15)
        if r.status_code != 200:
            return None
        m = _PUZZLE_JSON_RE.search(r.text)
        if not m:
            return None
        return json.loads(m.group(1))
    except (requests.RequestException, ValueError):
        # Network failures and malformed JSON (JSONDecodeError is a
        # ValueError) mean "no data". Anything else is a programming error
        # and should surface instead of being hidden.
        return None
def decode_board(board_b64: str) -> tuple[list[list[int]], list[int]]:
    """Decode board_base64 into (9x9 cage-id grid, list of cage sums).

    Layout as read here: a 2-byte header, then 81 big-endian uint16 cage
    ids in row-major order, then one byte per cage sum.
    """
    payload = base64.b64decode(board_b64)
    ids_end = 2 + 81 * 2
    cell_data = payload[2:ids_end]

    # Each row covers 9 cells * 2 bytes = 18 bytes of cell_data.
    cage_map = [
        [(cell_data[pos] << 8) | cell_data[pos + 1]
         for pos in range(row * 18, row * 18 + 18, 2)]
        for row in range(9)
    ]
    cage_sums = [byte for byte in payload[ids_end:]]
    return cage_map, cage_sums
def decode_solution(solution_b64: str) -> list[list[int]]:
    """Decode solution_base64 into a 9x9 grid of digits.

    The payload is a 2-byte header followed by 81 bytes, one per cell in
    row-major order. Bytes after the first 81 are ignored.

    Raises:
        ValueError: If fewer than 81 value bytes follow the header (or the
            base64 text itself is invalid), instead of silently returning
            short or empty rows.
    """
    raw = base64.b64decode(solution_b64)
    values = list(raw[2:2 + 81])  # skip the 2-byte header
    if len(values) != 81:
        raise ValueError(
            f"solution payload has {len(values)} cell bytes, expected 81"
        )
    return [values[r * 9:(r + 1) * 9] for r in range(9)]
def build_cages_string(cage_map: list[list[int]], cage_sums: list[int]) -> str:
    """Serialize cages as ``sum,r0c0r0c1|sum,r1c2r1c3|...``.

    Cages are emitted in ascending cage-id order, each with its cells in
    row-major order. A cage id with no entry in cage_sums gets sum 0.
    """
    members = {}
    for position in range(81):
        r, c = divmod(position, 9)
        members.setdefault(cage_map[r][c], []).append(f"r{r}c{c}")

    def _render(cid):
        total = cage_sums[cid] if cid < len(cage_sums) else 0
        return f"{total},{''.join(members[cid])}"

    return "|".join(_render(cid) for cid in sorted(members))
def build_solution_string(solution: list[list[int]]) -> str:
    """Flatten the solution grid row by row into one string of digits."""
    pieces = []
    for row in solution:
        pieces.extend(str(value) for value in row)
    return "".join(pieces)
def process_puzzle(puzzle_number: int) -> dict | None:
    """Fetch one puzzle and convert it into a row for the ``puzzles`` table.

    Args:
        puzzle_number: Puzzle number on the website.

    Returns:
        A dict with puzzle_number, game_type, difficulty, puzzle_date,
        puzzle (cage string), solution (digit string), extra (JSON text)
        and source, or None when the page could not be fetched or its
        payload is missing fields or cannot be decoded.
    """
    pj = fetch_puzzle_json(puzzle_number)
    if not pj:
        return None
    try:
        cage_map, cage_sums = decode_board(pj["board_base64"])
        solution = decode_solution(pj["solution_base64"])
        cage_str = build_cages_string(cage_map, cage_sums)
        sol_str = build_solution_string(solution)
        # Unknown or missing puzzle_type falls back to plain killer sudoku.
        game_type = PUZZLE_TYPE_MAP.get(pj.get("puzzle_type", 1), "killer_sudoku")
        return {
            "puzzle_number": pj["id"],
            "game_type": game_type,
            "difficulty": str(pj.get("difficulty", 0)),
            "puzzle_date": pj.get("date"),
            "puzzle": cage_str,
            "solution": sol_str,
            "extra": json.dumps({
                "grid_size": 9,
                "puzzle_number": pj["id"],
                "original_difficulty": pj.get("difficulty"),
            }),
            "source": "dailykillersudoku.com",
        }
    except (KeyError, IndexError, TypeError, ValueError):
        # Missing fields, truncated payloads or invalid base64 (binascii.Error
        # is a ValueError): treat the puzzle as unavailable. Other exceptions
        # indicate a bug and are allowed to propagate.
        return None
def save_batch(results: list[dict]) -> int:
    """Upsert a batch of puzzle rows into the ``puzzles`` table.

    Args:
        results: Rows as produced by process_puzzle.

    Returns:
        Number of statements that reported a positive rowcount, i.e. rows
        that were inserted or actually changed.
    """
    conn = connect_mysql(database="puzzle")
    try:
        cur = conn.cursor()
        try:
            inserted = 0
            for r in results:
                cur.execute(
                    "INSERT INTO puzzles "
                    "(game_type, difficulty, puzzle_date, puzzle, solution, extra, source) "
                    "VALUES (%s, %s, %s, %s, %s, %s, %s) "
                    "ON DUPLICATE KEY UPDATE puzzle=VALUES(puzzle), solution=VALUES(solution), "
                    "extra=VALUES(extra)",
                    (r["game_type"], r["difficulty"], r["puzzle_date"],
                     r["puzzle"], r["solution"], r["extra"], r["source"]),
                )
                if cur.rowcount > 0:
                    inserted += 1
        finally:
            cur.close()
        # NOTE(review): connect_mysql may already enable autocommit, in which
        # case this is a no-op. Committing explicitly keeps the batch from
        # being lost if it does not.
        conn.commit()
    finally:
        # Always release the connection, also when a statement fails.
        conn.close()
    return inserted
def get_puzzle_numbers() -> list[int]:
    """Return all puzzle numbers from ``sudoku_killer`` in ascending order."""
    conn = connect_mysql(database="puzzle")
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT puzzle_number FROM sudoku_killer ORDER BY puzzle_number")
            return [row[0] for row in cur.fetchall()]
        finally:
            cur.close()
    finally:
        # Release the connection even when the query fails.
        conn.close()
JSON_FILE = Path(__file__).parent / "killer_structured_data.json"
def download_all(puzzle_numbers: list[int]) -> list[dict]:
    """Download all puzzles from the web, checkpointing to JSON_FILE as it goes.

    Records already present in JSON_FILE are loaded first and their puzzle
    numbers are skipped, so an interrupted run can be resumed. Puzzles that
    fail to download are counted as errors and are not stored, which means
    a later run tries them again.

    Args:
        puzzle_numbers: Puzzle numbers to make sure are downloaded.

    Returns:
        All records, both previously stored and newly downloaded.
    """
    all_results = []
    if JSON_FILE.exists():
        all_results = json.loads(JSON_FILE.read_text(encoding="utf-8"))
        print(f"Načteno {len(all_results)} existujících záznamů z JSON")
    done_numbers = {r["puzzle_number"] for r in all_results}
    remaining = [n for n in puzzle_numbers if n not in done_numbers]
    print(f"Zbývá stáhnout: {len(remaining)} z {len(puzzle_numbers)}")
    if not remaining:
        return all_results

    batch_size = 100
    errors = 0
    # The checkpoint is written to a sibling temp file and then moved over
    # JSON_FILE, so an interrupted write cannot leave a truncated resume file.
    tmp_file = JSON_FILE.with_name(JSON_FILE.name + ".tmp")
    with ThreadPoolExecutor(max_workers=6) as executor:
        for start in tqdm(range(0, len(remaining), batch_size),
                          desc="Stahování", unit="batch"):
            batch_nums = remaining[start:start + batch_size]
            futures = {executor.submit(process_puzzle, n): n for n in batch_nums}
            for future in as_completed(futures):
                result = future.result()
                if result:
                    all_results.append(result)
                else:
                    errors += 1
            # Checkpoint after every batch.
            tmp_file.write_text(
                json.dumps(all_results, ensure_ascii=False), encoding="utf-8"
            )
            tmp_file.replace(JSON_FILE)
    print(f"Staženo celkem: {len(all_results)}, chyb: {errors}")
    return all_results
def import_from_json():
    """Load every record from JSON_FILE and upsert it into MySQL in batches of 500.

    Prints a hint and returns when the JSON file does not exist yet.
    """
    if not JSON_FILE.exists():
        print("JSON soubor neexistuje, nejdřív spusť stahování.")
        return

    records = json.loads(JSON_FILE.read_text(encoding="utf-8"))
    print(f"Importuji {len(records)} záznamů z JSON do MySQL...")

    chunk = 500
    offsets = tqdm(range(0, len(records), chunk), desc="Import", unit="batch")
    total_inserted = sum(
        save_batch(records[offset:offset + chunk]) for offset in offsets
    )
    print(f"Import hotov: aktualizováno {total_inserted} záznamů")
def main():
    """Command-line entry point.

    Always runs a smoke test on puzzle 376 first and stops if it fails.
    With ``--import`` the existing JSON file is imported into MySQL; with
    ``--run`` all puzzles are downloaded and then imported; without either
    flag only a usage hint is printed.
    """
    # Smoke test on a single puzzle
    print("=== Test: puzzle 376 ===")
    sample = process_puzzle(376)
    if not sample:
        print(" Selhalo!")
        return
    cage_count = len(sample['puzzle'].split('|'))
    print(f" game_type: {sample['game_type']}")
    print(f" difficulty: {sample['difficulty']}")
    print(f" date: {sample['puzzle_date']}")
    print(f" cages ({cage_count} klecí): {sample['puzzle'][:100]}...")
    print(f" solution: {sample['solution']}")

    args = sys.argv
    if "--import" in args:
        import_from_json()
        return
    if "--run" not in args:
        print("\nPro stažení spusť s --run, pro import z JSON s --import")
        return

    puzzle_numbers = get_puzzle_numbers()
    print(f"\nCelkem puzzle k zpracování: {len(puzzle_numbers)}")
    records = download_all(puzzle_numbers)

    print("\nImportuji do MySQL...")
    chunk = 500
    offsets = tqdm(range(0, len(records), chunk), desc="Import", unit="batch")
    total_inserted = sum(
        save_batch(records[offset:offset + chunk]) for offset in offsets
    )
    print(f"\nHotovo: aktualizováno {total_inserted} záznamů")


if __name__ == "__main__":
    main()
@@ -0,0 +1,229 @@
"""
Vykreslí Killer Sudoku puzzle do PDF z dat v MySQL tabulce puzzles.
"""
import json
import os
import re
import sys
from pathlib import Path
sys.stdout.reconfigure(encoding="utf-8")
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Knihovny"))
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import cm
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen.canvas import Canvas
from mysql_db import connect_mysql
# Windows font directory: <WINDIR>/Fonts, falling back to C:\Windows when the
# WINDIR environment variable is not set.
_fonts_dir = os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts")
# Register Arial regular and bold from that directory under the names
# "Arial" and "ArialBold", which the drawing code passes to setFont().
pdfmetrics.registerFont(TTFont("Arial", os.path.join(_fonts_dir, "arial.ttf")))
pdfmetrics.registerFont(TTFont("ArialBold", os.path.join(_fonts_dir, "arialbd.ttf")))
# PDF path next to this script (presumably the render target; its use is not
# visible in this part of the file).
OUTPUT = Path(__file__).parent / "test_killer_sudoku_v6.pdf"
def parse_cages(puzzle_str: str) -> list[dict]:
    """Parse the serialized cage list into a list of cage dictionaries.

    Cages are separated by ``|``.  Each cage is ``<sum>,<cells>`` where the
    part after the first comma contains any number of ``r<digit>c<digit>``
    tokens; whatever separates those tokens is ignored.

    Args:
        puzzle_str: Serialized cages, e.g. ``"10,r0c0r0c1|5,r1c1"``.  An
            empty or missing value yields an empty list, and empty segments
            (such as the one produced by a trailing ``|``) are skipped.

    Returns:
        One ``{"sum": int, "cells": [(row, col), ...]}`` dictionary per cage,
        in input order.  The row/column digits are returned exactly as
        written in the string.

    Raises:
        ValueError: If a non-empty segment has no comma or its sum is not an
            integer.
    """
    if not puzzle_str:
        return []

    cages = []
    for part in puzzle_str.split("|"):
        # A trailing or doubled separator produces an empty segment; skipping
        # it avoids an unpacking error on otherwise valid data.
        if not part.strip():
            continue
        target, cells_str = part.split(",", 1)
        cells = [(int(m[1]), int(m[2])) for m in re.finditer(r"r(\d)c(\d)", cells_str)]
        cages.append({"sum": int(target), "cells": cells})
    return cages
def build_cage_map(cages: list[dict]) -> list[list[int]]:
    """Build a 9x9 lookup grid telling which cage each cell belongs to.

    Every entry holds the index (position in ``cages``) of the cage that
    lists the cell, or ``-1`` when no cage lists it.  If several cages list
    the same cell, the one appearing last in ``cages`` wins.
    """
    grid = [[-1] * 9 for _ in range(9)]
    memberships = (
        (cell_row, cell_col, cage_index)
        for cage_index, cage in enumerate(cages)
        for cell_row, cell_col in cage["cells"]
    )
    for cell_row, cell_col, cage_index in memberships:
        grid[cell_row][cell_col] = cage_index
    return grid
def cage_label_cell(cage: dict) -> tuple[int, int]:
    """Return the cage cell that should carry the sum label.

    That is the cell with the smallest row, ties broken by the smallest
    column (the first cell of the cage in reading order).
    """
    def reading_order(cell):
        return cell[0], cell[1]

    return min(cage["cells"], key=reading_order)
def parse_solution(solution_str: str) -> list[list[int]]:
    """Turn a row-by-row digit string into a 9x9 grid of integers.

    The first 81 characters of ``solution_str`` are read; character
    ``r * 9 + c`` becomes the value at row ``r``, column ``c``.
    """
    grid = []
    for row_offset in range(0, 81, 9):
        grid.append([int(solution_str[row_offset + col]) for col in range(9)])
    return grid
def _cage_edge_runs(cage_map, line, side, horizontal):
    """Yield ``(start, stop)`` runs of cells on one grid line that need a cage edge.

    ``line`` is a row index when ``horizontal`` is true, otherwise a column
    index.  ``side`` is ``-1`` to test the neighbouring line before it
    (above / left) or ``+1`` for the one after it (below / right).  A cell
    needs an edge on that side when the neighbour lies outside the 9x9 grid
    or belongs to a different cage.  Consecutive cells of the same cage that
    all need the edge are merged into one half-open run ``[start, stop)``.
    """
    off_grid = object()  # compares unequal to every cage id

    def cage_id(pos, offset=0):
        other = line + offset
        if not 0 <= other < 9:
            return off_grid
        return cage_map[other][pos] if horizontal else cage_map[pos][other]

    pos = 0
    while pos < 9:
        current = cage_id(pos)
        if cage_id(pos, side) == current:
            # Neighbour is in the same cage: no edge between them.
            pos += 1
            continue
        start = pos
        while pos < 9 and cage_id(pos) == current and cage_id(pos, side) != current:
            pos += 1
        yield start, pos


def draw_killer_sudoku(c: Canvas, x0: float, y0: float, cell: float,
                       cages: list[dict], cage_map: list[list[int]],
                       title: str = "", solution: list[list[int]] | None = None):
    """Draw one 9x9 Killer Sudoku board onto a canvas.

    Args:
        c: Canvas to draw on.
        x0: X coordinate of the board's left edge.
        y0: Y coordinate of the board's top edge; the board extends downwards
            to ``y0 - 9 * cell``.
        cell: Side length of one cell.
        cages: Cage dictionaries with ``"sum"`` and ``"cells"`` keys; cages
            without cells get no label.
        cage_map: 9x9 grid of cage ids, used to decide where cage borders run.
        title: Optional heading drawn just above the board, in whatever fill
            colour the canvas currently has.
        solution: Optional 9x9 grid of digits printed into the cells.

    Drawing order (later layers cover earlier ones): white background,
    solution digits, thin cell grid, dashed cage borders inset into the
    cells, thick 3x3 box lines with the outer frame, and finally the cage sum
    labels on small white patches.  The canvas is left with black fill and
    stroke colours, the thick line width, a solid dash pattern and the bold
    label font selected.
    """
    label_font = max(cell * 0.22, 5)
    num_font = max(cell * 0.45, 7)
    thin = 0.3
    cage_line = 1.0
    thick = 2.2
    board = 9 * cell

    if title:
        c.setFont("ArialBold", 12)
        c.drawString(x0, y0 + 5, title)

    # White background
    c.setFillColor(colors.white)
    c.rect(x0, y0 - board, board, board, fill=1, stroke=0)

    # Solution digits
    if solution:
        c.setFillColor(colors.Color(0.25, 0.25, 0.25))
        c.setFont("Arial", num_font)
        for r in range(9):
            for co in range(9):
                cx = x0 + co * cell + cell / 2
                cy = y0 - (r + 1) * cell + cell * 0.28
                c.drawCentredString(cx, cy, str(solution[r][co]))

    # --- Layer 1: complete sudoku grid (thin solid lines) ---
    c.setStrokeColor(colors.Color(0.55, 0.55, 0.55))
    c.setLineWidth(thin)
    for i in range(1, 9):
        c.line(x0, y0 - i * cell, x0 + 9 * cell, y0 - i * cell)
        c.line(x0 + i * cell, y0, x0 + i * cell, y0 - 9 * cell)

    # --- Layer 2: dashed cage borders (inset into the cells) ---
    inset = cell * 0.10
    c.setStrokeColor(colors.Color(0.2, 0.2, 0.2))
    c.setLineWidth(cage_line * 0.5)
    c.setDash(3, 2)

    # Horizontal cage edges: all top borders first, then all bottom borders.
    for side in (-1, 1):
        for r in range(9):
            if side < 0:
                y = y0 - r * cell - inset
            else:
                y = y0 - (r + 1) * cell + inset
            for start, stop in _cage_edge_runs(cage_map, r, side, True):
                c.line(x0 + start * cell + inset, y,
                       x0 + stop * cell - inset, y)

    # Vertical cage edges: all left borders first, then all right borders.
    for side in (-1, 1):
        for co in range(9):
            if side < 0:
                x = x0 + co * cell + inset
            else:
                x = x0 + (co + 1) * cell - inset
            for start, stop in _cage_edge_runs(cage_map, co, side, False):
                c.line(x, y0 - start * cell - inset,
                       x, y0 - stop * cell + inset)

    c.setDash()

    # Thick 3x3 box lines + outer frame
    c.setStrokeColor(colors.black)
    c.setLineWidth(thick)
    for i in range(0, 10, 3):
        c.line(x0, y0 - i * cell, x0 + 9 * cell, y0 - i * cell)
        c.line(x0 + i * cell, y0, x0 + i * cell, y0 - 9 * cell)

    # Cage sum labels go last so that no line is drawn over them.
    labels = []
    for cage in cages:
        if not cage["cells"]:
            continue
        row, col = cage_label_cell(cage)
        lx = x0 + col * cell + cell * 0.05
        ly = y0 - row * cell - label_font * 1.05
        labels.append((lx, ly, str(cage["sum"])))

    # First pass: white patches that blank out the grid behind each label.
    c.setFillColor(colors.white)
    c.setFont("ArialBold", label_font)
    for lx, ly, txt in labels:
        tw = c.stringWidth(txt, "ArialBold", label_font)
        c.rect(lx - 0.5, ly - 0.5, tw + 1, label_font + 1, fill=1, stroke=0)

    # Second pass: the label text itself.
    c.setFillColor(colors.black)
    for lx, ly, txt in labels:
        c.drawString(lx, ly, txt)
def main():
    """Render one stored Killer Sudoku and its solution into a single-page PDF.

    Reads the puzzle whose ``extra`` column mentions puzzle number 31414 from
    the ``puzzles`` table, draws the empty puzzle in the upper part of an A4
    page and the solved board below it, and writes the result to ``OUTPUT``.
    Prints a message and returns without creating a file when no row matches.
    """
    conn = connect_mysql(database="puzzle")
    # try/finally so the cursor and connection are released even when the
    # query itself fails.
    try:
        cur = conn.cursor()
        try:
            # NOTE(review): the LIKE pattern is a substring match, so a longer
            # number starting with 31414 would match as well — confirm this
            # is acceptable.
            cur.execute(
                "SELECT difficulty, puzzle, solution, extra FROM puzzles "
                "WHERE game_type='killer_sudoku' AND extra LIKE '%%\"puzzle_number\": 31414%%' "
                "LIMIT 1"
            )
            row = cur.fetchone()
        finally:
            cur.close()
    finally:
        conn.close()

    if not row:
        print("Žádná data.")
        return

    difficulty, puzzle_str, solution_str, extra_json = row
    extra = json.loads(extra_json)
    cages = parse_cages(puzzle_str)
    cage_map = build_cage_map(cages)
    solution = parse_solution(solution_str)

    page_w, page_h = A4
    board_cm = 11
    cell = board_cm * cm / 9
    board_px = 9 * cell
    c = Canvas(str(OUTPUT), pagesize=A4)

    # Puzzle: horizontally centred, 2 cm below the top edge of the page.
    x0 = (page_w - board_px) / 2
    y0 = page_h - 2 * cm
    draw_killer_sudoku(c, x0, y0, cell, cages, cage_map,
                       f"Killer Sudoku (difficulty {difficulty}) — {extra.get('puzzle_number', '')}")

    # Solution: same board, placed 3 cm below the bottom of the puzzle.
    y0_sol = y0 - board_px - 3 * cm
    draw_killer_sudoku(c, x0, y0_sol, cell, cages, cage_map,
                       "Řešení", solution=solution)

    c.save()
    print(f"PDF uloženo: {OUTPUT}")


if __name__ == "__main__":
    main()