Files
ordinaceprojekt/SběrDatRůzné/SudokuKiller/preskumaj_killer_data2.py
T
Vladimir Buzalka c4c0d1d435 notebookvb
2026-05-08 22:06:57 +02:00

175 lines
6.7 KiB
Python

"""
Exploratory script v2: tries to find structured puzzle data
on dailykillersudoku.com — inspects the DKS object and a valid puzzle.
"""
import asyncio
import json
import sys
# Switch stdout to UTF-8 before anything is printed: the script emits
# non-ASCII text (Czech headings, JSON dumped with ensure_ascii=False).
# NOTE(review): presumably needed for consoles whose default encoding is
# not UTF-8 — confirm before moving this line.
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
# Puzzle page that main() loads and inspects; responses whose URL equals
# this value are excluded from the captured-response list.
URL = "https://www.dailykillersudoku.com/puzzle/376"
# JavaScript evaluated in the page: summarise the top-level keys of the global
# DKS object (functions as 'function', objects as their first 15 keys).
_JS_DKS_KEYS = """() => {
    if (typeof DKS === 'undefined') return null;
    const result = {};
    for (const key of Object.keys(DKS)) {
        const val = DKS[key];
        const t = typeof val;
        if (t === 'function') {
            result[key] = 'function';
        } else if (t === 'object' && val !== null) {
            result[key] = {type: 'object', keys: Object.keys(val).slice(0, 15)};
        } else {
            result[key] = val;
        }
    }
    return result;
}"""

# JavaScript evaluated in the page: JSON-clone every DKS member whose name
# contains one of the "interesting" substrings; fall back to a 200-character
# string form when the value cannot be serialised.
_JS_PUZZLE_DATA = """() => {
    if (typeof DKS === 'undefined') return null;
    const result = {};
    const interesting = ['board', 'puzzle', 'game', 'grid', 'cages', 'cells',
                         'solution', 'currentPuzzle', 'puzzleData', 'data',
                         'sudoku', 'killer', 'state'];
    for (const key of Object.keys(DKS)) {
        if (interesting.some(i => key.toLowerCase().includes(i))) {
            try {
                result[key] = JSON.parse(JSON.stringify(DKS[key]));
            } catch(e) {
                result[key] = String(DKS[key]).substring(0, 200);
            }
        }
    }
    return result;
}"""

# JavaScript evaluated in the page: length and first 1000 characters of every
# <script> element whose text mentions one of the puzzle-related keywords.
_JS_SCRIPT_TAGS = """() => {
    const scripts = document.querySelectorAll('script');
    const results = [];
    for (const s of scripts) {
        const text = s.textContent || '';
        if (text.includes('cage') || text.includes('cell') || text.includes('solution')
            || text.includes('group') || text.includes('sum') || text.includes('Board')
            || text.includes('Puzzle')) {
            results.push({
                length: text.length,
                snippet: text.substring(0, 1000)
            });
        }
    }
    return results;
}"""

# JavaScript evaluated in the page: for every <svg> whose markup exceeds 1000
# characters, report its size attributes, text/path counts and the first 30
# <text> nodes with their coordinates and class.
_JS_SVG_GRID = """() => {
    const svgs = document.querySelectorAll('svg');
    const results = [];
    for (const svg of svgs) {
        const html = svg.outerHTML;
        if (html.length > 1000) {
            // Pravděpodobně puzzle mřížka
            const texts = svg.querySelectorAll('text');
            const textContent = Array.from(texts).map(t => ({
                text: t.textContent,
                x: t.getAttribute('x'),
                y: t.getAttribute('y'),
                class: t.getAttribute('class')
            }));
            const paths = svg.querySelectorAll('path');
            results.push({
                size: html.length,
                width: svg.getAttribute('width'),
                height: svg.getAttribute('height'),
                viewBox: svg.getAttribute('viewBox'),
                text_count: texts.length,
                path_count: paths.length,
                texts: textContent.slice(0, 30)
            });
        }
    }
    return results;
}"""

# JavaScript evaluated in the page: regex-scan the document HTML for inline
# assignments that look like puzzle data, returning up to 300 characters per
# match. Raw string: the JS regex literals contain backslash sequences
# (\., \w, \s, \{, \[ ...) that are not valid Python escapes; a raw literal
# yields the same text without the invalid-escape warning.
_JS_INLINE_DATA = r"""() => {
    const html = document.documentElement.innerHTML;
    // Hledej vzory jako JSON pole, cage definice apod.
    const patterns = [
        /DKS\.\w+\s*=\s*(\{[^}]{20,}\})/g,
        /DKS\.\w+\s*=\s*(\[[^\]]{20,}\])/g,
        /var\s+\w+\s*=\s*(\{[^}]{50,}\})/g,
        /puzzl\w*\s*[:=]\s*["'{[]/gi
    ];
    const found = [];
    for (const p of patterns) {
        let m;
        while ((m = p.exec(html)) !== null) {
            found.push(m[0].substring(0, 300));
        }
    }
    return found;
}"""


def _dump(data, limit):
    """Print *data* as indented, non-ASCII-preserving JSON cut to *limit* characters."""
    print(json.dumps(data, indent=2, ensure_ascii=False)[:limit])


async def main():
    """Load the puzzle page at ``URL`` and print everything that might hold puzzle data.

    Opens headless Chromium, records textual responses from the site while the
    page loads, then prints six sections in order: the DKS object's keys,
    puzzle-related DKS members, matching <script> tags, the recorded
    responses, large SVG elements, and inline-JS regex matches.

    Returns nothing; all results go to stdout. Errors from navigation or page
    evaluation propagate to the caller, and the browser is closed either way.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(viewport={"width": 1280, "height": 900})
            page = await context.new_page()
            api_responses = []

            async def on_response(response):
                """Record a text-like response from the site other than the page itself."""
                url = response.url
                if "dailykillersudoku" not in url or url == URL:
                    return
                try:
                    ct = response.headers.get("content-type", "")
                    if not ("json" in ct or "javascript" in ct or "text" in ct):
                        return
                    body = await response.text()
                    # Short bodies are kept (first 2000 chars); long ones are
                    # replaced by a size note.
                    if len(body) < 5000:
                        shown = body[:2000]
                    else:
                        shown = f"({len(body)} chars)"
                    api_responses.append({"url": url, "status": response.status, "body": shown})
                except Exception:
                    # Best-effort capture: a response that cannot be read is
                    # skipped. Only Exception is caught, so task cancellation
                    # and KeyboardInterrupt still propagate.
                    return

            page.on("response", on_response)
            print(f"Načítám {URL} ...")
            await page.goto(URL, wait_until="networkidle", timeout=60_000)

            # 1) DKS object — keys
            print("\n=== DKS objekt — klíče ===")
            dks = await page.evaluate(_JS_DKS_KEYS)
            if dks:
                _dump(dks, 5000)

            # 2) DKS.board or similar puzzle objects
            print("\n=== DKS puzzle-related data ===")
            puzzle_data = await page.evaluate(_JS_PUZZLE_DATA)
            if puzzle_data:
                _dump(puzzle_data, 8000)
            else:
                print(" žádné puzzle data")

            # 3) Script tags containing puzzle data
            print("\n=== Script tagy s puzzle daty ===")
            _dump(await page.evaluate(_JS_SCRIPT_TAGS), 8000)

            # 4) Captured responses
            print("\n=== Zachycené requesty (dailykillersudoku) ===")
            for r in api_responses:
                print(f"\n URL: {r['url']}")
                print(f" Status: {r['status']}")
                print(f" Body: {r['body'][:500]}")

            # 5) SVG content — puzzle grid
            print("\n=== SVG puzzle mřížka ===")
            _dump(await page.evaluate(_JS_SVG_GRID), 5000)

            # 6) Inline JS carrying puzzle data
            print("\n=== Inline JS s puzzle daty ===")
            _dump(await page.evaluate(_JS_INLINE_DATA), 3000)
        finally:
            # Close the browser even when navigation or an evaluate call fails.
            await browser.close()
if __name__ == "__main__":
    # Run the exploration only when executed as a script, not on import.
    asyncio.run(main())