notebookvb
This commit is contained in:
@@ -0,0 +1,174 @@
|
||||
"""
|
||||
Průzkumný skript v2: zkouší najít strukturovaná data puzzle
|
||||
na dailykillersudoku.com — prozkoumá DKS objekt a platný puzzle.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
|
||||
# Switch stdout to UTF-8 so the non-ASCII text this script prints (Czech
# messages, JSON dumped with ensure_ascii=False) can always be encoded,
# regardless of the console's default encoding.
sys.stdout.reconfigure(encoding="utf-8")
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
# Puzzle page that main() loads and inspects.
URL = "https://www.dailykillersudoku.com/puzzle/376"
|
||||
|
||||
|
||||
def _dump_json(value, limit):
    """Print *value* as indented JSON (non-ASCII kept), cut to *limit* characters."""
    print(json.dumps(value, indent=2, ensure_ascii=False)[:limit])


async def main():
    """Load the puzzle page in headless Chromium and print what data it exposes.

    The report is written to stdout in six sections:

    1. keys of the global ``DKS`` JavaScript object,
    2. ``DKS`` members whose name looks puzzle-related,
    3. ``<script>`` tags whose text mentions puzzle-like keywords,
    4. text/JSON/JavaScript responses fetched from the site while loading,
    5. large ``<svg>`` elements (text nodes and path counts),
    6. inline JavaScript assignments that look like embedded puzzle data.

    Responses whose body cannot be read are skipped with a note on stderr.
    The browser is closed even when navigation or evaluation fails.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(viewport={"width": 1280, "height": 900})
            page = await context.new_page()

            api_responses = []

            async def on_response(response):
                """Record short text-like responses coming from the puzzle site."""
                url = response.url
                if "dailykillersudoku" not in url or url == URL:
                    return
                try:
                    ct = response.headers.get("content-type", "")
                    if "json" in ct or "javascript" in ct or "text" in ct:
                        body = await response.text()
                        # Keep small bodies (truncated); for large ones only the size.
                        if len(body) < 5000:
                            shown = body[:2000]
                        else:
                            shown = f"({len(body)} chars)"
                        api_responses.append(
                            {"url": url, "status": response.status, "body": shown}
                        )
                except Exception as exc:
                    # Best-effort capture: a body may be unavailable (redirects,
                    # page already closed). Unlike a bare ``except``, this lets
                    # task cancellation and KeyboardInterrupt propagate.
                    print(f" (přeskakuji {url}: {exc!r})", file=sys.stderr)

            page.on("response", on_response)

            print(f"Načítám {URL} ...")
            await page.goto(URL, wait_until="networkidle", timeout=60_000)

            # 1) DKS object — keys
            print("\n=== DKS objekt — klíče ===")
            dks = await page.evaluate("""() => {
                if (typeof DKS === 'undefined') return null;
                const result = {};
                for (const key of Object.keys(DKS)) {
                    const val = DKS[key];
                    const t = typeof val;
                    if (t === 'function') {
                        result[key] = 'function';
                    } else if (t === 'object' && val !== null) {
                        result[key] = {type: 'object', keys: Object.keys(val).slice(0, 15)};
                    } else {
                        result[key] = val;
                    }
                }
                return result;
            }""")
            if dks:
                _dump_json(dks, 5000)
            else:
                # Mirror the fallback of section 2 instead of printing nothing.
                print(" DKS objekt nenalezen nebo je prázdný")

            # 2) DKS.board or similar puzzle objects
            print("\n=== DKS puzzle-related data ===")
            puzzle_data = await page.evaluate("""() => {
                if (typeof DKS === 'undefined') return null;
                const result = {};
                const interesting = ['board', 'puzzle', 'game', 'grid', 'cages', 'cells',
                                     'solution', 'currentPuzzle', 'puzzleData', 'data',
                                     'sudoku', 'killer', 'state'];
                for (const key of Object.keys(DKS)) {
                    if (interesting.some(i => key.toLowerCase().includes(i))) {
                        try {
                            result[key] = JSON.parse(JSON.stringify(DKS[key]));
                        } catch(e) {
                            result[key] = String(DKS[key]).substring(0, 200);
                        }
                    }
                }
                return result;
            }""")
            if puzzle_data:
                _dump_json(puzzle_data, 8000)
            else:
                print(" žádné puzzle data")

            # 3) Script tags containing puzzle data
            print("\n=== Script tagy s puzzle daty ===")
            scripts = await page.evaluate("""() => {
                const scripts = document.querySelectorAll('script');
                const results = [];
                for (const s of scripts) {
                    const text = s.textContent || '';
                    if (text.includes('cage') || text.includes('cell') || text.includes('solution')
                        || text.includes('group') || text.includes('sum') || text.includes('Board')
                        || text.includes('Puzzle')) {
                        results.push({
                            length: text.length,
                            snippet: text.substring(0, 1000)
                        });
                    }
                }
                return results;
            }""")
            _dump_json(scripts, 8000)

            # 4) Captured requests
            print("\n=== Zachycené requesty (dailykillersudoku) ===")
            for r in api_responses:
                print(f"\n URL: {r['url']}")
                print(f" Status: {r['status']}")
                print(f" Body: {r['body'][:500]}")

            # 5) SVG content — the puzzle grid
            print("\n=== SVG puzzle mřížka ===")
            svg_data = await page.evaluate("""() => {
                const svgs = document.querySelectorAll('svg');
                const results = [];
                for (const svg of svgs) {
                    const html = svg.outerHTML;
                    if (html.length > 1000) {
                        // Pravděpodobně puzzle mřížka
                        const texts = svg.querySelectorAll('text');
                        const textContent = Array.from(texts).map(t => ({
                            text: t.textContent,
                            x: t.getAttribute('x'),
                            y: t.getAttribute('y'),
                            class: t.getAttribute('class')
                        }));
                        const paths = svg.querySelectorAll('path');
                        results.push({
                            size: html.length,
                            width: svg.getAttribute('width'),
                            height: svg.getAttribute('height'),
                            viewBox: svg.getAttribute('viewBox'),
                            text_count: texts.length,
                            path_count: paths.length,
                            texts: textContent.slice(0, 30)
                        });
                    }
                }
                return results;
            }""")
            _dump_json(svg_data, 5000)

            # 6) Look for inline JS carrying puzzle data
            print("\n=== Inline JS s puzzle daty ===")
            # Raw string: the JS regex literals contain backslashes (\., \w, \s, \{ ...)
            # that are invalid escape sequences in a normal Python string.
            inline_data = await page.evaluate(r"""() => {
                const html = document.documentElement.innerHTML;
                // Hledej vzory jako JSON pole, cage definice apod.
                const patterns = [
                    /DKS\.\w+\s*=\s*(\{[^}]{20,}\})/g,
                    /DKS\.\w+\s*=\s*(\[[^\]]{20,}\])/g,
                    /var\s+\w+\s*=\s*(\{[^}]{50,}\})/g,
                    /puzzl\w*\s*[:=]\s*["'{[]/gi
                ];
                const found = [];
                for (const p of patterns) {
                    let m;
                    while ((m = p.exec(html)) !== null) {
                        found.push(m[0].substring(0, 300));
                    }
                }
                return found;
            }""")
            _dump_json(inline_data, 3000)
        finally:
            # Always release the Chromium process, even after a timeout or JS error.
            await browser.close()
|
||||
|
||||
|
||||
# Run the exploration only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user