notebookvb

This commit is contained in:
Vladimir Buzalka
2026-05-08 22:06:57 +02:00
parent c9903646f1
commit c4c0d1d435
14 changed files with 1666 additions and 0 deletions
@@ -0,0 +1,184 @@
"""
Průzkumný skript: zkouší najít strukturovaná data puzzle
na dailykillersudoku.com (cage definice, řešení).
"""
import asyncio
import json
import sys
# Switch stdout to UTF-8: the status messages printed by this script contain
# non-ASCII (Czech) characters.
sys.stdout.reconfigure(encoding="utf-8")
from playwright.async_api import async_playwright
# Puzzle page that main() loads and inspects.
URL = "https://www.dailykillersudoku.com/puzzle/70000"
async def main():
    """Load the puzzle page at ``URL`` in headless Chromium and print clues about where its data lives.

    The report is written to stdout in eight sections:

    1. network responses whose URL contains a data-related keyword,
    2. well-known global JS variables present on ``window``,
    3. SVG / canvas element counts and identifiers,
    4. elements carrying puzzle-like ``data-*`` attributes,
    5. tables and elements whose class/id looks grid-related,
    6. inline ``<script>`` tags mentioning puzzle keywords,
    7. detected front-end framework state (Vue / Nuxt / React),
    8. non-default enumerable properties of ``window``.

    The browser is always closed, even when navigation or a page
    evaluation raises.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(viewport={"width": 1280, "height": 900})
            page = await context.new_page()

            # Capture network responses as they arrive.
            api_responses = []

            async def on_response(response):
                """Record URL, status and (truncated) body of data-looking responses."""
                url = response.url
                if any(k in url for k in ["api", "puzzle", "data", "json", "cage", "grid"]):
                    try:
                        body = (await response.text())[:2000]
                    except Exception:
                        # Best effort: some responses have no readable body.
                        # `except Exception` (not a bare except) so that task
                        # cancellation and Ctrl+C are not swallowed here.
                        body = "(could not read)"
                    api_responses.append({"url": url, "status": response.status, "body": body})

            page.on("response", on_response)

            print(f"Načítám {URL} ...")
            await page.goto(URL, wait_until="networkidle", timeout=60_000)

            # 1) Network responses
            print("\n=== Zachycené API/data requesty ===")
            for r in api_responses:
                print(f"\n URL: {r['url']}")
                print(f" Status: {r['status']}")
                # Stored bodies are capped at 2000 chars, so this prints only
                # bodies that were shorter than the cap (larger ones are skipped).
                if r['body'] and len(r['body']) < 2000:
                    print(f" Body: {r['body'][:500]}")

            # 2) Global JS variables
            print("\n=== Globální proměnné ===")
            globals_check = await page.evaluate("""() => {
                const names = ['puzzle', 'puzzleData', 'gameData', 'game', 'board',
                               'grid', 'cages', 'cells', 'solution', 'killerData',
                               'sudoku', 'level', 'data', 'config', 'state',
                               'app', 'store', 'vuex', '__NUXT__', '__NEXT_DATA__',
                               'initialData', 'pageData', 'props', 'serverData'];
                const found = {};
                for (const name of names) {
                    if (typeof window[name] !== 'undefined') {
                        const val = window[name];
                        found[name] = {
                            type: typeof val,
                            keys: typeof val === 'object' && val !== null ? Object.keys(val).slice(0, 20) : null
                        };
                    }
                }
                return found;
            }""")
            print(json.dumps(globals_check, indent=2))

            # 3) SVG / canvas analysis
            print("\n=== SVG/Canvas elementy ===")
            svg_info = await page.evaluate("""() => {
                const svgs = document.querySelectorAll('svg');
                const canvases = document.querySelectorAll('canvas');
                return {
                    svg_count: svgs.length,
                    canvas_count: canvases.length,
                    // On SVG elements `className` is an SVGAnimatedString object,
                    // not a string, so read the class attribute instead.
                    svg_ids: Array.from(svgs).map(s => s.id || s.getAttribute('class') || '(no id)').slice(0, 5),
                    canvas_ids: Array.from(canvases).map(c => c.id || c.className || '(no id)').slice(0, 5)
                };
            }""")
            print(json.dumps(svg_info, indent=2))

            # 4) data-* attributes
            print("\n=== Elementy s data- atributy ===")
            data_attrs = await page.evaluate("""() => {
                const all = document.querySelectorAll('[data-cage], [data-cell], [data-sum], [data-group], [data-value], [data-row], [data-col]');
                return {
                    count: all.length,
                    samples: Array.from(all).slice(0, 5).map(el => ({
                        tag: el.tagName,
                        attrs: Object.fromEntries(Array.from(el.attributes).filter(a => a.name.startsWith('data-')).map(a => [a.name, a.value]))
                    }))
                };
            }""")
            print(json.dumps(data_attrs, indent=2))

            # 5) Tables and grids
            print("\n=== Tabulky / grid struktury ===")
            tables = await page.evaluate("""() => {
                const tables = document.querySelectorAll('table');
                const grids = document.querySelectorAll('[class*=grid], [class*=puzzle], [class*=board], [class*=cage], [class*=cell], [id*=grid], [id*=puzzle], [id*=board]');
                return {
                    table_count: tables.length,
                    grid_elements: Array.from(grids).slice(0, 10).map(el => ({
                        tag: el.tagName,
                        id: el.id,
                        // getAttribute works for both HTML and SVG elements
                        // (SVG className stringifies to "[object SVGAnimatedString]").
                        class: (el.getAttribute('class') || '').substring(0, 100),
                        children: el.children.length
                    }))
                };
            }""")
            print(json.dumps(tables, indent=2))

            # 6) Script tags containing data
            print("\n=== Script tagy s daty ===")
            scripts = await page.evaluate("""() => {
                const scripts = document.querySelectorAll('script');
                const results = [];
                for (const s of scripts) {
                    const text = s.textContent || '';
                    if (text.length > 10 && text.length < 50000) {
                        const keywords = ['puzzle', 'cage', 'cell', 'grid', 'solution', 'board', 'sum'];
                        const found = keywords.filter(k => text.toLowerCase().includes(k));
                        if (found.length > 0) {
                            results.push({
                                keywords: found,
                                length: text.length,
                                snippet: text.substring(0, 500)
                            });
                        }
                    }
                }
                return results;
            }""")
            print(json.dumps(scripts, indent=2, ensure_ascii=False)[:5000])

            # 7) Vue / React / Angular state
            print("\n=== Framework state ===")
            framework = await page.evaluate("""() => {
                // Vue
                const vueEl = document.querySelector('[data-v-app]') || document.querySelector('#app') || document.querySelector('#__nuxt');
                let vueData = null;
                if (vueEl && vueEl.__vue_app__) {
                    vueData = 'Vue 3 app found';
                } else if (vueEl && vueEl.__vue__) {
                    vueData = 'Vue 2 app found';
                    try {
                        const d = vueEl.__vue__.$data;
                        vueData = {type: 'Vue 2', keys: Object.keys(d)};
                    } catch(e) {}
                }
                // __NUXT__
                if (typeof __NUXT__ !== 'undefined') {
                    try { vueData = {type: 'Nuxt', keys: Object.keys(__NUXT__)}; } catch(e) {}
                }
                // React
                let reactData = null;
                const reactRoot = document.querySelector('#__next') || document.querySelector('#root');
                if (reactRoot) {
                    const fiberKey = Object.keys(reactRoot).find(k => k.startsWith('__reactFiber') || k.startsWith('__reactInternalInstance'));
                    if (fiberKey) reactData = 'React app found';
                }
                return {vue: vueData, react: reactData};
            }""")
            print(json.dumps(framework, indent=2))

            # 8) All custom window properties: diff against a fresh iframe's
            #    window to filter out the browser's built-in keys.
            print("\n=== Custom window properties ===")
            custom_props = await page.evaluate("""() => {
                const iframe = document.createElement('iframe');
                document.body.appendChild(iframe);
                const defaultKeys = new Set(Object.keys(iframe.contentWindow));
                document.body.removeChild(iframe);
                const custom = Object.keys(window).filter(k => !defaultKeys.has(k) && !k.startsWith('__'));
                return custom.slice(0, 50);
            }""")
            print(json.dumps(custom_props, indent=2))
        finally:
            # Close the browser even if navigation or an evaluation failed.
            await browser.close()
# Script entry point: drive the async exploration only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())