185 lines
7.6 KiB
Python
185 lines
7.6 KiB
Python
"""
|
|
Exploratory script: tries to locate structured puzzle data
on dailykillersudoku.com (cage definitions, solution).
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import sys
|
|
|
|
# Force UTF-8 on stdout so the non-ASCII (Czech) messages printed by this
# script are not mangled by a legacy console encoding. The call is guarded
# because sys.stdout may be None or a replaced stream object that has no
# reconfigure() method (e.g. io.StringIO), in which case we leave it alone.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")
|
|
|
|
from playwright.async_api import async_playwright
|
|
|
|
# Page that main() loads and inspects; the trailing path segment of this
# URL is "70000".
URL = "https://www.dailykillersudoku.com/puzzle/70000"
|
|
|
|
|
|
async def main():
    """Load ``URL`` in headless Chromium and print hints about where puzzle data lives.

    The page is opened through Playwright and eight diagnostic sections are
    printed to stdout:

    1. captured responses whose URL contains an API/data-looking keyword,
    2. a fixed list of candidate global JS variables found on ``window``,
    3. counts and identifiers of ``<svg>`` / ``<canvas>`` elements,
    4. elements carrying puzzle-related ``data-*`` attributes,
    5. tables and elements whose class/id contains grid-like substrings,
    6. inline ``<script>`` tags whose text mentions puzzle keywords,
    7. Vue / Nuxt / React markers on the usual root elements,
    8. own enumerable ``window`` keys that a fresh iframe window lacks.

    Returns:
        None. All findings are printed.

    Raises:
        Any Playwright error (for example a navigation timeout) propagates
        to the caller; the browser is closed before it does.
    """
    # Substrings that mark a response URL as potentially interesting.
    url_keywords = ("api", "puzzle", "data", "json", "cage", "grid")

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(
                viewport={"width": 1280, "height": 900}
            )
            page = await context.new_page()

            # Capture network responses while the page loads.
            api_responses = []

            async def on_response(response):
                """Record status and the first 2000 body chars of matching responses."""
                url = response.url
                if not any(keyword in url for keyword in url_keywords):
                    return
                try:
                    body = (await response.text())[:2000]
                except Exception:
                    # Best effort: a body that cannot be read is recorded
                    # with a placeholder. `Exception` (not a bare except)
                    # lets task cancellation and KeyboardInterrupt through.
                    body = "(could not read)"
                api_responses.append(
                    {"url": url, "status": response.status, "body": body}
                )

            page.on("response", on_response)

            print(f"Načítám {URL} ...")
            await page.goto(URL, wait_until="networkidle", timeout=60_000)

            # 1) Network requests
            print("\n=== Zachycené API/data requesty ===")
            for r in api_responses:
                print(f"\n URL: {r['url']}")
                print(f" Status: {r['status']}")
                # Bodies were capped at 2000 chars on capture, so this only
                # prints bodies that were shorter than the cap (first 500 chars).
                if r['body'] and len(r['body']) < 2000:
                    print(f" Body: {r['body'][:500]}")

            # 2) Global JS variables
            print("\n=== Globální proměnné ===")
            globals_check = await page.evaluate("""() => {
                const names = ['puzzle', 'puzzleData', 'gameData', 'game', 'board',
                               'grid', 'cages', 'cells', 'solution', 'killerData',
                               'sudoku', 'level', 'data', 'config', 'state',
                               'app', 'store', 'vuex', '__NUXT__', '__NEXT_DATA__',
                               'initialData', 'pageData', 'props', 'serverData'];
                const found = {};
                for (const name of names) {
                    if (typeof window[name] !== 'undefined') {
                        const val = window[name];
                        found[name] = {
                            type: typeof val,
                            keys: typeof val === 'object' && val !== null ? Object.keys(val).slice(0, 20) : null
                        };
                    }
                }
                return found;
            }""")
            print(json.dumps(globals_check, indent=2))

            # 3) SVG/Canvas analysis
            print("\n=== SVG/Canvas elementy ===")
            # On <svg> elements `className` is an SVGAnimatedString object,
            # not a string, so the class attribute is read explicitly.
            svg_info = await page.evaluate("""() => {
                const svgs = document.querySelectorAll('svg');
                const canvases = document.querySelectorAll('canvas');
                return {
                    svg_count: svgs.length,
                    canvas_count: canvases.length,
                    svg_ids: Array.from(svgs).map(s => s.id || s.getAttribute('class') || '(no id)').slice(0, 5),
                    canvas_ids: Array.from(canvases).map(c => c.id || c.className || '(no id)').slice(0, 5)
                };
            }""")
            print(json.dumps(svg_info, indent=2))

            # 4) Data attributes
            print("\n=== Elementy s data- atributy ===")
            data_attrs = await page.evaluate("""() => {
                const all = document.querySelectorAll('[data-cage], [data-cell], [data-sum], [data-group], [data-value], [data-row], [data-col]');
                return {
                    count: all.length,
                    samples: Array.from(all).slice(0, 5).map(el => ({
                        tag: el.tagName,
                        attrs: Object.fromEntries(Array.from(el.attributes).filter(a => a.name.startsWith('data-')).map(a => [a.name, a.value]))
                    }))
                };
            }""")
            print(json.dumps(data_attrs, indent=2))

            # 5) Tables and grids
            print("\n=== Tabulky / grid struktury ===")
            # The selector can match SVG elements too, so the class is read
            # via getAttribute() rather than `className.toString()`.
            tables = await page.evaluate("""() => {
                const tables = document.querySelectorAll('table');
                const grids = document.querySelectorAll('[class*=grid], [class*=puzzle], [class*=board], [class*=cage], [class*=cell], [id*=grid], [id*=puzzle], [id*=board]');
                return {
                    table_count: tables.length,
                    grid_elements: Array.from(grids).slice(0, 10).map(el => ({
                        tag: el.tagName,
                        id: el.id,
                        class: (el.getAttribute('class') || '').substring(0, 100),
                        children: el.children.length
                    }))
                };
            }""")
            print(json.dumps(tables, indent=2))

            # 6) Script tags containing data
            print("\n=== Script tagy s daty ===")
            scripts = await page.evaluate("""() => {
                const scripts = document.querySelectorAll('script');
                const results = [];
                for (const s of scripts) {
                    const text = s.textContent || '';
                    if (text.length > 10 && text.length < 50000) {
                        const keywords = ['puzzle', 'cage', 'cell', 'grid', 'solution', 'board', 'sum'];
                        const found = keywords.filter(k => text.toLowerCase().includes(k));
                        if (found.length > 0) {
                            results.push({
                                keywords: found,
                                length: text.length,
                                snippet: text.substring(0, 500)
                            });
                        }
                    }
                }
                return results;
            }""")
            # The serialized report is cut to 5000 characters.
            print(json.dumps(scripts, indent=2, ensure_ascii=False)[:5000])

            # 7) Vue/React/Angular state
            print("\n=== Framework state ===")
            framework = await page.evaluate("""() => {
                // Vue
                const vueEl = document.querySelector('[data-v-app]') || document.querySelector('#app') || document.querySelector('#__nuxt');
                let vueData = null;
                if (vueEl && vueEl.__vue_app__) {
                    vueData = 'Vue 3 app found';
                } else if (vueEl && vueEl.__vue__) {
                    vueData = 'Vue 2 app found';
                    try {
                        const d = vueEl.__vue__.$data;
                        vueData = {type: 'Vue 2', keys: Object.keys(d)};
                    } catch(e) {}
                }
                // __NUXT__
                if (typeof __NUXT__ !== 'undefined') {
                    try { vueData = {type: 'Nuxt', keys: Object.keys(__NUXT__)}; } catch(e) {}
                }
                // React
                let reactData = null;
                const reactRoot = document.querySelector('#__next') || document.querySelector('#root');
                if (reactRoot) {
                    const fiberKey = Object.keys(reactRoot).find(k => k.startsWith('__reactFiber') || k.startsWith('__reactInternalInstance'));
                    if (fiberKey) reactData = 'React app found';
                }
                return {vue: vueData, react: reactData};
            }""")
            print(json.dumps(framework, indent=2))

            # 8) All custom window properties
            print("\n=== Custom window properties ===")
            # A throwaway iframe supplies a pristine window whose keys serve
            # as the baseline to subtract from the page's own window keys.
            custom_props = await page.evaluate("""() => {
                const iframe = document.createElement('iframe');
                document.body.appendChild(iframe);
                const defaultKeys = new Set(Object.keys(iframe.contentWindow));
                document.body.removeChild(iframe);
                const custom = Object.keys(window).filter(k => !defaultKeys.has(k) && !k.startsWith('__'));
                return custom.slice(0, 50);
            }""")
            print(json.dumps(custom_props, indent=2))
        finally:
            # Close the browser even when navigation or an evaluate() fails.
            await browser.close()
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the async probe to completion.
    asyncio.run(main())
|