notebookVB
This commit is contained in:
@@ -0,0 +1,124 @@
|
||||
"""
|
||||
Open the first leaf subcategory (a category with no children, found depth-first) from categories_live.json
|
||||
and list all products in it via the Rohlik JSON API.
|
||||
|
||||
Endpoint:
|
||||
GET /api/v1/categories/normal/{categoryId}/products?page=N&size=50&sort=recommended
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from playwright.sync_api import sync_playwright
|
||||
from config import BASE_URL
|
||||
from test_login import ensure_logged_in
|
||||
|
||||
# Category tree JSON, expected next to this script (main() asks the user to
# run scrape_categories.py when the file is missing).
TREE_PATH = Path(__file__).parent / "categories_live.json"
|
||||
# Products requested per API page; also used by main() to detect the last page.
PAGE_SIZE = 50
|
||||
|
||||
|
||||
def find_first_leaf(nodes, path=None):
    """Return ``(path, leaf_node)`` for the first leaf met in depth-first order.

    A leaf is a node whose ``"children"`` entry is missing or empty. ``path``
    is the list of ``"name"`` values from the top level down to the leaf,
    prefixed by the ``path`` argument when one is given. Returns ``None`` when
    no leaf is found (for example, when ``nodes`` is empty).
    """
    prefix = [] if path is None else path
    # Each stack entry pairs an iterator over one sibling list with the names
    # leading to it, so a search resumes where it left off after backtracking.
    pending = [(iter(nodes), prefix)]
    while pending:
        siblings, trail = pending[-1]
        for node in siblings:
            here = trail + [node["name"]]
            kids = node.get("children") or []
            if not kids:
                return here, node
            # Descend into this node's children before its later siblings.
            pending.append((iter(kids), here))
            break
        else:
            # This sibling list is exhausted; go back up one level.
            pending.pop()
    return None
|
||||
|
||||
|
||||
def fetch_products_page(context, category_id, page):
    """Fetch one page of a category's product listing and return the parsed JSON.

    The request goes through ``context.request`` so it is issued by the same
    context object that ``main`` obtained from ``ensure_logged_in``. It asks
    for ``PAGE_SIZE`` items of page ``page`` sorted by "recommended".

    Raises:
        RuntimeError: if the response status is not 200; the message carries
            the status and the first 200 characters of the response body.
    """
    endpoint = f"{BASE_URL}/api/v1/categories/normal/{category_id}/products"
    query = {
        "page": page,
        "size": PAGE_SIZE,
        "sort": "recommended",
        "filter": "",
        "excludeProductIds": "",
    }
    response = context.request.get(endpoint, params=query)
    if response.status == 200:
        return response.json()
    raise RuntimeError(f"GET {endpoint} -> {response.status}: {response.text()[:200]}")
|
||||
|
||||
|
||||
def extract_products(payload):
    """Locate the list of products inside an API payload.

    Accepted shapes, checked in this order:
      * the payload itself is a list;
      * a dict whose "products", "data" or "items" value is a list;
      * a dict whose "products", "data" or "items" value is a dict that holds
        a list under "products" or "items".

    Returns an empty list when none of these shapes match.
    """
    if isinstance(payload, list):
        return payload
    if not isinstance(payload, dict):
        return []

    for outer_key in ("products", "data", "items"):
        candidate = payload.get(outer_key)
        if isinstance(candidate, list):
            return candidate
        if not isinstance(candidate, dict):
            continue
        # One level of nesting, e.g. {"data": {"products": [...]}}.
        nested = next(
            (
                candidate[inner_key]
                for inner_key in ("products", "items")
                if isinstance(candidate.get(inner_key), list)
            ),
            None,
        )
        if nested is not None:
            return nested
    return []
|
||||
|
||||
|
||||
def format_price(p):
    """Return a product's price formatted with two decimals, or "" if none is found.

    Looks at the "price", "amount" and "value" keys of ``p`` in that order. A
    numeric value is used directly; a dict value is searched for a numeric
    "amount", "value" or "full" entry. Anything that is not a dict yields "".
    """
    if not isinstance(p, dict):
        return ""

    def candidates():
        # Yield possible price values lazily, in lookup order.
        for field in ("price", "amount", "value"):
            raw = p.get(field)
            if isinstance(raw, dict):
                for sub_field in ("amount", "value", "full"):
                    yield raw.get(sub_field)
            else:
                yield raw

    for number in candidates():
        if isinstance(number, (int, float)):
            return f"{number:.2f}"
    return ""
|
||||
|
||||
|
||||
def _fetch_all_products(context, category_id):
    """Page through a category's listing and return every product collected."""
    collected = []
    page_num = 0
    while True:
        print(f"Fetching page {page_num} ...")
        payload = fetch_products_page(context, category_id, page_num)
        products = extract_products(payload)
        print(f" got {len(products)} products")
        if not products:
            break
        collected.extend(products)
        # A page shorter than the requested size is treated as the last one.
        if len(products) < PAGE_SIZE:
            break
        page_num += 1
    return collected


def main():
    """List and save all products of the first leaf category.

    Steps:
      1. Load the category tree from ``TREE_PATH`` and pick the first leaf.
      2. Obtain a context via ``ensure_logged_in`` and fetch every product page.
      3. Print a raw sample plus a one-line summary per product.
      4. Write the raw product list to ``products_<category id>.json`` next to
         this script.

    Raises:
        SystemExit: if the tree file is missing or contains no leaf category.
        RuntimeError: propagated from ``fetch_products_page`` on a non-200
            response.
    """
    if not TREE_PATH.exists():
        raise SystemExit(f"Missing {TREE_PATH} — run scrape_categories.py first.")

    data = json.loads(TREE_PATH.read_text(encoding="utf-8"))
    tree = data["tree"]
    found = find_first_leaf(tree)
    if found is None:
        # find_first_leaf returns None for an empty tree; unpacking that would
        # raise an unhelpful TypeError.
        raise SystemExit(f"No leaf category found in {TREE_PATH}.")
    path, leaf = found
    print(f"First leaf: {' > '.join(path)} (id={leaf['id']})")
    print(f"URL: {BASE_URL}{leaf['url']}\n")

    with sync_playwright() as pw:
        context, _page = ensure_logged_in(pw)
        try:
            all_products = _fetch_all_products(context, leaf["id"])
            print(f"\nTotal products: {len(all_products)}\n")

            # Show first product raw structure so we can confirm field names
            if all_products:
                print("--- Sample raw product (first item, truncated) ---")
                print(json.dumps(all_products[0], ensure_ascii=False, indent=2)[:1500])
                print("--- end sample ---\n")

            print("Products in category:")
            for p in all_products:
                name = p.get("productName") or p.get("name") or p.get("title") or "?"
                pid = p.get("productId") or p.get("id") or "?"
                price = format_price(p)
                print(f" [{pid}] {name} {price}")

            out_path = Path(__file__).parent / f"products_{leaf['id']}.json"
            out_path.write_text(
                json.dumps(all_products, ensure_ascii=False, indent=2),
                encoding="utf-8",
            )
            print(f"\nSaved raw products -> {out_path} ({out_path.stat().st_size} bytes)")
        finally:
            # Always release the browser, even when a request or the file
            # write fails. context.browser can be None (e.g. for a persistent
            # context), in which case closing the context is the cleanup.
            browser = context.browser
            if browser is not None:
                browser.close()
            else:
                context.close()
|
||||
|
||||
|
||||
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user