""" Open the first leaf (deepest) subcategory from categories_live.json and list all products in it via the Rohlik JSON API. Endpoint: GET /api/v1/categories/normal/{categoryId}/products?page=N&size=50&sort=recommended """ import json from pathlib import Path from playwright.sync_api import sync_playwright from config import BASE_URL from test_login import ensure_logged_in TREE_PATH = Path(__file__).parent / "categories_live.json" PAGE_SIZE = 50 def find_first_leaf(nodes, path=None): """Walk the tree depth-first and return (path, leaf_node) of the first leaf.""" if path is None: path = [] for n in nodes: current = path + [n["name"]] children = n.get("children") or [] if not children: return current, n result = find_first_leaf(children, current) if result: return result return None def fetch_products_page(context, category_id, page): url = f"{BASE_URL}/api/v1/categories/normal/{category_id}/products" params = {"page": page, "size": PAGE_SIZE, "sort": "recommended", "filter": "", "excludeProductIds": ""} resp = context.request.get(url, params=params) if resp.status != 200: raise RuntimeError(f"GET {url} -> {resp.status}: {resp.text()[:200]}") return resp.json() def extract_products(payload): """Find the products list in the payload — try common shapes.""" if isinstance(payload, list): return payload if isinstance(payload, dict): for k in ("products", "data", "items"): v = payload.get(k) if isinstance(v, list): return v if isinstance(v, dict): for k2 in ("products", "items"): if isinstance(v.get(k2), list): return v[k2] return [] def format_price(p): """Try common price fields.""" if not isinstance(p, dict): return "" for k in ("price", "amount", "value"): v = p.get(k) if isinstance(v, (int, float)): return f"{v:.2f}" if isinstance(v, dict): for k2 in ("amount", "value", "full"): if isinstance(v.get(k2), (int, float)): return f"{v[k2]:.2f}" return "" def main(): if not TREE_PATH.exists(): raise SystemExit(f"Missing {TREE_PATH} — run scrape_categories.py first.") data = json.loads(TREE_PATH.read_text(encoding="utf-8")) tree = data["tree"] path, leaf = find_first_leaf(tree) print(f"First leaf: {' > '.join(path)} (id={leaf['id']})") print(f"URL: {BASE_URL}{leaf['url']}\n") with sync_playwright() as pw: context, page = ensure_logged_in(pw) all_products = [] page_num = 0 while True: print(f"Fetching page {page_num} ...") payload = fetch_products_page(context, leaf["id"], page_num) products = extract_products(payload) print(f" got {len(products)} products") if not products: break all_products.extend(products) if len(products) < PAGE_SIZE: break page_num += 1 print(f"\nTotal products: {len(all_products)}\n") # Show first product raw structure so we can confirm field names if all_products: print("--- Sample raw product (first item, truncated) ---") print(json.dumps(all_products[0], ensure_ascii=False, indent=2)[:1500]) print("--- end sample ---\n") print("Products in category:") for p in all_products: name = p.get("productName") or p.get("name") or p.get("title") or "?" pid = p.get("productId") or p.get("id") or "?" price = format_price(p) print(f" [{pid}] {name} {price}") out_path = Path(__file__).parent / f"products_{leaf['id']}.json" out_path.write_text(json.dumps(all_products, ensure_ascii=False, indent=2), encoding="utf-8") print(f"\nSaved raw products -> {out_path} ({out_path.stat().st_size} bytes)") context.browser.close() if __name__ == "__main__": main()