"""Smoke-test the DB layer with product data that was scraped earlier.

Reads products_300102013.json from the directory that contains this script
and pushes its records through the helpers imported from db.py (get_db,
ensure_indexes, upsert_category, upsert_products) -- presumably into the
MongoDB 'rohlik' database configured there. Nothing is scraped here; the
point is to run those helpers against real API response shapes and print
what ended up in the database.
"""

import io
import json
import sys
from pathlib import Path

from db import ensure_indexes, get_db, upsert_category, upsert_products

# Re-wrap stdout as UTF-8 (presumably so non-ASCII product text prints on any
# console); characters that cannot be encoded are replaced instead of raising.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

DATA_FILE = Path(__file__).parent / "products_300102013.json"


def main():
    """Upsert one category and the scraped products, then print a summary.

    Steps, in order: connect and create indexes, upsert a hard-coded category
    document, load the JSON records from DATA_FILE, split them into the four
    parallel lists passed to upsert_products, and finally print collection
    counts plus one sample product and one price_history record.
    """
    database = get_db()
    print(f"Connected to: {database.client.address} / {database.name}")

    ensure_indexes(database)
    print("Indexes created.\n")

    # --- category upsert ---
    category_doc = {
        "_id": 300102013,
        "name": "Okurky, cukety a lilky",
        "slug": "okurky-cukety-a-lilky",
        "path": [300102000, 300102008, 300102013],
        "pathNames": ["Ovoce a zelenina", "Zelenina", "Okurky, cukety a lilky"],
        "parentId": 300102008,
        "isLeaf": True,
    }
    upsert_category(database, category_doc)
    print("Category 300102013 upserted.")

    # --- scraped products ---
    raw_json = DATA_FILE.read_text(encoding="utf-8")
    records = json.loads(raw_json)
    print(f"Loaded {len(records)} products from {DATA_FILE.name}\n")

    # Each merged record carries up to four sections; upsert_products is
    # called with one list per section, index-aligned with `records`. A
    # missing section becomes an empty dict so the lists stay the same length.
    bases, prices_list, stocks, categories_list = (
        [record.get(section, {}) for record in records]
        for section in ("base", "prices", "stock", "categories")
    )

    upsert_products(database, bases, prices_list, stocks, categories_list)
    print(f"Upserted {len(bases)} products.\n")

    # --- verification ---
    product_total = database.products.count_documents({})
    history_total = database.price_history.count_documents({})
    category_total = database.categories.count_documents({})
    print(
        "DB counts:",
        f" products: {product_total}",
        f" price_history: {history_total}",
        f" categories: {category_total}",
        sep="\n",
    )

    # Print one product, if it is present, to eyeball the stored fields.
    sample_id = 1407650
    product = database.products.find_one({"_id": sample_id})
    if product:
        print(f"\nSample product: {product['name']}")
        print(f" price: {product['currentPrice']} {product['currency']}")
        unit_label = product.get("unit", "?")
        print(f" per unit: {product['currentPricePerUnit']}/{unit_label}")
        print(f" inStock: {product['inStock']}")
        print(f" sale: {product['sale']}")
        badge_titles = [badge["title"] for badge in product.get("badges", [])]
        print(f" badges: {badge_titles}")

    # Print one price_history record for the same product id, if any exists.
    history_entry = database.price_history.find_one({"productId": sample_id})
    if history_entry:
        print(
            f"\n price_history record: price={history_entry['price']}, "
            f"inStock={history_entry['inStock']}, "
            f"scrapedAt={history_entry['scrapedAt']}"
        )

    print("\nDone.")


if __name__ == "__main__":
    main()