# Files: rohlik/10PriceScraping/Rohlik/Trash/test_db.py
# 2026-06-01 07:24:46 +02:00
#
# 94 lines, 2.8 KiB, Python

"""
Test DB layer: load products_300102013.json (already scraped data)
and upsert into MongoDB 'rohlik' database.
No scraping needed — just validates the db.py functions work
with real API response shapes.
"""
import json
import sys
import io
from pathlib import Path
from db import get_db, ensure_indexes, upsert_products, upsert_category
# Re-wrap stdout as UTF-8 so the non-ASCII text printed below cannot raise an
# encoding error; characters that cannot be encoded are replaced instead.
sys.stdout = io.TextIOWrapper(
    sys.stdout.buffer,
    encoding="utf-8",
    errors="replace",
)

# Previously scraped product dump, looked up in the same directory as this script.
DATA_FILE = Path(__file__).with_name("products_300102013.json")
def main():
    """Exercise the db layer end to end against already-scraped data.

    Steps, in order:
      1. Connect via ``get_db`` and call ``ensure_indexes``.
      2. Upsert one hard-coded category document.
      3. Load the product records from ``DATA_FILE`` and hand them to
         ``upsert_products`` as four parallel lists.
      4. Print document counts for the ``products``, ``price_history`` and
         ``categories`` collections.
      5. Print one sample product and one sample price-history record,
         when they exist.
    """
    database = get_db()
    print(f"Connected to: {database.client.address} / {database.name}")

    ensure_indexes(database)
    print("Indexes created.\n")

    # --- category upsert ---
    category_doc = {
        "_id": 300102013,
        "name": "Okurky, cukety a lilky",
        "slug": "okurky-cukety-a-lilky",
        "path": [300102000, 300102008, 300102013],
        "pathNames": ["Ovoce a zelenina", "Zelenina", "Okurky, cukety a lilky"],
        "parentId": 300102008,
        "isLeaf": True,
    }
    upsert_category(database, category_doc)
    print("Category 300102013 upserted.")

    # --- load the scraped records ---
    raw_text = DATA_FILE.read_text(encoding="utf-8")
    records = json.loads(raw_text)
    print(f"Loaded {len(records)} products from {DATA_FILE.name}\n")

    # Each merged record is split back into the four parallel lists passed to
    # upsert_products; a missing section falls back to an empty dict.
    base_docs = [record.get("base", {}) for record in records]
    price_docs = [record.get("prices", {}) for record in records]
    stock_docs = [record.get("stock", {}) for record in records]
    category_docs = [record.get("categories", {}) for record in records]

    upsert_products(database, base_docs, price_docs, stock_docs, category_docs)
    print(f"Upserted {len(base_docs)} products.\n")

    # --- verify: collection counts ---
    product_count = database.products.count_documents({})
    history_count = database.price_history.count_documents({})
    category_count = database.categories.count_documents({})
    print("DB counts:")
    print(f" products: {product_count}")
    print(f" price_history: {history_count}")
    print(f" categories: {category_count}")

    # --- verify: one sample product ---
    sample_id = 1407650
    sample = database.products.find_one({"_id": sample_id})
    if sample:
        badge_titles = [b['title'] for b in sample.get('badges', [])]
        print(f"\nSample product: {sample['name']}")
        print(f" price: {sample['currentPrice']} {sample['currency']}")
        print(f" per unit: {sample['currentPricePerUnit']}/{sample.get('unit', '?')}")
        print(f" inStock: {sample['inStock']}")
        print(f" sale: {sample['sale']}")
        print(f" badges: {badge_titles}")

    # --- verify: one price_history entry for the same product id ---
    hist = database.price_history.find_one({"productId": sample_id})
    if hist:
        history_line = (
            f"\n price_history record: price={hist['price']}, "
            f"inStock={hist['inStock']}, scrapedAt={hist['scrapedAt']}"
        )
        print(history_line)

    print("\nDone.")
# Run the check only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()