94 lines
2.8 KiB
Python
94 lines
2.8 KiB
Python
"""
|
|
Test DB layer: load products_300102013.json (already scraped data)
|
|
and upsert into MongoDB 'rohlik' database.
|
|
|
|
No scraping needed — just validates the db.py functions work
|
|
with real API response shapes.
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
import io
|
|
from pathlib import Path
|
|
from db import get_db, ensure_indexes, upsert_products, upsert_category
|
|
|
|
# Re-wrap stdout's underlying byte stream so everything printed below is
# encoded as UTF-8; errors="replace" substitutes characters that cannot be
# encoded instead of raising UnicodeEncodeError.
# NOTE(review): presumably needed for consoles whose default encoding cannot
# represent the Czech product names — confirm. This runs at import time.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
|
|
|
# Path to the JSON file of product records, resolved relative to the
# directory containing this script (not the current working directory).
DATA_FILE = Path(__file__).parent / "products_300102013.json"
|
|
|
|
|
|
def main():
    """Run an end-to-end smoke test of the db helpers against stored data.

    Steps, in order:
      1. Obtain a database handle via ``get_db`` and call ``ensure_indexes``.
      2. Upsert one hard-coded category document (id 300102013).
      3. Load the product records from ``DATA_FILE`` and pass their
         ``base`` / ``prices`` / ``stock`` / ``categories`` parts to
         ``upsert_products`` as four parallel lists.
      4. Print document counts for the ``products``, ``price_history`` and
         ``categories`` collections.
      5. Print selected fields of product 1407650 and of one matching
         ``price_history`` record, when they are found.

    Progress is reported with ``print``; nothing is returned.
    """
    database = get_db()
    print(f"Connected to: {database.client.address} / {database.name}")

    ensure_indexes(database)
    print("Indexes created.\n")

    # --- category upsert ---
    category_doc = {
        "_id": 300102013,
        "name": "Okurky, cukety a lilky",
        "slug": "okurky-cukety-a-lilky",
        "path": [300102000, 300102008, 300102013],
        "pathNames": ["Ovoce a zelenina", "Zelenina", "Okurky, cukety a lilky"],
        "parentId": 300102008,
        "isLeaf": True,
    }
    upsert_category(database, category_doc)
    print("Category 300102013 upserted.")

    # --- read the previously scraped product records ---
    with DATA_FILE.open(encoding="utf-8") as handle:
        records = json.load(handle)
    print(f"Loaded {len(records)} products from {DATA_FILE.name}\n")

    # Each record bundles four sub-documents; upsert_products takes them as
    # four parallel lists, so pull every part out into its own list.
    # A record lacking a part contributes an empty dict in that position.
    bases = [record.get("base", {}) for record in records]
    prices_list = [record.get("prices", {}) for record in records]
    stocks = [record.get("stock", {}) for record in records]
    categories_list = [record.get("categories", {}) for record in records]

    upsert_products(database, bases, prices_list, stocks, categories_list)
    print(f"Upserted {len(bases)} products.\n")

    # --- verification: collection sizes ---
    product_count = database.products.count_documents({})
    history_count = database.price_history.count_documents({})
    category_count = database.categories.count_documents({})

    print("DB counts:")
    print(f" products: {product_count}")
    print(f" price_history: {history_count}")
    print(f" categories: {category_count}")

    # --- verification: one sample product ---
    sample_id = 1407650
    product = database.products.find_one({"_id": sample_id})
    if product:
        print(f"\nSample product: {product['name']}")
        print(f" price: {product['currentPrice']} {product['currency']}")
        print(f" per unit: {product['currentPricePerUnit']}/{product.get('unit', '?')}")
        print(f" inStock: {product['inStock']}")
        print(f" sale: {product['sale']}")
        print(f" badges: {[badge['title'] for badge in product.get('badges', [])]}")

    # --- verification: one price_history record for the same product ---
    history = database.price_history.find_one({"productId": sample_id})
    if history:
        price = history["price"]
        in_stock = history["inStock"]
        scraped_at = history["scrapedAt"]
        print(f"\n price_history record: price={price}, "
              f"inStock={in_stock}, scrapedAt={scraped_at}")

    print("\nDone.")
|
|
|
|
|
|
# Script entry point: run the smoke test only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|