notebookVB
This commit is contained in:
@@ -0,0 +1,93 @@
|
||||
"""
|
||||
Test DB layer: load products_300102013.json (already scraped data)
|
||||
and upsert into MongoDB 'rohlik' database.
|
||||
|
||||
No scraping needed — just validates the db.py functions work
|
||||
with real API response shapes.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import io
|
||||
from pathlib import Path
|
||||
from db import get_db, ensure_indexes, upsert_products, upsert_category
|
||||
|
||||
# Rewrap stdout so all output is encoded as UTF-8; characters that cannot be
# encoded are replaced rather than raising.
# NOTE(review): presumably a workaround for consoles whose default encoding is
# not UTF-8 — confirm before removing.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
|
||||
|
||||
# JSON file with already-scraped products, resolved relative to this script's
# own directory (not the current working directory).
DATA_FILE = Path(__file__).parent / "products_300102013.json"
|
||||
|
||||
|
||||
def main():
    """Run the DB-layer smoke test against the scraped product dump.

    Steps, in order:
      1. Connect via ``get_db`` and create indexes with ``ensure_indexes``.
      2. Upsert one hard-coded category document (id 300102013).
      3. Load the product records from ``DATA_FILE`` and split every record
         into its ``base`` / ``prices`` / ``stock`` / ``categories`` parts,
         which are passed to ``upsert_products`` as four parallel lists.
      4. Print document counts for the ``products``, ``price_history`` and
         ``categories`` collections.
      5. Print a sample product (id 1407650) and one of its ``price_history``
         records, when they exist.

    Progress is reported on stdout; nothing is returned.
    """
    database = get_db()
    print(f"Connected to: {database.client.address} / {database.name}")

    ensure_indexes(database)
    print("Indexes created.\n")

    # --- category upsert ---
    category_doc = {
        "_id": 300102013,
        "name": "Okurky, cukety a lilky",
        "slug": "okurky-cukety-a-lilky",
        "path": [300102000, 300102008, 300102013],
        "pathNames": ["Ovoce a zelenina", "Zelenina", "Okurky, cukety a lilky"],
        "parentId": 300102008,
        "isLeaf": True,
    }
    upsert_category(database, category_doc)
    print("Category 300102013 upserted.")

    # --- load the scraped records ---
    raw_text = DATA_FILE.read_text(encoding="utf-8")
    records = json.loads(raw_text)
    print(f"Loaded {len(records)} products from {DATA_FILE.name}\n")

    # upsert_products takes four parallel lists, so pull each section out of
    # the merged records; a missing section becomes an empty dict.
    base_docs = [record.get("base", {}) for record in records]
    price_docs = [record.get("prices", {}) for record in records]
    stock_docs = [record.get("stock", {}) for record in records]
    category_docs = [record.get("categories", {}) for record in records]

    upsert_products(database, base_docs, price_docs, stock_docs, category_docs)
    print(f"Upserted {len(base_docs)} products.\n")

    # --- verify: collection sizes ---
    product_total = database.products.count_documents({})
    history_total = database.price_history.count_documents({})
    category_total = database.categories.count_documents({})
    print(
        "DB counts:",
        f" products: {product_total}",
        f" price_history: {history_total}",
        f" categories: {category_total}",
        sep="\n",
    )

    # --- verify: one sample product ---
    sample_id = 1407650
    product = database.products.find_one({"_id": sample_id})
    if product:
        print(f"\nSample product: {product['name']}")
        print(f" price: {product['currentPrice']} {product['currency']}")
        print(f" per unit: {product['currentPricePerUnit']}/{product.get('unit', '?')}")
        print(f" inStock: {product['inStock']}")
        print(f" sale: {product['sale']}")
        badge_titles = [badge["title"] for badge in product.get("badges", [])]
        print(f" badges: {badge_titles}")

    # --- verify: one price_history record for the same product ---
    history_entry = database.price_history.find_one({"productId": sample_id})
    if history_entry:
        print(
            f"\n price_history record: price={history_entry['price']}, "
            f"inStock={history_entry['inStock']}, scrapedAt={history_entry['scrapedAt']}"
        )

    print("\nDone.")
|
||||
|
||||
|
||||
# Script entry point: run the smoke test only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user