"""
Rohlik.cz Price Scraper - Database Operations
Version: 1.0.0
Date: 2026-05-31

MongoDB operations for the Rohlik.cz price scraper.
Collections: products, price_history, categories, scrape_runs.
MongoDB server: 192.168.1.76 (no authentication).
"""

from datetime import datetime, timezone

from pymongo import MongoClient, ASCENDING

from config import MONGO_URI, MONGO_DB


def get_db():
    """Return a handle to the scraper database.

    A new ``MongoClient`` is built from ``MONGO_URI`` on every call and its
    ``MONGO_DB`` database is returned.

    NOTE: because each call constructs its own client, callers will usually
    want to call this once and pass the returned handle around.
    """
    client = MongoClient(MONGO_URI)
    return client[MONGO_DB]


def ensure_indexes(db) -> None:
    """Create the indexes used by the scraper's four collections.

    Args:
        db: Database handle, e.g. the value returned by ``get_db()``.
    """
    # One document per product / category: enforced by unique indexes.
    db.products.create_index([("product_id", ASCENDING)], unique=True)
    db.products.create_index([("category_id", ASCENDING)])
    db.products.create_index([("name", ASCENDING)])

    db.price_history.create_index(
        [("product_id", ASCENDING), ("scraped_at", ASCENDING)]
    )
    db.price_history.create_index([("scraped_at", ASCENDING)])

    db.categories.create_index([("category_id", ASCENDING)], unique=True)

    db.scrape_runs.create_index([("started_at", ASCENDING)])


def upsert_product(db, product: dict) -> None:
    """Upsert a product document and append one price-history record.

    The ``products`` document is matched on ``product_id``; its descriptive
    fields and ``updated_at`` are overwritten, while ``created_at`` is only
    written when the document is first inserted. A new ``price_history``
    document is inserted on every call, stamped with the same UTC time.

    Args:
        db: Database handle, e.g. the value returned by ``get_db()``.
        product: Scraped product data. ``product_id``, ``name`` and ``price``
            are required; every other key is optional and stored as ``None``
            when absent.

    Raises:
        KeyError: If ``product_id``, ``name`` or ``price`` is missing. The
            check happens before anything is written, so a malformed product
            never leaves a ``products`` document without a matching
            ``price_history`` entry.
    """
    # Read every required key up front: a missing "price" used to surface
    # only after the products upsert had already been sent.
    product_id = product["product_id"]
    name = product["name"]
    price = product["price"]

    unit_price = product.get("unit_price")
    now = datetime.now(timezone.utc)

    db.products.update_one(
        {"product_id": product_id},
        {
            "$set": {
                "name": name,
                "category_id": product.get("category_id"),
                "category_name": product.get("category_name"),
                "amount": product.get("amount"),
                "unit_price": unit_price,
                "image_url": product.get("image_url"),
                "product_url": product.get("product_url"),
                "category_path": product.get("category_path"),
                "updated_at": now,
            },
            "$setOnInsert": {
                "created_at": now,
            },
        },
        upsert=True,
    )

    db.price_history.insert_one({
        "product_id": product_id,
        "price": price,
        "original_price": product.get("original_price"),
        "discount_badge": product.get("discount_badge"),
        "unit_price": unit_price,
        "scraped_at": now,
    })


def upsert_category(db, category: dict) -> None:
    """Upsert a category document matched on ``category_id``.

    ``created_at`` is only written when the document is first inserted;
    the remaining fields and ``updated_at`` are overwritten on every call.

    Args:
        db: Database handle, e.g. the value returned by ``get_db()``.
        category: Scraped category data. ``category_id``, ``name`` and
            ``url`` are required; ``parent_id`` defaults to ``None`` and
            ``has_children`` to ``False``.

    Raises:
        KeyError: If ``category_id``, ``name`` or ``url`` is missing.
    """
    now = datetime.now(timezone.utc)
    db.categories.update_one(
        {"category_id": category["category_id"]},
        {
            "$set": {
                "name": category["name"],
                "url": category["url"],
                "parent_id": category.get("parent_id"),
                "has_children": category.get("has_children", False),
                "updated_at": now,
            },
            "$setOnInsert": {"created_at": now},
        },
        upsert=True,
    )


def log_scrape_run(db, run_data: dict) -> None:
    """Insert ``run_data`` as a new document in the ``scrape_runs`` collection.

    Args:
        db: Database handle, e.g. the value returned by ``get_db()``.
        run_data: Document describing the run; stored as given.

    NOTE(review): PyMongo's ``insert_one`` is documented to add an ``_id``
    key to the passed dict when it has none - confirm no caller reuses
    ``run_data`` in a way that is affected by this.
    """
    db.scrape_runs.insert_one(run_data)