210311a7b3
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
89 lines
2.6 KiB
Python
89 lines
2.6 KiB
Python
"""
Rohlik.cz Price Scraper - Database Operations
Version: 1.0.0
Date: 2026-05-31

MongoDB operations for the Rohlik.cz price scraper.
Collections: products, price_history, categories, scrape_runs.
MongoDB server: 192.168.1.76 (no authentication).
"""
|
|
|
|
from datetime import datetime, timezone
|
|
from pymongo import MongoClient, ASCENDING
|
|
from config import MONGO_URI, MONGO_DB
|
|
|
|
|
|
def get_db():
    """Return a handle to the scraper's MongoDB database.

    A new ``MongoClient`` is constructed from ``MONGO_URI`` on every call and
    indexed with ``MONGO_DB`` to select the database. The client object itself
    is not returned to the caller.

    Returns:
        The database object obtained from ``MongoClient(MONGO_URI)[MONGO_DB]``.
    """
    return MongoClient(MONGO_URI)[MONGO_DB]
|
|
|
|
|
|
def ensure_indexes(db):
    """Create the indexes for every collection the scraper writes to.

    Indexes requested, in order:

    * ``products``: unique on ``product_id``; plain on ``category_id`` and
      on ``name``.
    * ``price_history``: compound on ``(product_id, scraped_at)``; plain on
      ``scraped_at``.
    * ``categories``: unique on ``category_id``.
    * ``scrape_runs``: plain on ``started_at``.

    Args:
        db: Database handle exposing the four collections as attributes.
    """
    # (collection attribute, index key specification, create_index options)
    index_plan = (
        ("products", [("product_id", ASCENDING)], {"unique": True}),
        ("products", [("category_id", ASCENDING)], {}),
        ("products", [("name", ASCENDING)], {}),
        (
            "price_history",
            [("product_id", ASCENDING), ("scraped_at", ASCENDING)],
            {},
        ),
        ("price_history", [("scraped_at", ASCENDING)], {}),
        ("categories", [("category_id", ASCENDING)], {"unique": True}),
        ("scrape_runs", [("started_at", ASCENDING)], {}),
    )

    for collection_name, key_spec, options in index_plan:
        getattr(db, collection_name).create_index(key_spec, **options)
|
|
|
|
|
|
def upsert_product(db, product: dict):
    """Upsert a product document and append a price-history snapshot.

    The product is matched on ``product_id`` in ``db.products``. Its
    descriptive fields are overwritten via ``$set``, and ``created_at`` is
    written only when the document is first inserted. A new document is then
    inserted into ``db.price_history`` carrying the same timestamp.

    Args:
        db: Database handle exposing ``products`` and ``price_history``
            collections.
        product: Scraped product data. ``product_id``, ``name`` and ``price``
            are required; every other key is optional and is stored as
            ``None`` when absent.

    Raises:
        KeyError: If a required key is missing. All required keys are read
            before the first write, so nothing is written in that case.
    """
    # Read every required key up front: a record without a price must fail
    # before the product document is upserted, not after, so a malformed
    # record never leaves a half-applied write behind.
    product_id = product["product_id"]
    name = product["name"]
    price = product["price"]

    # One timestamp shared by the product document and the history entry.
    now = datetime.now(timezone.utc)
    unit_price = product.get("unit_price")

    db.products.update_one(
        {"product_id": product_id},
        {
            "$set": {
                "name": name,
                "category_id": product.get("category_id"),
                "category_name": product.get("category_name"),
                "amount": product.get("amount"),
                "unit_price": unit_price,
                "image_url": product.get("image_url"),
                "product_url": product.get("product_url"),
                "category_path": product.get("category_path"),
                "updated_at": now,
            },
            # Applied only when the upsert creates the document.
            "$setOnInsert": {
                "created_at": now,
            },
        },
        upsert=True,
    )

    db.price_history.insert_one({
        "product_id": product_id,
        "price": price,
        "original_price": product.get("original_price"),
        "discount_badge": product.get("discount_badge"),
        "unit_price": unit_price,
        "scraped_at": now,
    })
|
|
|
|
|
|
def upsert_category(db, category: dict):
    """Create or refresh a single category document.

    The document in ``db.categories`` is matched on ``category_id``. Its
    ``name``, ``url``, ``parent_id``, ``has_children`` and ``updated_at``
    fields are overwritten on every call; ``created_at`` is written only when
    the upsert inserts a new document.

    Args:
        db: Database handle exposing a ``categories`` collection.
        category: Category data. ``category_id``, ``name`` and ``url`` are
            required; ``parent_id`` defaults to ``None`` and ``has_children``
            to ``False``.

    Raises:
        KeyError: If ``category_id``, ``name`` or ``url`` is missing.
    """
    timestamp = datetime.now(timezone.utc)

    selector = {"category_id": category["category_id"]}
    refreshed_fields = {
        "name": category["name"],
        "url": category["url"],
        "parent_id": category.get("parent_id"),
        "has_children": category.get("has_children", False),
        "updated_at": timestamp,
    }
    change = {
        "$set": refreshed_fields,
        "$setOnInsert": {"created_at": timestamp},
    }

    db.categories.update_one(selector, change, upsert=True)
|
|
|
|
|
|
def log_scrape_run(db, run_data: dict):
    """Record one scrape run by inserting ``run_data`` into ``scrape_runs``.

    Args:
        db: Database handle exposing a ``scrape_runs`` collection.
        run_data: Document describing the run; passed to ``insert_one``
            unchanged.
    """
    # NOTE(review): pymongo's insert_one is documented to add an ``_id`` key
    # to the passed dict when it has none -- confirm callers do not reuse
    # ``run_data`` for a second insert.
    runs = db.scrape_runs
    runs.insert_one(run_data)
|