z230
This commit is contained in:
+190
@@ -0,0 +1,190 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
euni_db.py — připojení a operace nad MongoDB databází EUNI.
|
||||
|
||||
Server: mongodb://192.168.1.76:27017 (bez hesla), databáze "EUNI".
|
||||
|
||||
Kolekce:
|
||||
kurzy — 1 dokument na kurz (metadata + počty)
|
||||
materialy — 1 dokument na stahovatelný soubor (video/dokument) + stav stahování
|
||||
|
||||
Idempotence: materialy mají unikátní index {kurz_id, klic}. Upsert nový soubor
|
||||
založí jako "ceka"; u existujícího NEPŘEPÍŠE stav stahování (jen popisná pole).
|
||||
"""
|
||||
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pymongo
|
||||
|
||||
MONGO_URI = os.environ.get("EUNI_MONGO_URI", "mongodb://192.168.1.76:27017")
|
||||
DB_NAME = "EUNI"
|
||||
|
||||
# stavy materiálu
|
||||
CEKA = "ceka"
|
||||
STAZENO = "stazeno"
|
||||
PRESKOCENO = "preskoceno"
|
||||
CHYBA = "chyba"
|
||||
|
||||
|
||||
def now():
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
def get_db():
|
||||
client = pymongo.MongoClient(MONGO_URI, serverSelectionTimeoutMS=4000)
|
||||
client.admin.command("ping")
|
||||
return client[DB_NAME]
|
||||
|
||||
|
||||
def ensure_indexes(db=None):
|
||||
if db is None:
|
||||
db = get_db()
|
||||
db.materialy.create_index([("kurz_id", 1), ("klic", 1)], unique=True,
|
||||
name="uniq_kurz_klic")
|
||||
db.materialy.create_index("stav", name="stav")
|
||||
db.materialy.create_index([("druh", 1), ("stav", 1)], name="druh_stav")
|
||||
db.kurzy.create_index("profese", name="profese")
|
||||
return db
|
||||
|
||||
|
||||
# ----------------------------------------------------------------- kurzy ------
|
||||
def upsert_kurz(db, kurz: dict):
|
||||
"""Vloží/aktualizuje kurz. Zachová first_seen, profese sjednotí."""
|
||||
_id = kurz["id"]
|
||||
sets = {
|
||||
"slug": kurz.get("slug"),
|
||||
"nazev": kurz.get("nazev") or kurz.get("title"),
|
||||
"url": kurz.get("url"),
|
||||
"autor": kurz.get("autor"),
|
||||
"autor_medailonek_url": kurz.get("autor_medailonek_url"),
|
||||
"datum_publikace": kurz.get("datum_publikace"),
|
||||
"revidovano": kurz.get("revidovano"),
|
||||
"akreditace": kurz.get("akreditace"),
|
||||
"kredity": kurz.get("kredity"),
|
||||
"pocet_videi": kurz.get("pocet_videi"),
|
||||
"pocet_dokumentu": kurz.get("pocet_dokumentu"),
|
||||
"updated_at": now(),
|
||||
}
|
||||
profese = kurz.get("profese") or []
|
||||
db.kurzy.update_one(
|
||||
{"_id": _id},
|
||||
{
|
||||
"$set": sets,
|
||||
"$setOnInsert": {"first_seen": now()},
|
||||
"$addToSet": {"profese": {"$each": profese}} if profese else {},
|
||||
} if profese else {
|
||||
"$set": sets,
|
||||
"$setOnInsert": {"first_seen": now()},
|
||||
},
|
||||
upsert=True,
|
||||
)
|
||||
|
||||
|
||||
# -------------------------------------------------------------- materialy -----
|
||||
def upsert_material(db, mat: dict):
|
||||
"""Idempotentní upsert souboru. Nepřepíše stav existujícího záznamu."""
|
||||
klic_filter = {"kurz_id": mat["kurz_id"], "klic": mat["klic"]}
|
||||
popisne = {
|
||||
"kurz_nazev": mat.get("kurz_nazev"),
|
||||
"druh": mat.get("druh"),
|
||||
"platforma": mat.get("platforma"),
|
||||
"zdroj_url": mat.get("zdroj_url"),
|
||||
"watch_url": mat.get("watch_url"),
|
||||
"popis": mat.get("popis"),
|
||||
"pripona": mat.get("pripona"),
|
||||
"updated_at": now(),
|
||||
}
|
||||
db.materialy.update_one(
|
||||
klic_filter,
|
||||
{
|
||||
"$set": popisne,
|
||||
"$setOnInsert": {
|
||||
"stav": CEKA,
|
||||
"duvod": None,
|
||||
"soubor": None,
|
||||
"velikost_b": None,
|
||||
"pokusy": 0,
|
||||
"posledni_chyba": None,
|
||||
"stazeno_at": None,
|
||||
"first_seen": now(),
|
||||
},
|
||||
},
|
||||
upsert=True,
|
||||
)
|
||||
|
||||
|
||||
def set_status(db, kurz_id, klic, stav, soubor=None, velikost_b=None,
|
||||
duvod=None, chyba=None):
|
||||
"""Nastaví výsledek stahování jednoho materiálu."""
|
||||
sets = {"stav": stav, "updated_at": now()}
|
||||
if stav == STAZENO:
|
||||
sets.update({"soubor": soubor, "velikost_b": velikost_b,
|
||||
"duvod": None, "posledni_chyba": None, "stazeno_at": now()})
|
||||
elif stav == PRESKOCENO:
|
||||
sets.update({"duvod": duvod})
|
||||
elif stav == CHYBA:
|
||||
sets.update({"posledni_chyba": chyba})
|
||||
upd = {"$set": sets}
|
||||
if stav in (STAZENO, CHYBA):
|
||||
upd["$inc"] = {"pokusy": 1}
|
||||
db.materialy.update_one({"kurz_id": kurz_id, "klic": klic}, upd)
|
||||
|
||||
|
||||
def set_seaweed(db, kurz_id, klic, path, fids=None, md5=None, size=None):
|
||||
"""Uloží referenci na kopii v SeaweedFS (cesta + fid chunků)."""
|
||||
db.materialy.update_one(
|
||||
{"kurz_id": kurz_id, "klic": klic},
|
||||
{"$set": {
|
||||
"seaweed_path": path,
|
||||
"seaweed_fids": fids or [],
|
||||
"seaweed_md5": md5,
|
||||
"seaweed_size": size,
|
||||
"seaweed_at": now(),
|
||||
"updated_at": now(),
|
||||
}},
|
||||
)
|
||||
|
||||
|
||||
def materialy_bez_seaweed(db):
|
||||
"""Stažené materiály, které ještě nemají kopii v SeaweedFS (pro backfill)."""
|
||||
return list(db.materialy.find({
|
||||
"stav": STAZENO,
|
||||
"soubor": {"$ne": None},
|
||||
"$or": [{"seaweed_path": {"$exists": False}}, {"seaweed_path": None}],
|
||||
}))
|
||||
|
||||
|
||||
def materialy_v_seaweed(db):
|
||||
"""Materiály s kopií v SeaweedFS (pro restore)."""
|
||||
return list(db.materialy.find({"seaweed_path": {"$exists": True, "$ne": None}}))
|
||||
|
||||
|
||||
def cekajici_materialy(db, druh=None, vcetne_chyb=False):
|
||||
"""Vrátí materiály ke stažení (stav 'ceka', volitelně i 'chyba')."""
|
||||
stavy = [CEKA] + ([CHYBA] if vcetne_chyb else [])
|
||||
q = {"stav": {"$in": stavy}}
|
||||
if druh:
|
||||
q["druh"] = druh
|
||||
return list(db.materialy.find(q))
|
||||
|
||||
|
||||
# ----------------------------------------------------------------- stats ------
|
||||
def stats(db=None):
|
||||
if db is None:
|
||||
db = get_db()
|
||||
out = {"kurzy": db.kurzy.count_documents({})}
|
||||
pipe = [{"$group": {"_id": {"druh": "$druh", "stav": "$stav"},
|
||||
"n": {"$sum": 1}}}]
|
||||
for row in db.materialy.aggregate(pipe):
|
||||
d = row["_id"]["druh"]
|
||||
st = row["_id"]["stav"]
|
||||
out.setdefault(d, {})[st] = row["n"]
|
||||
return out
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import json
|
||||
db = ensure_indexes()
|
||||
print("Připojeno k EUNI na", MONGO_URI)
|
||||
print(json.dumps(stats(db), ensure_ascii=False, indent=2))
|
||||
Reference in New Issue
Block a user