This commit is contained in:
2026-06-19 05:24:29 +02:00
parent 2b3e259de1
commit f0bd210198
4 changed files with 479 additions and 0 deletions
+252
View File
@@ -0,0 +1,252 @@
#!/usr/bin/env python3
r"""
EUNI -> Plex (Other Videos) export.

Downloads videos from the SeaweedFS filer to the Plex share and names them for
the EUNI library.

Naming scheme:
    single-video course : "<Course title> - <Surname> (<year>).mp4"
    multi-video course  : "<Course title> - <NN> <segment> (<year>).mp4" (author omitted)

Dependencies:
    pip install pymongo requests

Usage:
    python plex_export.py --dry-run             # only prints the plan, downloads nothing
    python plex_export.py                       # downloads to the default path (\\tower\PlexBinHex\EUNI)
    python plex_export.py --dest D:\plex\EUNI   # different destination folder
    python plex_export.py --limit 5             # downloads only the first 5 (test)

The script is idempotent: it downloads via a .part file, checks the size and
skips files that are already done. If it crashes or you interrupt it (Ctrl-C),
just run it again and it finishes the rest.

The target machine needs network access to: Mongo 192.168.1.76,
filer 192.168.1.50:8888 and the destination share.
"""
import argparse
import re
import sys
import time
from datetime import datetime
from pathlib import Path
from urllib.parse import quote
import requests
from pymongo import MongoClient
# --- let Czech text print on a Windows console (cp1252) without crashing ---
for _s in [sys.stdout, sys.stderr]:
    try:
        _s.reconfigure(errors="replace", encoding="utf-8")
    except Exception:
        # Best effort: a stream that cannot be reconfigured is left as it is.
        continue
# Defaults for the --mongo, --filer and --dest command-line options.
DEF_MONGO = "192.168.1.76"
DEF_FILER = "http://192.168.1.50:8888/"
DEF_DEST = r"\\tower\PlexBinHex\EUNI"
MAX_NAME = 190  # safe file-name length (below MAX_PATH)

# Lower-cased academic titles that may precede a name (compared without dots).
TITLES = {"prof", "doc", "prim", "dr", "mudr", "mvdr", "pharmdr", "phdr",
          "rndr", "mgr", "bc", "msc", "md", "et", "ing"}
# Lower-cased degrees / post-nominals that may follow a name.
DEGREES = {"ph.d.", "ph.d", "phd.", "phd", "csc.", "csc", "drsc.", "drsc",
           "mba", "msc.", "msc", "m.d.", "md", "febo", "fesc", "fesc.", "feso",
           "fean", "mha", "ph.", "d.", "fcma", "facp", "fefim", "frsph",
           "febtm", "febu", "agaf", "dr."}
# Characters that are not allowed in Windows file names.
ILLEGAL = re.compile(r'[\\/:*?"<>|]')
# Full-width look-alikes found in the original file names (vimeo/youtube
# downloader). Written as escapes so the keys survive copy/paste and re-encoding;
# an empty-string key makes str.maketrans raise ValueError.
# NOTE(review): the original key characters were lost (they arrived as empty
# strings); this set was reconstructed from the substitutes such downloaders
# commonly emit -- confirm against real file names in the database.
FULLWIDTH = str.maketrans({
    "\uff1f": "",    # FULLWIDTH QUESTION MARK
    "\uff5c": " ",   # FULLWIDTH VERTICAL LINE
    "\uff1a": "-",   # FULLWIDTH COLON
    "\uff02": "",    # FULLWIDTH QUOTATION MARK
    "\uff0a": "",    # FULLWIDTH ASTERISK
    "\uff1c": "",    # FULLWIDTH LESS-THAN SIGN
    "\uff0f": "-",   # FULLWIDTH SOLIDUS
    "\u29f8": "-",   # BIG SOLIDUS
    "\uff1e": "",    # FULLWIDTH GREATER-THAN SIGN
})
# Trailing "[<id>]" tag at the end of a downloaded file name.
VIMEO_TAIL = re.compile(r"\s*\[[0-9A-Za-z_\-]+\]\s*$")
def surname(autor):
    """Return the surname part of an author string, or None.

    Only the text before the first comma is used. Leading tokens found in
    TITLES (lower-cased, surrounding dots removed) and trailing tokens found
    in DEGREES (lower-cased, surrounding commas removed) are dropped. A single
    remaining token is returned as-is; otherwise everything after the first
    remaining token is returned, which keeps double surnames intact.
    """
    if not autor:
        return None
    words = autor.split(",")[0].strip().split()

    first = 0
    while first < len(words) and words[first].lower().strip(".") in TITLES:
        first += 1
    last = len(words)
    while last > first and words[last - 1].lower().strip(",") in DEGREES:
        last -= 1

    core = words[first:last]
    if not core:
        return None
    if len(core) == 1:
        return core[0]
    # Everything after the given name, so double surnames are preserved.
    return " ".join(core[1:])
def sanitize(name):
    """Return *name* cleaned up for use as a file name.

    Applies the FULLWIDTH translation, removes characters matched by ILLEGAL,
    collapses whitespace runs to one space, and strips surrounding whitespace
    plus any trailing dots/spaces.
    """
    cleaned = ILLEGAL.sub("", name.translate(FULLWIDTH))
    single_spaced = re.sub(r"\s+", " ", cleaned)
    return single_spaced.strip().rstrip(". ")
def seq_num(soubor):
    """Extract an explicit sequence number (1-30) from a file name, or None."""
    stem = Path(soubor.replace("\\", "/")).name
    stem = re.sub(r"\.mp4", "", VIMEO_TAIL.sub("", stem), flags=re.I)

    # Tried in order; the first pattern whose number falls in 1..30 wins.
    patterns = (
        r"^\s*(\d{1,2})[\.\)]",   # "1. Tonometrie", "2) ..."
        r"[ _]p(\d{1,2})\b",      # "p01 ...", "_p02_"
        r"[ _](\d{1,2})[ _]",     # "TABAK_01_", "Meluzinova 3 "
        r"\b(?:cast|část|díl|dil|part)[ _]*(\d{1,2})\b",
        r"[ _](\d{1,2})$",        # "... 2"
    )
    for pattern in patterns:
        hit = re.search(pattern, stem, flags=re.I)
        if hit is None:
            continue
        number = int(hit.group(1))
        if 1 <= number <= 30:
            return number
    return None
def src_id(klic):
    """Return the numeric id following "vimeo:" in *klic* as an int, else None.

    A falsy *klic* (None or "") is treated as an empty string.
    """
    found = re.search(r"vimeo:(\d+)", klic or "")
    if found is None:
        return None
    return int(found.group(1))
def order_items(items):
    """Sort the videos of one course into a logical order.

    When every item yields a sequence number via seq_num() and the numbers are
    all distinct, items are sorted by that number. Otherwise items with a
    vimeo id (src_id of "klic") come first, ordered by id, followed by the
    rest ordered alphabetically by "soubor".
    """
    explicit = [seq_num(item.get("soubor", "")) for item in items]
    if None not in explicit and len(explicit) == len(set(explicit)):
        paired = list(zip(explicit, items))
        paired.sort(key=lambda pair: pair[0])
        return [item for _, item in paired]

    # Fallback: upload order on vimeo, otherwise alphabetical by file name.
    def upload_key(item):
        vimeo_id = src_id(item.get("klic"))
        if vimeo_id is None:
            return (1, 0, item.get("soubor", ""))
        return (0, vimeo_id, "")

    return sorted(items, key=upload_key)
def seg_label(soubor, nazev):
    """Derive a short segment label from a video file name.

    Takes the base name of *soubor*, replaces each ".mp4" (plus following
    whitespace) with a space, drops a trailing "[id]" tag, strips the first
    matching course-title prefix built from *nazev*, turns underscores into
    spaces, drops a leading "NN." / "NN)" number and collapses whitespace.
    Returns "" when nothing is left, when the result is a single alphanumeric
    run of 16+ characters, or when it is one of a few generic words.
    """
    label = Path(soubor.replace("\\", "/")).name
    label = re.sub(r"\.mp4\s*", " ", label, flags=re.I)
    label = VIMEO_TAIL.sub("", label).strip()

    # Longest / most specific prefixes first; only the first match is removed.
    prefixes = (
        f"EUNI kurz - {nazev} - studijní materiál -",
        f"EUNI kurz - {nazev} - studijní materiál",
        f"EUNI kurz - {nazev} -",
        f"EUNI kurz - {nazev}",
        f"{nazev} -",
        f"{nazev}-",
        nazev,
    )
    lowered = label.lower()
    for prefix in prefixes:
        if lowered.startswith(prefix.lower()):
            label = label[len(prefix):].strip(" -")
            break

    label = label.replace("_", " ").strip()
    # Duplicate sequence number (the caller adds its own NN).
    label = re.sub(r"^\s*\d{1,2}[\.\)]\s*", "", label)
    label = re.sub(r"\s+", " ", label).strip()

    if not label:
        return ""
    if re.fullmatch(r"[0-9A-Za-z]{16,}", label):
        return ""
    if label.lower() in {"studijní materiál", "zaznam", "záznam", "video"}:
        return ""
    return label
def clip(stem):
    """Shorten an over-long name (without ".mp4") to at most MAX_NAME characters.

    A shortened name also has trailing spaces, dashes and dots removed.
    """
    if len(stem) > MAX_NAME:
        return stem[:MAX_NAME].rstrip(" -.")
    return stem
def filer_url(filer, seaweed_path):
    """Build the download URL for *seaweed_path* on the filer at *filer*.

    Each path segment is percent-encoded separately. Trailing slashes on
    *filer* and leading slashes on *seaweed_path* are dropped so the two are
    joined by exactly one "/", whether or not the stored path is absolute.
    """
    segments = seaweed_path.lstrip("/").split("/")
    encoded = "/".join(quote(segment) for segment in segments)
    return filer.rstrip("/") + "/" + encoded
def _dedupe_names(plan):
    """Return *plan* with file names made unique (compared case-insensitively).

    The first entry keeps its name; later entries with the same name get a
    " [2]", " [3]", ... tag before ".mp4". Without this, two courses that
    produce the same name would overwrite each other in the destination and be
    re-downloaded on every run. *plan* is expected to be sorted already so the
    assignment is deterministic.
    """
    original_names = {fn.casefold() for _, fn, _ in plan}
    used = set()
    unique = []
    for path, fn, size in plan:
        key = fn.casefold()
        if key in used:
            stem = fn[:-len(".mp4")]
            n = 2
            while True:
                tag = f" [{n}]"
                # Keep the tagged stem within MAX_NAME characters.
                candidate = stem[:MAX_NAME - len(tag)].rstrip(" -.") + tag + ".mp4"
                key = candidate.casefold()
                if key not in used and key not in original_names:
                    break
                n += 1
            fn = candidate
        used.add(key)
        unique.append((path, fn, size))
    unique.sort(key=lambda entry: entry[1])
    return unique


def build_plan(db):
    """Build the export plan from the ``kurzy`` and ``materialy`` collections.

    Selects materials with ``druh == "video"`` and a non-empty
    ``seaweed_fids``, groups them by ``kurz_id`` and derives a target file name
    for each one: a course with one video gets
    "<title> - <surname> (<year>).mp4", a course with several videos gets
    "<title> - <NN> <segment> (<year>).mp4" in the order from order_items().

    Returns:
        (by_course, plan): ``by_course`` maps course id to its list of video
        documents; ``plan`` is a list of ``(seaweed_path, filename, size)``
        tuples sorted by file name, with unique file names.
    """
    kurzy = {k["_id"]: k for k in db.kurzy.find({})}
    vids = list(db.materialy.find(
        {"druh": "video", "seaweed_fids": {"$exists": True, "$ne": []}},
        {"kurz_id": 1, "soubor": 1, "seaweed_path": 1, "seaweed_size": 1,
         "klic": 1}))
    by_course = {}
    for v in vids:
        by_course.setdefault(v["kurz_id"], []).append(v)

    plan = []  # (seaweed_path, filename, size)
    for kid, items in by_course.items():
        k = kurzy.get(kid, {})
        # Fall back to the first file name, then to the course id as text, so
        # sanitize() always receives a string.
        nazev = sanitize(k.get("nazev") or items[0].get("soubor") or str(kid))
        autor = surname(k.get("autor"))
        dp = k.get("datum_publikace")
        rok = dp.year if isinstance(dp, datetime) else None
        ystr = f" ({rok})" if rok else ""
        if len(items) == 1:
            v = items[0]
            who = f" - {autor}" if autor else ""
            fn = clip(sanitize(f"{nazev}{who}{ystr}")) + ".mp4"
            plan.append((v["seaweed_path"], fn, v["seaweed_size"]))
        else:
            for i, v in enumerate(order_items(items), 1):
                lbl = seg_label(v.get("soubor", ""), k.get("nazev") or "")
                mid = f" - {i:02d} {lbl}" if lbl else f" - {i:02d}"
                fn = clip(sanitize(f"{nazev}{mid}{ystr}")) + ".mp4"
                plan.append((v["seaweed_path"], fn, v["seaweed_size"]))

    # Sort by (name, source path) so duplicate names are tagged the same way
    # on every run regardless of the order MongoDB returned the documents in.
    plan.sort(key=lambda x: (x[1], x[0]))
    return by_course, _dedupe_names(plan)
def main():
    """Command-line entry point: build the plan, then print it or download it.

    With ``--dry-run`` only the plan is printed. Otherwise each planned file is
    streamed from the filer into "<name>.part" in the destination folder, its
    size is compared with the size recorded in the plan, and only then is it
    renamed to its final name. Files that already exist with the expected size
    are skipped. Every result line and the final summary are printed and also
    appended to "_export_log.txt" in the destination folder.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--dest", default=DEF_DEST)
    ap.add_argument("--mongo", default=DEF_MONGO)
    ap.add_argument("--filer", default=DEF_FILER)
    ap.add_argument("--limit", type=int, default=0, help="stahne jen N souboru")
    args = ap.parse_args()

    cli = MongoClient(args.mongo, serverSelectionTimeoutMS=5000)
    try:
        # build_plan() materialises everything it needs, so the connection can
        # be released before the (long) download phase starts.
        by_course, plan = build_plan(cli["EUNI"])
    finally:
        cli.close()

    total = sum(p[2] for p in plan)
    print(f"Kurzu s videem: {len(by_course)} | souboru k exportu: {len(plan)} "
          f"| celkem {total/1024**3:.1f} GiB\n")

    if args.dry_run:
        for path, fn, size in plan:
            print(f"{size/1024**2:8.1f} MB {fn}")
        print(f"\n[DRY-RUN] nic nestazeno. Celkem {len(plan)} souboru, "
              f"{total/1024**3:.1f} GiB.")
        return

    dest = Path(args.dest)
    dest.mkdir(parents=True, exist_ok=True)
    log_path = dest / "_export_log.txt"
    done = skipped = failed = 0
    dl_bytes = 0
    t0 = time.time()
    with open(log_path, "a", encoding="utf-8") as log:
        log.write(f"\n=== RUN {datetime.now():%Y-%m-%d %H:%M} | "
                  f"{len(plan)} planned | dest={dest} ===\n")
        for n, (path, fn, size) in enumerate(plan, 1):
            dst = dest / fn
            if dst.exists() and dst.stat().st_size == size:
                skipped += 1
                continue
            try:
                url = filer_url(args.filer, path)
                tmp = dst.with_suffix(".part")
                with requests.get(url, stream=True, timeout=300) as r:
                    r.raise_for_status()
                    with open(tmp, "wb") as f:
                        for chunk in r.iter_content(1 << 20):
                            f.write(chunk)
                # Never promote a short or oversized download to the final
                # name; the .part file is simply overwritten on the next run.
                got = tmp.stat().st_size
                if got != size:
                    raise OSError(
                        f"size mismatch: got {got} B, expected {size} B")
                tmp.replace(dst)
                done += 1
                dl_bytes += size
                msg = f"[{n}/{len(plan)}] OK {size/1024**2:.1f}MB {fn}"
            except Exception as e:
                # Per-file boundary: record the failure and carry on with the
                # next file (Ctrl-C is not an Exception and still aborts).
                failed += 1
                msg = f"[{n}/{len(plan)}] FAIL {fn} :: {e}"
            print(msg, flush=True)
            log.write(msg + "\n")
            log.flush()
            if args.limit and done >= args.limit:
                break
        dt = time.time() - t0
        summary = (f"HOTOVO: {done} stazeno ({dl_bytes/1024**3:.1f} GiB), "
                   f"{skipped} preskoceno, {failed} chyb, {dt/60:.1f} min")
        print("\n" + summary)
        log.write(summary + "\n")
if __name__ == "__main__":
main()