Merge remote-tracking branch 'origin/master'

This commit is contained in:
2026-06-19 07:52:46 +02:00
4 changed files with 493 additions and 0 deletions
+265
View File
@@ -0,0 +1,265 @@
#!/usr/bin/env python3
r"""
EUNI -> Plex (Other Videos) export.
Stahuje videa z SeaweedFS fileru na Plex share a pojmenuje je pro knihovnu EUNI.
Schema nazvu:
single-video kurz : "<Nazev kurzu> - <Prijmeni> (<rok>).mp4"
multi-video kurz : "<Nazev kurzu> - <NN> <segment> (<rok>).mp4" (autor vynechan)
Zavislosti:
pip install pymongo requests
Pouziti:
python plex_export.py --dry-run # jen vypise plan, nic nestahuje
python plex_export.py # stahuje na default cestu (\\tower\PlexBinHex\EUNI)
python plex_export.py --dest D:\plex\EUNI # jina cilova slozka
python plex_export.py --limit 5 # stahne jen prvnich 5 (test)
Skript je idempotentni: stahuje pres .part, overuje velikost, hotove preskakuje.
Kdyz spadne nebo ho prerusis (Ctrl-C), staci spustit znovu a dojede zbytek.
Cilovy stroj potrebuje sit na: Mongo 192.168.1.76, filer 192.168.1.50:8888 a cilovy share.
"""
import argparse
import re
import sys
import time
from datetime import datetime
from pathlib import Path
from urllib.parse import quote
import requests
from pymongo import MongoClient
# --- make Czech text print on a Windows console (cp1252) without crashing ---
# Both standard streams are switched to UTF-8; characters that still cannot be
# encoded are replaced instead of raising.
for _s in (sys.stdout, sys.stderr):
    try:
        _s.reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Best effort: a stream that cannot be reconfigured is left untouched.
        pass
# Defaults for the --mongo, --filer and --dest command-line options.
DEF_MONGO = "192.168.1.76"
DEF_FILER = "http://192.168.1.50:8888/"
DEF_DEST = r"\\tower\PlexBinHex\EUNI"
MAX_NAME = 190  # safe file-name length (below MAX_PATH); enforced by clip()
# Leading tokens that surname() drops from an author string; they are compared
# lower-cased with surrounding dots stripped.
TITLES = {"prof", "doc", "prim", "dr", "mudr", "mvdr", "pharmdr", "phdr",
          "rndr", "mgr", "bc", "msc", "md", "et", "ing"}
# Trailing tokens that surname() drops from an author string; they are
# compared lower-cased with surrounding commas stripped.
DEGREES = {"ph.d.", "ph.d", "phd.", "phd", "csc.", "csc", "drsc.", "drsc",
           "mba", "msc.", "msc", "m.d.", "md", "febo", "fesc", "fesc.", "feso",
           "fean", "mha", "ph.", "d.", "fcma", "facp", "fefim", "frsph",
           "febtm", "febu", "agaf", "dr."}
# Characters that sanitize() removes from a name: \ / : * ? " < > |
ILLEGAL = re.compile(r'[\\/:*?"<>|]')
# plnosirkove varianty znaku z puvodnich nazvu (vimeo/youtube downloader)
FULLWIDTH = str.maketrans({"": "", "": " ", "": "-", "": "",
"": "", "": "", "": "-", "": ""})
# Matches a trailing "[...]" tag made of letters, digits, "_" or "-" (with any
# whitespace around it) at the very end of a string. seq_num() and seg_label()
# strip it from source file names.
VIMEO_TAIL = re.compile(r"\s*\[[0-9A-Za-z_\-]+\]\s*$")
def surname(autor):
    """Return the surname part of an author string, or None.

    Only the text before the first comma is used. Leading tokens listed in
    TITLES and trailing tokens listed in DEGREES are dropped. If a single
    token remains it is returned as is; otherwise everything after the first
    remaining token (taken to be the given name) is returned, which keeps
    double surnames intact.

    Returns None for an empty/missing author or when no token is left.
    """
    if not autor:
        return None

    words = autor.split(",")[0].strip().split()
    first, last = 0, len(words)

    # skip academic titles at the front
    while first < last and words[first].lower().strip(".") in TITLES:
        first += 1
    # skip degrees at the back
    while last > first and words[last - 1].lower().strip(",") in DEGREES:
        last -= 1

    core = words[first:last]
    if not core:
        return None
    return core[0] if len(core) == 1 else " ".join(core[1:])
def sanitize(name):
    """Turn an arbitrary title into a string usable as a file name.

    Applies the FULLWIDTH translation table, deletes every character matched
    by ILLEGAL, collapses whitespace runs to one space, trims the ends and
    finally removes trailing dots and spaces.
    """
    translated = name.translate(FULLWIDTH)
    without_illegal = ILLEGAL.sub("", translated)
    single_spaced = re.sub(r"\s+", " ", without_illegal)
    return single_spaced.strip().rstrip(". ")
def seq_num(soubor):
    """Return the explicit sequence number (1-30) found in a file name, or None.

    Only the last path component is inspected (backslashes count as path
    separators). A trailing VIMEO_TAIL match and every ".mp4" occurrence
    (case-insensitive) are removed first. The patterns are tried in order;
    a match whose number lies outside 1..30 is ignored and the next pattern
    is tried.
    """
    stem = Path(soubor.replace("\\", "/")).name
    stem = re.sub(r"\.mp4", "", VIMEO_TAIL.sub("", stem), flags=re.I)

    patterns = (
        r"^\s*(\d{1,2})[\.\)]",      # "1. Tonometrie", "2) ..."
        r"[ _]p(\d{1,2})\b",         # " p01 ...", "_p02_"
        r"[ _](\d{1,2})[ _]",        # "TABAK_01_", "Meluzinova 3 "
        r"\b(?:cast|část|díl|dil|part)[ _]*(\d{1,2})\b",
        r"[ _](\d{1,2})$",           # "... 2"
    )
    for pattern in patterns:
        found = re.search(pattern, stem, flags=re.I)
        if found is None:
            continue
        number = int(found.group(1))
        if 1 <= number <= 30:
            return number
    return None
def src_id(klic):
    """Return the numeric id following "vimeo:" in *klic* as an int, or None.

    A missing or empty key yields None. The original author notes that the
    id roughly follows upload order, which is why order_items() sorts by it.
    """
    found = re.search(r"vimeo:(\d+)", klic or "")
    if found is None:
        return None
    return int(found.group(1))
def order_items(items):
    """Sort the videos of one course into their logical order.

    When every item carries an explicit sequence number (see seq_num) and all
    numbers are distinct, the items are ordered by that number. Otherwise
    items with a vimeo id (see src_id) come first, ordered by id, followed by
    the remaining items ordered alphabetically by their "soubor" value.
    """
    numbers = [seq_num(item.get("soubor", "")) for item in items]
    numbered = None not in numbers and len(numbers) == len(set(numbers))
    if numbered:
        paired = list(zip(numbers, items))
        paired.sort(key=lambda pair: pair[0])
        return [item for _, item in paired]

    # fallback: vimeo upload order, then alphabetical by file name
    def fallback_key(item):
        vimeo_id = src_id(item.get("klic"))
        if vimeo_id is None:
            return (1, 0, item.get("soubor", ""))
        return (0, vimeo_id, "")

    return sorted(items, key=fallback_key)
def seg_label(soubor, nazev):
    """Derive the segment label of one video inside a multi-video course.

    Starts from the last path component of *soubor*, removes ".mp4" and a
    trailing VIMEO_TAIL match, strips the first matching course-name prefix
    (compared case-insensitively), turns underscores into spaces, drops a
    leading "NN." / "NN)" number and collapses whitespace.

    Returns "" when nothing meaningful is left: an empty string, a bare
    alphanumeric run of 16+ characters, or one of a few generic placeholders.
    """
    label = Path(soubor.replace("\\", "/")).name
    label = re.sub(r"\.mp4\s*", " ", label, flags=re.I)
    label = VIMEO_TAIL.sub("", label).strip()

    # most specific prefix first; only the first hit is removed
    prefixes = (
        f"EUNI kurz - {nazev} - studijní materiál -",
        f"EUNI kurz - {nazev} - studijní materiál",
        f"EUNI kurz - {nazev} -",
        f"EUNI kurz - {nazev}",
        f"{nazev} -",
        f"{nazev}-",
        nazev,
    )
    lowered = label.lower()
    hit = next((p for p in prefixes if lowered.startswith(p.lower())), None)
    if hit is not None:
        label = label[len(hit):].strip(" -")

    label = label.replace("_", " ").strip()
    # a duplicated sequence number is dropped (the caller adds its own NN)
    label = re.sub(r"^\s*\d{1,2}[\.\)]\s*", "", label)
    label = re.sub(r"\s+", " ", label).strip()

    placeholders = {"studijní materiál", "zaznam", "záznam", "video"}
    if not label:
        return ""
    if re.fullmatch(r"[0-9A-Za-z]{16,}", label):
        return ""
    if label.lower() in placeholders:
        return ""
    return label
def clip(stem):
    """Shorten an over-long name (without ".mp4") to at most MAX_NAME characters.

    A shortened name additionally loses trailing spaces, hyphens and dots;
    names that already fit are returned unchanged.
    """
    if len(stem) > MAX_NAME:
        return stem[:MAX_NAME].rstrip(" -.")
    return stem
def filer_url(filer, seaweed_path):
    """Build the download URL of *seaweed_path* on the filer at *filer*.

    Each path segment is percent-encoded separately, so "/" keeps its role as
    a separator. Trailing slashes of *filer* and leading slashes of
    *seaweed_path* are dropped, so an absolute-style path ("/dir/file.mp4")
    no longer produces a "//" right after the host.
    """
    segments = seaweed_path.lstrip("/").split("/")
    enc = "/".join(quote(segment) for segment in segments)
    return filer.rstrip("/") + "/" + enc
def build_plan(db):
    """Compute the export plan from the Mongo database *db*.

    Reads all documents of ``db.kurzy`` and every ``db.materialy`` document
    with druh "video" and a non-empty ``seaweed_fids``, groups the videos by
    ``kurz_id`` and derives a target file name for each video:

    * one video in the course: "<title> - <surname> (<year>).mp4"
    * several videos:          "<title> - <NN> <segment> (<year>).mp4"

    The surname and year parts are omitted when unknown.

    Returns ``(by_course, plan)``: ``by_course`` maps kurz_id to its list of
    video documents, ``plan`` is a list of
    ``(seaweed_path, filename, seaweed_size)`` tuples sorted by file name.
    """
    courses = {doc["_id"]: doc for doc in db.kurzy.find({})}

    video_filter = {"druh": "video",
                    "seaweed_fids": {"$exists": True, "$ne": []}}
    video_fields = {"kurz_id": 1, "soubor": 1, "seaweed_path": 1,
                    "seaweed_size": 1, "klic": 1}
    by_course = {}
    for video in db.materialy.find(video_filter, video_fields):
        by_course.setdefault(video["kurz_id"], []).append(video)

    plan = []  # (seaweed_path, filename, size)
    for course_id, videos in by_course.items():
        course = courses.get(course_id, {})
        title = sanitize(course.get("nazev")
                         or videos[0].get("soubor", course_id))
        author = surname(course.get("autor"))
        published = course.get("datum_publikace")
        year = published.year if isinstance(published, datetime) else None
        year_part = f" ({year})" if year else ""

        if len(videos) == 1:
            only = videos[0]
            author_part = f" - {author}" if author else ""
            name = clip(sanitize(f"{title}{author_part}{year_part}")) + ".mp4"
            plan.append((only["seaweed_path"], name, only["seaweed_size"]))
            continue

        for position, video in enumerate(order_items(videos), 1):
            label = seg_label(video.get("soubor", ""),
                              course.get("nazev") or "")
            if label:
                middle = f" - {position:02d} {label}"
            else:
                middle = f" - {position:02d}"
            name = clip(sanitize(f"{title}{middle}{year_part}")) + ".mp4"
            plan.append((video["seaweed_path"], name, video["seaweed_size"]))

    plan.sort(key=lambda entry: entry[1])
    return by_course, plan
def main():
    """Command-line entry point: build the plan and download the videos.

    With --dry-run only the plan is printed. Otherwise every planned file is
    streamed from the filer into a ".part" file next to its destination and
    renamed once complete. Files that already exist with the planned size are
    skipped, so an interrupted run can simply be started again.

    A finished download whose size differs from the planned size is reported
    as FAIL and is NOT renamed, so a truncated transfer can never be logged
    as OK or be mistaken for a complete file.

    Progress is printed and appended to "_export_log.txt" in the destination
    directory. --limit N stops after N successful downloads.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--dry-run", action="store_true")
    ap.add_argument("--dest", default=DEF_DEST)
    ap.add_argument("--mongo", default=DEF_MONGO)
    ap.add_argument("--filer", default=DEF_FILER)
    ap.add_argument("--limit", type=int, default=0, help="stahne jen N souboru")
    args = ap.parse_args()

    cli = MongoClient(args.mongo, serverSelectionTimeoutMS=5000)
    by_course, plan = build_plan(cli["EUNI"])
    total = sum(p[2] for p in plan)
    print(f"Kurzu s videem: {len(by_course)} | souboru k exportu: {len(plan)} "
          f"| celkem {total/1024**3:.1f} GiB\n")

    if args.dry_run:
        for path, fn, size in plan:
            print(f"{size/1024**2:8.1f} MB {fn}")
        print(f"\n[DRY-RUN] nic nestazeno. Celkem {len(plan)} souboru, "
              f"{total/1024**3:.1f} GiB.")
        return

    dest = Path(args.dest)
    dest.mkdir(parents=True, exist_ok=True)
    log_path = dest / "_export_log.txt"

    # preflight: is the filer reachable at all?
    try:
        requests.get(args.filer, timeout=8)
        print(f"Filer OK: {args.filer}\n", flush=True)
    except Exception as e:
        sys.exit(f"NEDOSTUPNY FILER {args.filer} :: {e}\n"
                 f"Zkontroluj sit / VPN na 192.168.1.50:8888 z tohoto stroje.")

    done = skipped = failed = 0
    dl_bytes = 0
    t0 = time.time()
    with open(log_path, "a", encoding="utf-8") as log:
        log.write(f"\n=== RUN {datetime.now():%Y-%m-%d %H:%M} | "
                  f"{len(plan)} planned | dest={dest} ===\n")
        for n, (path, fn, size) in enumerate(plan, 1):
            dst = dest / fn
            # already exported with the expected size -> nothing to do
            if dst.exists() and dst.stat().st_size == size:
                skipped += 1
                continue
            print(f"[{n}/{len(plan)}] ↓ {size/1024**2:.1f}MB {fn}", flush=True)
            try:
                url = filer_url(args.filer, path)
                tmp = dst.with_suffix(".part")
                ts = time.time()
                # timeout=(connect, read) -> a stalled connection fails fast
                with requests.get(url, stream=True, timeout=(15, 90)) as r:
                    r.raise_for_status()
                    with open(tmp, "wb") as f:
                        for chunk in r.iter_content(1 << 20):
                            f.write(chunk)
                # Verify before publishing: a connection that ends early does
                # not necessarily raise, so compare the size on disk with the
                # size the skip check above will expect on the next run.
                got = tmp.stat().st_size
                if got != size:
                    raise OSError(f"neuplne stazeni: {got} B misto {size} B")
                tmp.replace(dst)
                done += 1
                dl_bytes += size
                sp = size / 1024**2 / max(time.time() - ts, 0.1)
                msg = f"[{n}/{len(plan)}] OK {size/1024**2:.1f}MB ({sp:.1f} MB/s) {fn}"
            except Exception as e:
                # one failed file must not abort the whole export
                failed += 1
                msg = f"[{n}/{len(plan)}] FAIL {fn} :: {e}"
            print(msg, flush=True)
            log.write(msg + "\n")
            log.flush()
            if args.limit and done >= args.limit:
                break
        dt = time.time() - t0
        summary = (f"HOTOVO: {done} stazeno ({dl_bytes/1024**3:.1f} GiB), "
                   f"{skipped} preskoceno, {failed} chyb, {dt/60:.1f} min")
        print("\n" + summary)
        log.write(summary + "\n")


if __name__ == "__main__":
    main()
+207
View File
@@ -0,0 +1,207 @@
#!/usr/bin/env python3
"""
EUNI -> Plex: druhy pruchod, doplni metadata k uz naskenovanym videim.
Paruje polozky v Plex sekci EUNI s kurzy v Mongu (podle nazvu souboru = build_plan
z plex_export.py) a zapisuje: Originally Available, Studio, Summary, Genre, Label,
a u multi-video kurzu Collection (slouci dily k sobe). Pole zamyka (.locked=1),
takze je sken neprepise. Idempotentni - lze poustet opakovane.
Zavislosti:
pip install pymongo requests
Pouziti:
set PLEX_TOKEN=... # (Windows) nebo export PLEX_TOKEN=...
python plex_meta.py --dry-run # vypise co by zapsal, nic nemeni
python plex_meta.py # zapise
python plex_meta.py --token XXXX --section 23
"""
import argparse
import os
import sys
from datetime import datetime
import requests
from pymongo import MongoClient
import plex_export as px # sdili build_plan() -> stejne nazvy souboru
# Switch both standard streams to UTF-8 so non-ASCII text can be printed;
# characters that still cannot be encoded are replaced instead of raising.
for _s in (sys.stdout, sys.stderr):
    try:
        _s.reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Best effort: a stream that cannot be reconfigured is left untouched.
        pass
DEF_PLEX = "http://192.168.1.76:32400"
DEF_SECTION = 23  # EUNI (Other Videos)
# SECURITY: no real Plex token may live in source control. The default is
# empty on purpose; supply the token through the PLEX_TOKEN environment
# variable or --token. With neither set, main() exits with an explicit message.
# NOTE(review): a token was previously committed here - rotate it.
DEF_TOKEN = ""
# Maps an EUNI profession code to the genre name written to Plex; unknown
# codes are skipped. The whole current batch is profession 2 = "Lékař".
PROFESE = {
    2: "Lékař",
    # further codes (e.g. 3: "Farmaceut") are to be added once those
    # professions are imported
}
def build_kurz_index(db):
    """Map every expected Plex title to its course.

    The key is the exported file name without ".mp4", built with the same
    helpers as plex_export.build_plan(). The value is ``(kurz_doc, multi)``,
    where *multi* is True when the course has more than one video.

    The per-course grouping of videos is taken from px.build_plan(), which
    this function already called; the former second, identical query of
    ``db.materialy`` and the unused ``Counter`` import are gone.
    """
    kurzy = {k["_id"]: k for k in db.kurzy.find({})}
    # build_plan() returns (by_course, plan); the plan itself carries no
    # kurz_id, so only the grouping is reused and the names are rebuilt below.
    by_course, _ = px.build_plan(db)

    idx = {}
    for kid, items in by_course.items():
        k = kurzy.get(kid, {})
        nazev = px.sanitize(k.get("nazev") or items[0].get("soubor", kid))
        autor = px.surname(k.get("autor"))
        dp = k.get("datum_publikace")
        rok = dp.year if isinstance(dp, datetime) else None
        ystr = f" ({rok})" if rok else ""
        multi = len(items) > 1
        if not multi:
            who = f" - {autor}" if autor else ""
            stem = px.clip(px.sanitize(f"{nazev}{who}{ystr}"))
            idx[stem] = (k, False)
        else:
            for i, v in enumerate(px.order_items(items), 1):
                lbl = px.seg_label(v.get("soubor", ""), k.get("nazev") or "")
                mid = f" - {i:02d} {lbl}" if lbl else f" - {i:02d}"
                stem = px.clip(px.sanitize(f"{nazev}{mid}{ystr}"))
                idx[stem] = (k, True)
    return idx
def make_summary(k):
    """Compose the summary text for a course document *k*.

    Produces up to three lines, each only when its data is present (truthy):
    the author, the accreditation and credits joined by " · ", and the
    source URL. Returns "" when none of the fields is set.
    """
    author = k.get("autor")
    accreditation = k.get("akreditace")
    points = k.get("kredity")
    source = k.get("url")

    details = []
    if accreditation:
        details.append(f"Akreditace {accreditation}")
    if points:
        details.append(f"{points} kreditů")

    candidates = (
        f"Autor: {author}" if author else "",
        " · ".join(details),
        f"Zdroj: {source}" if source else "",
    )
    return "\n".join(line for line in candidates if line)
def fix_poster(base, token, rk):
    """Select the largest auto-generated thumbnail of item *rk* as its poster.

    The poster candidates are listed from Plex; only those whose ratingKey
    starts with "media://" are considered. Each candidate is downloaded and
    the one with the biggest body wins - the original author's heuristic for
    avoiding a black frame.

    Candidates that cannot be fetched successfully are ignored, so an error
    page can never win the size comparison, and the final PUT is checked.

    Returns True when a poster was set, False when there was no usable
    candidate. Raises a requests exception when listing the posters or
    setting the chosen one fails.
    """
    import xml.etree.ElementTree as ET

    r = requests.get(f"{base}/library/metadata/{rk}/posters",
                     params={"X-Plex-Token": token}, timeout=15)
    r.raise_for_status()
    root = ET.fromstring(r.content)
    cands = [p.get("ratingKey") for p in root.findall("Photo")
             if (p.get("ratingKey") or "").startswith("media://")]
    if not cands:
        return False

    best, bsz = None, -1
    for u in cands:
        d = requests.get(f"{base}/library/metadata/{rk}/file",
                         params={"url": u, "X-Plex-Token": token}, timeout=15)
        if not d.ok:
            # an error body says nothing about the image size
            continue
        if len(d.content) > bsz:
            bsz, best = len(d.content), u
    if best is None:
        return False

    put = requests.put(f"{base}/library/metadata/{rk}/poster",
                       params={"url": best, "X-Plex-Token": token}, timeout=15)
    # only report success when Plex accepted the change
    put.raise_for_status()
    return True
def plex_items(base, section, token):
    """List the items of one Plex library section.

    Fetches ``/library/sections/<section>/all`` and returns a list of
    ``(ratingKey, title)`` tuples, one per "Video" element directly under
    the response root. Raises on an HTTP error status.
    """
    import xml.etree.ElementTree as ET

    response = requests.get(f"{base}/library/sections/{section}/all",
                            params={"X-Plex-Token": token}, timeout=15)
    response.raise_for_status()

    pairs = []
    for video in ET.fromstring(response.content).findall("Video"):
        pairs.append((video.get("ratingKey"), video.get("title")))
    return pairs
def push(base, section, token, rating_key, k, multi, nazev, dry):
    """Write and lock the metadata of one Plex item.

    Always sets studio "EUNI", label "EUNI" and genre "EUNI kurz". Adds the
    publication date when ``k["datum_publikace"]`` is a datetime, the summary
    from make_summary() when non-empty, one extra genre per profession code
    known to PROFESE, and - for multi-video courses - a collection named
    *nazev*.

    Returns "DRY" without contacting Plex when *dry* is true, otherwise the
    HTTP status code of the PUT request.
    """
    query = [
        ("type", "1"), ("id", rating_key), ("X-Plex-Token", token),
        ("studio.value", "EUNI"), ("studio.locked", "1"),
        ("label[0].tag.tag", "EUNI"), ("label.locked", "1"),
        ("genre[0].tag.tag", "EUNI kurz"), ("genre.locked", "1"),
    ]

    published = k.get("datum_publikace")
    if isinstance(published, datetime):
        query.append(("originallyAvailableAt.value",
                      published.strftime("%Y-%m-%d")))
        query.append(("originallyAvailableAt.locked", "1"))

    summary = make_summary(k)
    if summary:
        query.extend([("summary.value", summary), ("summary.locked", "1")])

    # professions -> further genres (known codes only), numbered from 1
    known = [name for name in map(PROFESE.get, k.get("profese") or []) if name]
    query.extend((f"genre[{slot}].tag.tag", name)
                 for slot, name in enumerate(known, 1))

    if multi:
        query.extend([("collection[0].tag.tag", nazev),
                      ("collection.locked", "1")])

    if dry:
        return "DRY"
    response = requests.put(f"{base}/library/sections/{section}/all",
                            params=query, timeout=15)
    return response.status_code
def main():
    """Command-line entry point: pair Plex items with courses and push metadata.

    Builds the title -> course index from Mongo, lists the items of the Plex
    section and, for every item whose title is found in the index, writes the
    metadata via push(). After a real (non-dry) successful write the poster
    is re-selected with fix_poster() unless --no-poster is given; a poster
    failure is only reported, it does not count as an error. Ends with a
    one-line tally of written, unmatched and failed items.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--plex", default=DEF_PLEX)
    parser.add_argument("--section", type=int, default=DEF_SECTION)
    parser.add_argument("--mongo", default=px.DEF_MONGO)
    parser.add_argument("--token",
                        default=os.environ.get("PLEX_TOKEN") or DEF_TOKEN)
    parser.add_argument("--no-poster", action="store_true",
                        help="nevybirat nahled (jen metadata)")
    args = parser.parse_args()
    if not args.token:
        sys.exit("Chybi PLEX_TOKEN (env nebo --token).")

    db = MongoClient(args.mongo, serverSelectionTimeoutMS=5000)["EUNI"]
    idx = build_kurz_index(db)
    items = plex_items(args.plex, args.section, args.token)
    print(f"Plex polozek: {len(items)} | namapovano kurzu: {len(idx)}\n")

    ok = miss = fail = 0
    for rk, title in items:
        match = idx.get(title)
        if not match:
            miss += 1
            print(f" ? bez parovani: {title}")
            continue

        course, multi = match
        try:
            status = push(args.plex, args.section, args.token, rk, course,
                          multi, px.sanitize(course.get("nazev", "")),
                          args.dry_run)
        except Exception as err:
            fail += 1
            print(f" FAIL {title} :: {err}")
            continue

        if status not in (200, "DRY"):
            fail += 1
            print(f" FAIL {status}: {title}")
            continue

        ok += 1
        if status != 200 or args.no_poster:
            continue
        try:
            fix_poster(args.plex, args.token, rk)
        except Exception as err:
            print(f" poster? {title} :: {err}")

    tag = "[DRY-RUN] " if args.dry_run else ""
    print(f"\n{tag}OK {ok}, nenamapovano {miss}, chyb {fail}")


if __name__ == "__main__":
    main()
+1
View File
@@ -18,3 +18,4 @@
- [Admin PowerShell nefunguje](feedback_admin_powershell.md) — příkazy vyžadující admin práva rovnou napsat uživateli, nezkoušet alternativy
- [SeaweedFS na Tower1](project_seaweedfs.md) — kontejner na .50 (bloby na array, metadata SSD), noční backup filer metadat na tower .76, retence 7+4
- [SeaweedFS offsite záloha](project_seaweedfs_offsite.md) — offsite na Synology DS213+ v ordinaci; NAS-pull rsync přes MikroTik 2250→.50 FUSE mount; ~50 GiB ověřeno; runbook v Triliu
- [EUNI kurzy do Plexu](project_euni_plex.md) — `plex_export.py` (stáhne 334 videí z SeaweedFS na Plex share, pojmenuje) + `plex_meta.py` (metadata přes Plex API, sekce 23 EUNI); Mongo db EUNI
+20
View File
@@ -0,0 +1,20 @@
---
name: project-euni-plex
description: EUNI kurzy/videa do Plexu — stahovani z SeaweedFS + pojmenovani + metadata pres Plex API
metadata:
node_type: memory
type: project
originSessionId: 8a74232a-2f15-43de-8a65-edc8f96fde36
---
Export EUNI kurzů do Plexu (knihovna **EUNI**, typ Other Videos). Skripty v `U:\janssen\EUNI\`.
- **Zdroj dat:** Mongo db `EUNI` (na .76, viz [[project-seaweedfs]]) — kolekce `kurzy` (274 dok: nazev/autor/akreditace/kredity/profese/datum_publikace/url/slug) a `materialy` (1034 dok: druh dokument|video, kurz_id, seaweed_path/fids/size). Videa: 338, z toho 334 v SeaweedFS. Všechny kurzy `profese=[2]` = **Lékař** (batch filtrovaný na lékaře; další profese se teprve dotáhnou).
- **Bloby:** SeaweedFS filer `http://192.168.1.50:8888/<seaweed_path>` (raw GET, URL-encode po segmentech).
- **Plex:** `http://192.168.1.76:32400`, sekce **23 = EUNI** (type movie). Token = `PlexOnlineToken` v `Preferences.xml` (binhex-plexpass appdata na .76). API edit: `PUT /library/sections/23/all?type=1&id=<ratingKey>&<pole>.value=...&<pole>.locked=1` (lock = sken nepřepíše). Štítky `genre[0].tag.tag=...`, poster `POST /library/metadata/<rk>/posters?url=`.
- **`plex_export.py`** — stáhne 334 videí (~119 GiB) z fileru na `\\tower\PlexBinHex\EUNI`. Idempotentní (.part + kontrola velikosti, hotové skipuje), log `_export_log.txt`. Konvence názvu:
- single-video kurz: `<Nazev> - <Prijmeni> (<rok>).mp4`
- multi-video kurz (54 ks, většinou série rovnocenných přednášek, ne hlavni+balast): `<Nazev> - <NN> <segment> (<rok>).mp4`, autor vynechán. Pořadí NN: nejdřív explicitní číslo v názvu (`1.`, `TABAK_01`, `p01`, `část 2`), jinak vimeo id (chronologicky), jinak abecedně. Příjmení = vše po křestním jménu po stripu titulů/hodností (drží dvojitá příjmení).
- **`plex_meta.py`** — 2. průchod po skenu: spáruje Plex položku s kurzem podle názvu (sdílí `build_plan`/`order_items`/`seg_label` z plex_export) a zapíše+zamkne: originallyAvailableAt=datum_publikace, studio=EUNI, summary (autor+akreditace+kredity+url), genre „EUNI kurz"+profese (PROFESE dict {2:"Lékař"}), label „EUNI", u multi-video collection=nazev kurzu. Token přes `PLEX_TOKEN` env nebo `--token`.
- **Poster:** zápis metadat přes API shodí u Other Videos výběr posteru na černý 1. snímek. `plex_meta.py` proto po zápisu zavolá `fix_poster()` — vybere největší z auto-generovaných náhledů (`/library/metadata/<rk>/posters` → `media://…thumbN.jpg`, PUT `/poster?url=`). Vypnout `--no-poster`. euni.cz oficiální obrázek nedává (JS/login).
- **Postup:** `python plex_export.py` → Plex Scan Library Files → `python plex_meta.py`. Pouštět na drátovém stroji (ne notebook přes wifi). Ověřeno end-to-end na kurzu Diagnostika CRP (ratingKey 80992).