314 lines
12 KiB
Python
314 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
r"""
|
|
Thumbnail generation pipeline for photos database.
|
|
Reads photos with no thumbnail, generates JPEG thumbnails that fit within
MAX_SIZE (currently 400x400, aspect ratio preserved), updates DB.
|
|
|
|
Paths stored in the DB always use the native Tower1 format:
|
|
/mnt/user/ZalohaVsechObrazku/thumbnails/{year}/{month}/{sha256}.jpg
|
|
|
|
Physical writes go through the path that matches the current environment:
|
|
- Tower1 (Unraid): /mnt/user/ZalohaVsechObrazku/thumbnails/...
|
|
- tower (Unraid): /mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku/thumbnails/...
|
|
- Windows: \\Tower1\ZalohaVsechObrazku\thumbnails\...
|
|
|
|
The same principle applies to reading the source photos: paths in the DB are
in Tower1 format and the script remaps them to the local mount.
|
|
|
|
Usage:
|
|
python generate_thumbnails.py [--batch-size 1000] [--dry-run]
|
|
|
|
To limit the number of processed photos, set the MAX_PHOTOS variable below (0 = all).
|
|
"""
|
|
|
|
import argparse
|
|
import logging
|
|
import os
|
|
import platform
|
|
import shutil
|
|
import socket
|
|
import sys
|
|
import time
|
|
from pathlib import Path, PurePosixPath
|
|
|
|
import psycopg2
|
|
import psycopg2.extras
|
|
from dotenv import load_dotenv
|
|
from PIL import Image, ImageOps
|
|
|
|
# Look for .env next to the script first, then in the parent directory
# (the project root); only the first file found is loaded.
_here = Path(__file__).parent
_env_candidates = (_here / ".env", _here.parent / ".env")
for _env in _env_candidates:
    if not _env.is_file():
        continue
    load_dotenv(_env)
    break
|
|
|
|
# ── Konfigurace ──────────────────────────────────────────────────────────────
|
|
|
|
# Maximum number of photos to process in one run (0 = all).
MAX_PHOTOS = 1000

# If True, all thumbnails (DB values and files) are deleted when the script starts.
# NOTE(review): this is destructive and enabled by default — confirm it is intended.
RESET = True

# If True, a copy of the original is stored next to each thumbnail as
# {sha256}_o.{ext} (for testing / visual comparison only).
SAVE_ORIGINAL = True

# Bounding box (width, height) that a thumbnail is shrunk to fit within.
MAX_SIZE = (400, 400)
# Quality setting passed to the JPEG encoder when saving a thumbnail.
JPEG_QUALITY = 85
# Default value of the --batch-size command-line option.
BATCH_SIZE = 1000

# Canonical prefixes used in the DB (native Tower1 paths).
DB_THUMBNAIL_BASE = "/mnt/user/ZalohaVsechObrazku/thumbnails"
DB_SOURCE_BASE = "/mnt/user/ZalohaVsechObrazku"
|
|
|
|
# Physical base directories for the current environment. In every environment
# the thumbnail tree is the "thumbnails" subdirectory of the source share.
if platform.system() != "Windows":
    hostname = socket.gethostname()
    if hostname != "Tower1":
        # "tower" or any other Linux machine: go through the remote mount.
        LOCAL_SOURCE_BASE = Path("/mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku")
    else:
        LOCAL_SOURCE_BASE = Path("/mnt/user/ZalohaVsechObrazku")
else:
    LOCAL_SOURCE_BASE = Path(r"\\Tower1\ZalohaVsechObrazku")

LOCAL_THUMBNAIL_BASE = LOCAL_SOURCE_BASE / "thumbnails"
|
|
|
|
# ── Logging ──────────────────────────────────────────────────────────────────
|
|
|
|
# The log file sits next to this script.
LOG_FILE = Path(__file__).with_name("generate_thumbnails.log")

# Every record goes both to stdout and, UTF-8 encoded, to LOG_FILE.
_log_handlers = [
    logging.StreamHandler(sys.stdout),
    logging.FileHandler(LOG_FILE, encoding="utf-8"),
]
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
    handlers=_log_handlers,
)

log = logging.getLogger(__name__)
|
|
|
|
# ── Pomocné funkce ───────────────────────────────────────────────────────────
|
|
|
|
|
|
def get_conn():
    """Open a new PostgreSQL connection configured from environment variables.

    Each setting is read from its DB_* variable first. When that is unset or
    empty the matching PG* variable is used, and the built-in default applies
    only when the PG* variable is unset as well.
    """

    def setting(primary, fallback, default):
        # An empty primary value falls through to the fallback variable.
        return os.getenv(primary) or os.getenv(fallback, default)

    return psycopg2.connect(
        host=setting("DB_HOST", "PGHOST", "192.168.1.76"),
        port=int(setting("DB_PORT", "PGPORT", 5432)),
        dbname=setting("DB_NAME", "PGDATABASE", "fotky_buzalkovi"),
        user=setting("DB_USER", "PGUSER", "vladimir.buzalka"),
        password=setting("DB_PASSWORD", "PGPASSWORD", ""),
    )
|
|
|
|
|
|
def db_path_to_local(db_path: str) -> Path:
    """Convert a path stored in the DB (native Tower1 form) to a local path.

    Paths equal to DB_SOURCE_BASE or located below it are re-rooted at
    LOCAL_SOURCE_BASE. Any other path is returned unchanged, wrapped in Path.

    Args:
        db_path: POSIX-style path as stored in the DB.

    Returns:
        The path to use for reading/writing on the current machine.
    """
    if db_path == DB_SOURCE_BASE:
        return LOCAL_SOURCE_BASE

    # Require a "/" right after the prefix so that a sibling directory which
    # merely starts with the same characters (e.g. ".../ZalohaVsechObrazkuOld")
    # is not mistaken for a path inside the share.
    prefix = DB_SOURCE_BASE + "/"
    if db_path.startswith(prefix):
        return LOCAL_SOURCE_BASE / db_path[len(prefix):].lstrip("/")

    return Path(db_path)
|
|
|
|
|
|
def thumbnail_db_path(sha256: str, taken_at) -> str:
    """Build the canonical (Tower1 format) thumbnail path that is stored in the DB.

    The path is DB_THUMBNAIL_BASE/{year}/{month}/{sha256}.jpg, where year and
    two-digit month come from taken_at; both are "unknown" when taken_at is
    falsy. Surrounding whitespace is stripped from sha256.
    """
    if taken_at:
        year, month = str(taken_at.year), f"{taken_at.month:02d}"
    else:
        year = month = "unknown"
    return "/".join((DB_THUMBNAIL_BASE, year, month, f"{sha256.strip()}.jpg"))
|
|
|
|
|
|
def thumbnail_local_path(sha256: str, taken_at) -> Path:
    """Build the local filesystem path the thumbnail file is written to.

    The path is LOCAL_THUMBNAIL_BASE/{year}/{month}/{sha256}.jpg, where year
    and two-digit month come from taken_at; both are "unknown" when taken_at
    is falsy. Surrounding whitespace is stripped from sha256.
    """
    if taken_at:
        year, month = str(taken_at.year), f"{taken_at.month:02d}"
    else:
        year = month = "unknown"
    return LOCAL_THUMBNAIL_BASE.joinpath(year, month, f"{sha256.strip()}.jpg")
|
|
|
|
|
|
def generate_thumbnail(source_path: Path, dest_path: Path) -> bool:
    """Generate a JPEG thumbnail of source_path at dest_path, keeping aspect ratio.

    The image is first written to a temporary file next to dest_path and then
    moved into place, so dest_path never holds a partially written JPEG (the
    batch loop treats an existing dest_path as a finished thumbnail).

    If SAVE_ORIGINAL is True, the original is also copied next to the
    thumbnail with an "_o" suffix before its extension.

    Args:
        source_path: Local path of the source image.
        dest_path: Local path of the thumbnail; parent directories are created.

    Returns:
        Always True; failures are reported by raising.

    Raises:
        OSError (and PIL errors): if the source cannot be read or decoded, or
            the destination cannot be written.
    """
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = dest_path.with_name(f"{dest_path.name}.part")

    try:
        with Image.open(source_path) as img:
            # Apply EXIF Orientation, otherwise iPhone/camera photos come out rotated.
            img = ImageOps.exif_transpose(img)
            # JPEG can only store these modes; everything else (RGBA, P, LA,
            # 16-bit, float, ...) is converted to RGB so that save() does not fail.
            if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
                img = img.convert("RGB")
            img.thumbnail(MAX_SIZE, Image.LANCZOS)
            # The format is given explicitly, so the ".part" extension is irrelevant.
            img.save(tmp_path, "JPEG", quality=JPEG_QUALITY)
        os.replace(tmp_path, dest_path)
    finally:
        # Best-effort cleanup of a leftover temp file after a failure. After a
        # successful replace the file is already gone, which is not an error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

    # Copy of the original next to the thumbnail with the _o suffix (testing only).
    if SAVE_ORIGINAL:
        original_dest = dest_path.with_name(f"{dest_path.stem}_o{source_path.suffix}")
        shutil.copy2(source_path, original_dest)
    return True
|
|
|
|
|
|
def reset_thumbnails(conn, dry_run: bool) -> None:
    """Delete all thumbnails: remove the files and clear thumbnail_path in the DB.

    Args:
        conn: Open DB connection; the UPDATE is executed and committed on it.
        dry_run: If True, only log what would be deleted and updated.
    """
    log.warning("RESET=True — mažu existující thumbnaily.")

    # 1) Remove the thumbnail directory tree.
    if LOCAL_THUMBNAIL_BASE.exists():
        if dry_run:
            log.info("[DRY RUN] Would delete directory tree: %s", LOCAL_THUMBNAIL_BASE)
        else:
            log.info("Deleting directory tree: %s", LOCAL_THUMBNAIL_BASE)
            # Deletion stays best effort (errors do not abort the run), but a
            # failure must not go unnoticed: surviving files would be picked up
            # again as finished thumbnails by the batch loop.
            shutil.rmtree(LOCAL_THUMBNAIL_BASE, ignore_errors=True)
            if LOCAL_THUMBNAIL_BASE.exists():
                log.warning(
                    "Could not fully delete %s; leftover thumbnail files will be reused.",
                    LOCAL_THUMBNAIL_BASE,
                )
    else:
        log.info("Thumbnail dir does not exist, skipping FS delete: %s", LOCAL_THUMBNAIL_BASE)

    # 2) Clear thumbnail_path in the DB.
    if dry_run:
        log.info("[DRY RUN] Would UPDATE photos SET thumbnail_path = NULL WHERE thumbnail_path IS NOT NULL")
        return

    with conn.cursor() as cur:
        cur.execute("UPDATE photos SET thumbnail_path = NULL WHERE thumbnail_path IS NOT NULL")
        affected = cur.rowcount
    conn.commit()
    log.info("DB reset: cleared thumbnail_path on %d rows.", affected)
|
|
|
|
|
|
# ── Zpracování ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
# IDs of rows that were already attempted during this process run but whose
# thumbnail_path is still NULL afterwards (source missing, generation failed,
# or dry run). The candidate query excludes them; otherwise every batch would
# select the same rows again.
_attempted_ids = set()

_CANDIDATE_SELECT = """
    SELECT id, sha256_file, file_path, taken_at
    FROM photos
    WHERE thumbnail_path IS NULL
"""

# Booleans sort false-first, so each expression puts the preferred rows first:
# files of at least 1 MB, then Apple photos with DateTimeOriginal, then any
# photo with DateTimeOriginal, then any photo with exif_raw, then by id.
_CANDIDATE_ORDER = """
    ORDER BY
        -- 1) reálné fotky (>= 1 MB) jdou před drobky/testy
        (file_size < 1048576),
        -- 2) Apple + DateTimeOriginal jdou úplně první
        NOT (exif_raw->>'Image Make' = 'Apple'
             AND exif_raw ? 'EXIF DateTimeOriginal'),
        -- 3) pak ostatní s DateTimeOriginal
        NOT (exif_raw ? 'EXIF DateTimeOriginal'),
        -- 4) pak cokoli s exif_raw
        (exif_raw IS NULL),
        id
    LIMIT %s
"""


def _fetch_candidates(conn, batch_size):
    """Return up to batch_size not-yet-attempted rows that lack a thumbnail."""
    sql = _CANDIDATE_SELECT
    params = []
    if _attempted_ids:
        # psycopg2 adapts a Python list to a SQL ARRAY. The clause is only
        # added for a non-empty list, so no explicit array cast is needed.
        sql += "    AND id <> ALL(%s)\n"
        params.append(list(_attempted_ids))
    sql += _CANDIDATE_ORDER
    params.append(batch_size)

    with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        cur.execute(sql, tuple(params))
        return cur.fetchall()


def _store_thumbnail_path(conn, photo_id, db_path):
    """Write the canonical thumbnail path for one photo and commit."""
    with conn.cursor() as cur:
        cur.execute(
            "UPDATE photos SET thumbnail_path = %s WHERE id = %s",
            (db_path, photo_id),
        )
    conn.commit()


def _process_row(conn, row, dry_run):
    """Handle one candidate row; return True if it counts as processed."""
    photo_id = row["id"]
    sha256 = row["sha256_file"]
    source_db = row["file_path"]
    taken_at = row["taken_at"]

    # Without a hash or a source path no thumbnail path can be built; skip the
    # row instead of letting it crash the whole run.
    if not sha256 or not source_db:
        log.warning("Missing sha256_file or file_path, skipping id=%s", photo_id)
        _attempted_ids.add(photo_id)
        return False

    db_path = thumbnail_db_path(sha256, taken_at)
    local_dest = thumbnail_local_path(sha256, taken_at)
    local_source = db_path_to_local(source_db)

    # The thumbnail already exists on disk: only record its path in the DB.
    if local_dest.exists():
        if dry_run:
            # Nothing is written in a dry run, so the row stays NULL.
            _attempted_ids.add(photo_id)
        else:
            _store_thumbnail_path(conn, photo_id, db_path)
        return True

    # The source file does not exist.
    if not local_source.is_file():
        log.warning("Source missing, skipping id=%d: %s (local: %s)", photo_id, source_db, local_source)
        _attempted_ids.add(photo_id)
        return False

    try:
        if dry_run:
            log.info("[DRY RUN] Would generate: %s -> %s (DB: %s)", local_source, local_dest, db_path)
            _attempted_ids.add(photo_id)
        else:
            generate_thumbnail(local_source, local_dest)
            _store_thumbnail_path(conn, photo_id, db_path)
        return True
    except Exception:
        log.exception("Failed to generate thumbnail for id=%d: %s", photo_id, source_db)
        conn.rollback()
        _attempted_ids.add(photo_id)
        return False


def process_batch(conn, batch_size: int, dry_run: bool) -> int:
    """Process up to batch_size photos that have no thumbnail yet.

    Rows that cannot be completed (missing source, failed generation) and all
    rows seen in a dry run are remembered for the rest of the process run and
    never selected again. If every candidate of a fetch is skipped, the next
    candidates are fetched right away, so a return value of 0 always means
    that nothing is left to process.

    Args:
        conn: Open DB connection used for the SELECT and the per-row UPDATEs.
        batch_size: Maximum number of rows fetched per query.
        dry_run: If True, no files are written and the DB is not updated;
            rows that would have been handled are still counted.

    Returns:
        Number of photos processed (or, in a dry run, that would have been).
    """
    while True:
        rows = _fetch_candidates(conn, batch_size)
        if not rows:
            return 0

        processed = sum(1 for row in rows if _process_row(conn, row, dry_run))
        if processed:
            return processed

        log.info("All %d candidates were skipped; fetching the next ones.", len(rows))
|
|
|
|
|
|
def _positive_int(text):
    """argparse type: parse an integer that must be at least 1."""
    value = int(text)
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {value}")
    return value


def main():
    """Command-line entry point: optionally reset, then process photos in batches.

    Batches are processed until process_batch reports nothing left, the
    MAX_PHOTOS limit is reached (0 = no limit), or the user interrupts the
    run. The DB connection is always closed at the end.
    """
    parser = argparse.ArgumentParser(description="Generate photo thumbnails")
    # A zero or negative batch size would end up as the SQL LIMIT, so it is
    # rejected up front with a clear usage error.
    parser.add_argument("--batch-size", type=_positive_int, default=BATCH_SIZE)
    parser.add_argument("--dry-run", action="store_true", help="Don't write files or update DB")
    args = parser.parse_args()

    limit = MAX_PHOTOS

    log.info("=" * 60)
    log.info("Starting thumbnail generation")
    log.info("  batch_size=%d, dry_run=%s, limit=%s", args.batch_size, args.dry_run, limit or "all")
    log.info("  hostname=%s, platform=%s", socket.gethostname(), platform.system())
    log.info("  source base (local):    %s", LOCAL_SOURCE_BASE)
    log.info("  thumbnail base (local): %s", LOCAL_THUMBNAIL_BASE)
    log.info("  thumbnail base (DB):    %s", DB_THUMBNAIL_BASE)

    conn = get_conn()
    total_processed = 0
    batch_num = 0

    try:
        if RESET:
            reset_thumbnails(conn, args.dry_run)

        while True:
            # With a limit set, cap the batch size at the remaining count.
            remaining = args.batch_size
            if limit > 0:
                remaining = min(args.batch_size, limit - total_processed)
                if remaining <= 0:
                    log.info("Limit %d reached. Done.", limit)
                    break

            batch_num += 1
            t0 = time.time()
            count = process_batch(conn, remaining, args.dry_run)
            elapsed = time.time() - t0

            if count == 0:
                log.info("No more photos to process. Done.")
                break

            total_processed += count
            log.info(
                "Batch %d: processed %d thumbnails in %.1fs (total: %d)",
                batch_num, count, elapsed, total_processed,
            )
    except KeyboardInterrupt:
        log.info("Interrupted by user. Total processed: %d", total_processed)
    finally:
        conn.close()

    log.info("Finished. Total thumbnails: %d", total_processed)
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|