#!/usr/bin/env python3
r"""
Thumbnail generation pipeline for the photos database.

Reads photos that have no thumbnail yet, generates JPEG thumbnails that fit
inside MAX_SIZE (aspect ratio preserved) and records their path in the DB.

Paths stored in the DB always use the native Tower1 format:
    /mnt/user/ZalohaVsechObrazku/thumbnails/{year}/{month}/{sha256}.jpg

Physical writes go through the path that matches the current environment:
  - Tower1 (Unraid):  /mnt/user/ZalohaVsechObrazku/thumbnails/...
  - tower  (Unraid):  /mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku/thumbnails/...
  - Windows:          \\Tower1\ZalohaVsechObrazku\thumbnails\...

The same principle applies when reading the source photos: DB paths are in
Tower1 format and the script remaps them onto the local mount.

Usage:
    python generate_thumbnails.py [--batch-size 1000] [--dry-run]

To cap the number of processed photos, set MAX_PHOTOS below (0 = all).
"""

import argparse
import logging
import os
import platform
import shutil
import socket
import sys
import time
from pathlib import Path, PurePosixPath
from typing import Optional, Set, Tuple

import psycopg2
import psycopg2.extras
from dotenv import load_dotenv
from PIL import Image, ImageOps

# Look for .env next to the script first, then in the parent directory
# (project root); only the first one found is loaded.
_here = Path(__file__).parent
for _env in (_here / ".env", _here.parent / ".env"):
    if _env.is_file():
        load_dotenv(_env)
        break

# ── Configuration ────────────────────────────────────────────────────────────

# Maximum number of photos to process (0 = all)
MAX_PHOTOS = 10

# If True, all thumbnails (DB column and files) are wiped when the script starts
RESET = True

# If True, a copy of the original is stored next to each thumbnail as
# {sha256}_o.{ext} (testing / visual comparison only)
SAVE_ORIGINAL = True

MAX_SIZE = (400, 400)
JPEG_QUALITY = 85
BATCH_SIZE = 1000

# Image modes Pillow's JPEG encoder can write directly; every other mode is
# converted to RGB before saving.
JPEG_NATIVE_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})

# Canonical prefixes used in the DB (native Tower1 paths)
DB_THUMBNAIL_BASE = "/mnt/user/ZalohaVsechObrazku/thumbnails"
DB_SOURCE_BASE = "/mnt/user/ZalohaVsechObrazku"

# Physical paths depending on the environment
if platform.system() == "Windows":
    LOCAL_THUMBNAIL_BASE = Path(r"\\Tower1\ZalohaVsechObrazku\thumbnails")
    LOCAL_SOURCE_BASE = Path(r"\\Tower1\ZalohaVsechObrazku")
else:
    hostname = socket.gethostname()
    if hostname == "Tower1":
        LOCAL_THUMBNAIL_BASE = Path("/mnt/user/ZalohaVsechObrazku/thumbnails")
        LOCAL_SOURCE_BASE = Path("/mnt/user/ZalohaVsechObrazku")
    else:
        # "tower" or any other Linux machine — go through the remote mount
        LOCAL_THUMBNAIL_BASE = Path("/mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku/thumbnails")
        LOCAL_SOURCE_BASE = Path("/mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku")

# ── Logging ──────────────────────────────────────────────────────────────────

LOG_FILE = Path(__file__).parent / "generate_thumbnails.log"

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(LOG_FILE, encoding="utf-8"),
    ],
)
log = logging.getLogger(__name__)


# ── Helpers ──────────────────────────────────────────────────────────────────

def get_conn():
    """Open a PostgreSQL connection.

    Each setting is read from DB_* first, then from the matching PG*
    environment variable, then from the hard-coded default.
    """
    return psycopg2.connect(
        host=os.getenv("DB_HOST") or os.getenv("PGHOST", "192.168.1.76"),
        port=int(os.getenv("DB_PORT") or os.getenv("PGPORT", 5432)),
        dbname=os.getenv("DB_NAME") or os.getenv("PGDATABASE", "fotky_buzalkovi"),
        user=os.getenv("DB_USER") or os.getenv("PGUSER", "vladimir.buzalka"),
        password=os.getenv("DB_PASSWORD") or os.getenv("PGPASSWORD", ""),
    )


def db_path_to_local(db_path: str) -> Path:
    """Convert a DB path (native Tower1 format) to the local read/write path.

    Paths outside DB_SOURCE_BASE are returned unchanged as a Path.
    """
    if db_path == DB_SOURCE_BASE:
        return LOCAL_SOURCE_BASE
    # Match on a path-component boundary so that a sibling directory such as
    # ".../ZalohaVsechObrazku2/x" is not remapped by accident.
    prefix = DB_SOURCE_BASE + "/"
    if db_path.startswith(prefix):
        return LOCAL_SOURCE_BASE / db_path[len(prefix):].lstrip("/")
    return Path(db_path)


def _date_parts(taken_at) -> Tuple[str, str]:
    """Return the (year, month) directory names, or "unknown" when undated."""
    if taken_at:
        return str(taken_at.year), f"{taken_at.month:02d}"
    return "unknown", "unknown"


def thumbnail_db_path(sha256: str, taken_at) -> str:
    """Return the canonical thumbnail path stored in the DB (Tower1 format)."""
    year, month = _date_parts(taken_at)
    return f"{DB_THUMBNAIL_BASE}/{year}/{month}/{sha256.strip()}.jpg"


def thumbnail_local_path(sha256: str, taken_at) -> Path:
    """Return the local physical path the thumbnail file is written to."""
    year, month = _date_parts(taken_at)
    return LOCAL_THUMBNAIL_BASE / year / month / f"{sha256.strip()}.jpg"


def generate_thumbnail(source_path: Path, dest_path: Path) -> bool:
    """Write a JPEG thumbnail of source_path to dest_path, keeping aspect ratio.

    The image is written to a temporary file first and moved into place only
    after a successful save, so a failed run never leaves a truncated file at
    dest_path (process_batch treats an existing dest_path as a finished
    thumbnail). When SAVE_ORIGINAL is True the original is copied next to the
    thumbnail with an "_o" suffix.

    Returns True on success; any error from Pillow or the filesystem propagates.
    """
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = dest_path.with_name(f"{dest_path.name}.tmp")

    try:
        with Image.open(source_path) as img:
            # Apply the EXIF orientation, otherwise phone/camera photos come
            # out rotated.
            img = ImageOps.exif_transpose(img)
            if img.mode not in JPEG_NATIVE_MODES:
                img = img.convert("RGB")
            img.thumbnail(MAX_SIZE, Image.LANCZOS)
            # The format is passed explicitly because the temp name does not
            # end in ".jpg".
            img.save(tmp_path, "JPEG", quality=JPEG_QUALITY)
        os.replace(tmp_path, dest_path)
    except BaseException:
        # Best-effort cleanup of the partial file; the original error wins.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise

    # Copy of the original next to the thumbnail (testing purposes only)
    if SAVE_ORIGINAL:
        original_dest = dest_path.with_name(f"{dest_path.stem}_o{source_path.suffix}")
        shutil.copy2(source_path, original_dest)

    return True


def reset_thumbnails(conn, dry_run: bool) -> None:
    """Delete all thumbnail files and set thumbnail_path to NULL in the DB.

    With dry_run=True nothing is changed; the intended actions are only logged.
    """
    log.warning("RESET=True — mažu existující thumbnaily.")

    # 1) Remove the thumbnail directory tree
    if LOCAL_THUMBNAIL_BASE.exists():
        if dry_run:
            log.info("[DRY RUN] Would delete directory tree: %s", LOCAL_THUMBNAIL_BASE)
        else:
            log.info("Deleting directory tree: %s", LOCAL_THUMBNAIL_BASE)
            shutil.rmtree(LOCAL_THUMBNAIL_BASE, ignore_errors=True)
            # Deletion stays best-effort, but leftovers are reported because
            # process_batch reuses any thumbnail file it finds on disk.
            if LOCAL_THUMBNAIL_BASE.exists():
                log.warning("Thumbnail dir could not be fully removed: %s", LOCAL_THUMBNAIL_BASE)
    else:
        log.info("Thumbnail dir does not exist, skipping FS delete: %s", LOCAL_THUMBNAIL_BASE)

    # 2) Clear thumbnail_path in the DB
    if dry_run:
        log.info("[DRY RUN] Would UPDATE photos SET thumbnail_path = NULL WHERE thumbnail_path IS NOT NULL")
    else:
        with conn.cursor() as cur:
            cur.execute("UPDATE photos SET thumbnail_path = NULL WHERE thumbnail_path IS NOT NULL")
            affected = cur.rowcount
        conn.commit()
        log.info("DB reset: cleared thumbnail_path on %d rows.", affected)


# ── Processing ───────────────────────────────────────────────────────────────

def _store_thumbnail_path(conn, db_path: str, photo_id) -> None:
    """Persist the thumbnail path for one photo and commit immediately."""
    with conn.cursor() as cur:
        cur.execute(
            "UPDATE photos SET thumbnail_path = %s WHERE id = %s",
            (db_path, photo_id),
        )
    conn.commit()


def process_batch(conn, batch_size: int, dry_run: bool,
                  skip_ids: Optional[Set[int]] = None) -> int:
    """Process up to batch_size photos that have no thumbnail_path yet.

    Args:
        conn: open psycopg2 connection.
        batch_size: maximum number of rows to fetch.
        dry_run: when True, no files are written and the DB is not updated.
        skip_ids: optional set of photo ids to exclude from the query. Ids
            handled here without being persisted (dry run, missing source,
            failure) are added to it, so that a caller passing the same set
            to later calls does not get those rows again. When None, nothing
            is excluded or recorded.

    Returns:
        Number of photos counted as processed (thumbnail generated, already
        present on disk, or "would be generated" in a dry run). 0 means either
        that no rows were fetched or that every fetched row was skipped;
        callers tell the two apart by checking whether skip_ids grew.
    """
    excluded = sorted(skip_ids) if skip_ids else []

    def remember(photo_id) -> None:
        if skip_ids is not None:
            skip_ids.add(photo_id)

    # Ordering (see the SQL comments): files >= 1 MB first, then Apple photos
    # with DateTimeOriginal, then other photos with DateTimeOriginal, then
    # anything that has exif_raw, finally by id.
    with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        cur.execute(
            """
            SELECT id, sha256_file, file_path, taken_at
            FROM photos
            WHERE thumbnail_path IS NULL
              AND id <> ALL(%s::bigint[])
            ORDER BY
                -- 1) reálné fotky (>= 1 MB) jdou před drobky/testy
                (file_size < 1048576),
                -- 2) Apple + DateTimeOriginal jdou úplně první
                NOT (exif_raw->>'Image Make' = 'Apple' AND exif_raw ? 'EXIF DateTimeOriginal'),
                -- 3) pak ostatní s DateTimeOriginal
                NOT (exif_raw ? 'EXIF DateTimeOriginal'),
                -- 4) pak cokoli s exif_raw
                (exif_raw IS NULL),
                id
            LIMIT %s
            """,
            (excluded, batch_size),
        )
        rows = cur.fetchall()

    if not rows:
        return 0

    processed = 0
    for row in rows:
        photo_id = row["id"]
        sha256 = row["sha256_file"]
        source_db = row["file_path"]
        taken_at = row["taken_at"]

        db_path = thumbnail_db_path(sha256, taken_at)
        local_dest = thumbnail_local_path(sha256, taken_at)
        local_source = db_path_to_local(source_db)

        # Thumbnail already exists on disk — only record its path in the DB
        if local_dest.exists():
            if dry_run:
                remember(photo_id)
            else:
                _store_thumbnail_path(conn, db_path, photo_id)
            processed += 1
            continue

        # Source file is missing
        if not local_source.is_file():
            log.warning("Source missing, skipping id=%d: %s (local: %s)", photo_id, source_db, local_source)
            remember(photo_id)
            continue

        try:
            if dry_run:
                log.info("[DRY RUN] Would generate: %s -> %s (DB: %s)", local_source, local_dest, db_path)
                remember(photo_id)
            else:
                generate_thumbnail(local_source, local_dest)
                _store_thumbnail_path(conn, db_path, photo_id)
            processed += 1
        except Exception:
            log.exception("Failed to generate thumbnail for id=%d: %s", photo_id, source_db)
            conn.rollback()
            remember(photo_id)

    return processed


def main():
    """Parse the CLI arguments and run batches until done or the limit is hit."""
    parser = argparse.ArgumentParser(description="Generate photo thumbnails")
    parser.add_argument("--batch-size", type=int, default=BATCH_SIZE)
    parser.add_argument("--dry-run", action="store_true", help="Don't write files or update DB")
    args = parser.parse_args()

    if args.batch_size < 1:
        parser.error("--batch-size must be a positive integer")

    limit = MAX_PHOTOS

    log.info("=" * 60)
    log.info("Starting thumbnail generation")
    log.info("  batch_size=%d, dry_run=%s, limit=%s", args.batch_size, args.dry_run, limit or "all")
    log.info("  hostname=%s, platform=%s", socket.gethostname(), platform.system())
    log.info("  source base (local):    %s", LOCAL_SOURCE_BASE)
    log.info("  thumbnail base (local): %s", LOCAL_THUMBNAIL_BASE)
    log.info("  thumbnail base (DB):    %s", DB_THUMBNAIL_BASE)

    conn = get_conn()
    total_processed = 0
    batch_num = 0
    # Ids seen during this run whose thumbnail_path stayed NULL; they are
    # excluded from later batches so the loop always makes progress.
    skipped_ids: Set[int] = set()

    try:
        if RESET:
            reset_thumbnails(conn, args.dry_run)

        while True:
            # When a limit is set, cap the batch size at the remaining count
            remaining = args.batch_size
            if limit > 0:
                remaining = min(args.batch_size, limit - total_processed)
                if remaining <= 0:
                    log.info("Limit %d reached. Done.", limit)
                    break

            batch_num += 1
            t0 = time.time()
            skipped_before = len(skipped_ids)
            count = process_batch(conn, remaining, args.dry_run, skipped_ids)
            elapsed = time.time() - t0

            # Finished only when the batch neither processed nor skipped
            # anything, i.e. the query returned no rows.
            if count == 0 and len(skipped_ids) == skipped_before:
                log.info("No more photos to process. Done.")
                break

            total_processed += count
            log.info(
                "Batch %d: processed %d thumbnails in %.1fs (total: %d)",
                batch_num, count, elapsed, total_processed,
            )

    except KeyboardInterrupt:
        log.info("Interrupted by user. Total processed: %d", total_processed)
    finally:
        conn.close()

    log.info("Finished. Total thumbnails: %d", total_processed)


if __name__ == "__main__":
    main()