Files
fotkyBuzalkovi/30 SběrDat/generate_thumbnails.py
T
administrator 29037554b8 notebookVb
2026-05-29 06:01:55 +02:00

314 lines
12 KiB
Python

#!/usr/bin/env python3
r"""
Thumbnail generation pipeline for photos database.
Reads photos with no thumbnail, generates JPEG thumbnails that fit within MAX_SIZE (currently 400x400, aspect ratio preserved), updates DB.
Cesty v DB se ukládají vždy v nativním Tower1 formátu:
/mnt/user/ZalohaVsechObrazku/thumbnails/{year}/{month}/{sha256}.jpg
Fyzické zápisy probíhají přes cestu odpovídající aktuálnímu prostředí:
- Tower1 (Unraid): /mnt/user/ZalohaVsechObrazku/thumbnails/...
- tower (Unraid): /mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku/thumbnails/...
- Windows: \\Tower1\ZalohaVsechObrazku\thumbnails\...
Stejný princip platí pro čtení zdrojových fotek — cesty v DB jsou v Tower1
formátu a skript je přemapuje na lokální mount.
Usage:
python generate_thumbnails.py [--batch-size 1000] [--dry-run]
Pro omezení počtu zpracovaných fotek nastav proměnnou MAX_PHOTOS níže (0 = všechny).
"""
import argparse
import logging
import os
import platform
import shutil
import socket
import sys
import time
from pathlib import Path, PurePosixPath
import psycopg2
import psycopg2.extras
from dotenv import load_dotenv
from PIL import Image, ImageOps
# Look for .env next to this script first, then in the parent directory
# (the project root); only the first file found is loaded.
_here = Path(__file__).parent
for _env in (_here.joinpath(".env"), _here.parent.joinpath(".env")):
    if not _env.is_file():
        continue
    load_dotenv(_env)
    break
# ── Configuration ────────────────────────────────────────────────────────────
# Maximum number of photos to process (0 = all).
# NOTE(review): 10 looks like a testing value — confirm before a full run.
MAX_PHOTOS = 10
# If True, ALL thumbnails (files on disk and thumbnail_path in the DB) are
# deleted at the start of every run.
# NOTE(review): destructive default — confirm this is intended outside testing.
RESET = True
# If True, a copy of the original is stored next to each thumbnail as
# {sha256}_o.{ext} (only for testing / visual comparison).
SAVE_ORIGINAL = True
# Bounding box the thumbnail must fit into (aspect ratio is preserved).
MAX_SIZE = (400, 400)
JPEG_QUALITY = 85
BATCH_SIZE = 1000
# Canonical prefixes stored in the DB (native Tower1 paths).
DB_THUMBNAIL_BASE = "/mnt/user/ZalohaVsechObrazku/thumbnails"
DB_SOURCE_BASE = "/mnt/user/ZalohaVsechObrazku"
# Physical paths for the environment the script is running in.
if platform.system() == "Windows":
    LOCAL_THUMBNAIL_BASE = Path(r"\\Tower1\ZalohaVsechObrazku\thumbnails")
    LOCAL_SOURCE_BASE = Path(r"\\Tower1\ZalohaVsechObrazku")
else:
    hostname = socket.gethostname()
    # Host names are case-insensitive and gethostname() may return an FQDN
    # (e.g. "tower1.lan"), so compare only the lower-cased short name.
    if hostname.split(".", 1)[0].lower() == "tower1":
        LOCAL_THUMBNAIL_BASE = Path("/mnt/user/ZalohaVsechObrazku/thumbnails")
        LOCAL_SOURCE_BASE = Path("/mnt/user/ZalohaVsechObrazku")
    else:
        # "tower" or any other Linux machine — go through the remote mount.
        LOCAL_THUMBNAIL_BASE = Path("/mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku/thumbnails")
        LOCAL_SOURCE_BASE = Path("/mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku")
# ── Logging ──────────────────────────────────────────────────────────────────
# Log to stdout and to a UTF-8 log file stored next to this script.
LOG_FILE = Path(__file__).with_name("generate_thumbnails.log")
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(LOG_FILE, encoding="utf-8"),
    ],
)
log = logging.getLogger(__name__)
# ── Pomocné funkce ───────────────────────────────────────────────────────────
def get_conn():
    """Open a new PostgreSQL connection configured from environment variables.

    Each setting is read from its ``DB_*`` variable first; when that is unset
    or empty, the matching ``PG*`` variable is used, and finally a built-in
    default.
    """

    def _setting(primary, fallback, default):
        # An empty DB_* value is treated the same as an unset one.
        return os.getenv(primary) or os.getenv(fallback, default)

    return psycopg2.connect(
        host=_setting("DB_HOST", "PGHOST", "192.168.1.76"),
        port=int(_setting("DB_PORT", "PGPORT", 5432)),
        dbname=_setting("DB_NAME", "PGDATABASE", "fotky_buzalkovi"),
        user=_setting("DB_USER", "PGUSER", "vladimir.buzalka"),
        password=_setting("DB_PASSWORD", "PGPASSWORD", ""),
    )
def db_path_to_local(db_path: str) -> Path:
    """Convert a DB path (native Tower1 format) to a local path for reading/writing.

    Paths located under ``DB_SOURCE_BASE`` are re-rooted at
    ``LOCAL_SOURCE_BASE``; any other path is returned unchanged as a ``Path``.

    The prefix is matched on a path-component boundary, so a sibling directory
    that merely shares the textual prefix (e.g. ``<base>2/...``) is not
    remapped into the share.
    """
    prefix = DB_SOURCE_BASE.rstrip("/")
    if db_path == prefix or db_path.startswith(prefix + "/"):
        relative = db_path[len(prefix):].lstrip("/")
        return LOCAL_SOURCE_BASE / relative
    return Path(db_path)
def _thumbnail_subdir(taken_at) -> tuple:
    """Return the (year, month) directory names used to shard thumbnails.

    ``taken_at`` is expected to expose ``.year`` and ``.month``; a falsy value
    (e.g. ``None``) maps to ``("unknown", "unknown")``.
    """
    if taken_at:
        return str(taken_at.year), f"{taken_at.month:02d}"
    return "unknown", "unknown"


def thumbnail_db_path(sha256: str, taken_at) -> str:
    """Return the canonical thumbnail path to store in the DB (Tower1 format)."""
    year, month = _thumbnail_subdir(taken_at)
    # strip() removes surrounding whitespace from the hash before it is used
    # as a file name.
    return f"{DB_THUMBNAIL_BASE}/{year}/{month}/{sha256.strip()}.jpg"


def thumbnail_local_path(sha256: str, taken_at) -> Path:
    """Return the local physical path the thumbnail file is written to."""
    year, month = _thumbnail_subdir(taken_at)
    return LOCAL_THUMBNAIL_BASE / year / month / f"{sha256.strip()}.jpg"
def generate_thumbnail(source_path: Path, dest_path: Path) -> bool:
    """Generate a JPEG thumbnail of ``source_path`` at ``dest_path``.

    The image is scaled to fit within ``MAX_SIZE`` while keeping its aspect
    ratio. If ``SAVE_ORIGINAL`` is True, the original file is also copied next
    to the thumbnail with an ``_o`` suffix.

    The thumbnail is first written to a temporary ``*.part`` file and then
    moved into place, so a failed or interrupted save never leaves a truncated
    file at ``dest_path`` (callers treat an existing ``dest_path`` as a
    finished thumbnail).

    Returns:
        Always True; failures are reported by raising.

    Raises:
        Whatever Pillow or the file system raises while reading, converting,
        saving or copying.
    """
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = dest_path.with_name(f"{dest_path.name}.part")
    try:
        with Image.open(source_path) as img:
            # Apply the EXIF Orientation tag, otherwise phone/camera photos
            # come out rotated.
            img = ImageOps.exif_transpose(img)
            if img.mode in ("RGBA", "P", "LA"):
                img = img.convert("RGB")
            img.thumbnail(MAX_SIZE, Image.LANCZOS)
            # The format is passed explicitly, so the ".part" suffix does not
            # affect how the file is encoded.
            img.save(tmp_path, "JPEG", quality=JPEG_QUALITY)
        os.replace(tmp_path, dest_path)
    except BaseException:
        # Best-effort cleanup of the partial file; never mask the real error.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise
    # Copy of the original next to the thumbnail with an _o suffix
    # (for testing purposes only).
    if SAVE_ORIGINAL:
        original_dest = dest_path.with_name(f"{dest_path.stem}_o{source_path.suffix}")
        shutil.copy2(source_path, original_dest)
    return True
def reset_thumbnails(conn, dry_run: bool) -> None:
    """Delete all thumbnails: remove the files and clear thumbnail_path in the DB.

    Args:
        conn: Open DB connection; used (and committed) only when ``dry_run``
            is False.
        dry_run: When True, only log what would be done.

    Deleting the directory tree stays best-effort (errors are ignored), but an
    incomplete deletion is now logged instead of passing silently.
    """
    log.warning("RESET=True — mažu existující thumbnaily.")
    # 1) Delete the thumbnail directory tree.
    if not LOCAL_THUMBNAIL_BASE.exists():
        log.info("Thumbnail dir does not exist, skipping FS delete: %s", LOCAL_THUMBNAIL_BASE)
    elif dry_run:
        log.info("[DRY RUN] Would delete directory tree: %s", LOCAL_THUMBNAIL_BASE)
    else:
        log.info("Deleting directory tree: %s", LOCAL_THUMBNAIL_BASE)
        shutil.rmtree(LOCAL_THUMBNAIL_BASE, ignore_errors=True)
        if LOCAL_THUMBNAIL_BASE.exists():
            # ignore_errors=True hides failures, so report leftovers explicitly.
            log.warning(
                "Thumbnail dir could not be fully deleted, leftover files may remain: %s",
                LOCAL_THUMBNAIL_BASE,
            )
    # 2) Clear thumbnail_path in the DB.
    if dry_run:
        log.info("[DRY RUN] Would UPDATE photos SET thumbnail_path = NULL WHERE thumbnail_path IS NOT NULL")
        return
    with conn.cursor() as cur:
        cur.execute("UPDATE photos SET thumbnail_path = NULL WHERE thumbnail_path IS NOT NULL")
        affected = cur.rowcount
    conn.commit()
    log.info("DB reset: cleared thumbnail_path on %d rows.", affected)
# ── Zpracování ───────────────────────────────────────────────────────────────
# IDs fetched earlier in this run whose thumbnail_path is still NULL afterwards
# (missing source, failed generation, or dry run). They are excluded from later
# queries so the same rows are not selected again and again.
# In a dry run this set grows with every row visited.
_ATTEMPTED_IDS = set()

_PENDING_PHOTOS_SQL = """
    SELECT id, sha256_file, file_path, taken_at
    FROM photos
    WHERE thumbnail_path IS NULL
      AND id <> ALL(%s::bigint[])
    ORDER BY
        -- 1) real photos (>= 1 MB) go before tiny/test files
        (file_size < 1048576),
        -- 2) Apple + DateTimeOriginal go first of all
        NOT (exif_raw->>'Image Make' = 'Apple'
             AND exif_raw ? 'EXIF DateTimeOriginal'),
        -- 3) then the others with DateTimeOriginal
        NOT (exif_raw ? 'EXIF DateTimeOriginal'),
        -- 4) then anything with exif_raw
        (exif_raw IS NULL),
        id
    LIMIT %s
"""


def _process_photo(conn, row, dry_run: bool) -> bool:
    """Handle one photo row; return True when it counts as processed."""
    photo_id = row["id"]
    source_db = row["file_path"]
    db_path = thumbnail_db_path(row["sha256_file"], row["taken_at"])
    local_dest = thumbnail_local_path(row["sha256_file"], row["taken_at"])
    local_source = db_path_to_local(source_db)

    # When the thumbnail already exists on disk, only the DB path is written.
    if not local_dest.exists():
        if not local_source.is_file():
            log.warning("Source missing, skipping id=%d: %s (local: %s)", photo_id, source_db, local_source)
            return False
        if dry_run:
            log.info("[DRY RUN] Would generate: %s -> %s (DB: %s)", local_source, local_dest, db_path)
            return True
        generate_thumbnail(local_source, local_dest)

    if not dry_run:
        with conn.cursor() as cur:
            cur.execute(
                "UPDATE photos SET thumbnail_path = %s WHERE id = %s",
                (db_path, photo_id),
            )
        conn.commit()
    return True


def process_batch(conn, batch_size: int, dry_run: bool) -> int:
    """Process up to ``batch_size`` photos that have no thumbnail yet.

    Args:
        conn: Open DB connection.
        batch_size: Maximum number of rows fetched per query.
        dry_run: When True, nothing is written to disk or to the DB.

    Returns:
        Number of photos processed (thumbnail generated, existing thumbnail
        linked, or — in a dry run — would have been). 0 is returned only when
        no untried photo without a thumbnail is left.

    Rows that were skipped, failed, or only simulated keep thumbnail_path NULL,
    so they are remembered in ``_ATTEMPTED_IDS`` and excluded from later
    queries. If an entire fetched batch yields nothing, the next rows are
    fetched right away instead of returning 0 (which the caller reads as
    "nothing left").
    """
    while True:
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            cur.execute(_PENDING_PHOTOS_SQL, (sorted(_ATTEMPTED_IDS), batch_size))
            rows = cur.fetchall()
        if not rows:
            return 0

        processed = 0
        for row in rows:
            photo_id = row["id"]
            try:
                done = _process_photo(conn, row, dry_run)
            except Exception:
                # Per-row boundary: one bad photo must not abort the whole run.
                log.exception("Failed to generate thumbnail for id=%d: %s", photo_id, row["file_path"])
                conn.rollback()
                done = False
            if done:
                processed += 1
            if dry_run or not done:
                _ATTEMPTED_IDS.add(photo_id)

        if processed:
            return processed
def main():
    """Command-line entry point: optionally reset, then generate thumbnails in batches.

    Options:
        --batch-size  Rows fetched per batch (must be positive).
        --dry-run     Do not write files or update the DB.
        --limit       Maximum number of photos to process; defaults to
                      MAX_PHOTOS, and 0 (or a negative value) means all.
    """
    parser = argparse.ArgumentParser(description="Generate photo thumbnails")
    parser.add_argument("--batch-size", type=int, default=BATCH_SIZE)
    parser.add_argument("--dry-run", action="store_true", help="Don't write files or update DB")
    parser.add_argument(
        "--limit", type=int, default=MAX_PHOTOS,
        help="Maximum number of photos to process (0 = all)",
    )
    args = parser.parse_args()
    # A zero batch would look like "nothing to do"; a negative one is an SQL error.
    if args.batch_size <= 0:
        parser.error("--batch-size must be a positive integer")
    limit = args.limit

    log.info("=" * 60)
    log.info("Starting thumbnail generation")
    log.info("  batch_size=%d, dry_run=%s, limit=%s", args.batch_size, args.dry_run, limit if limit > 0 else "all")
    log.info("  hostname=%s, platform=%s", socket.gethostname(), platform.system())
    log.info("  source base (local): %s", LOCAL_SOURCE_BASE)
    log.info("  thumbnail base (local): %s", LOCAL_THUMBNAIL_BASE)
    log.info("  thumbnail base (DB): %s", DB_THUMBNAIL_BASE)

    conn = get_conn()
    total_processed = 0
    batch_num = 0
    try:
        if RESET:
            reset_thumbnails(conn, args.dry_run)
        while True:
            # With a limit set, cap the batch size at the remaining count.
            remaining = args.batch_size
            if limit > 0:
                remaining = min(args.batch_size, limit - total_processed)
                if remaining <= 0:
                    log.info("Limit %d reached. Done.", limit)
                    break
            batch_num += 1
            # Monotonic clock: elapsed time is not affected by clock changes.
            t0 = time.monotonic()
            count = process_batch(conn, remaining, args.dry_run)
            elapsed = time.monotonic() - t0
            if count == 0:
                log.info("No more photos to process. Done.")
                break
            total_processed += count
            log.info(
                "Batch %d: processed %d thumbnails in %.1fs (total: %d)",
                batch_num, count, elapsed, total_processed,
            )
    except KeyboardInterrupt:
        log.info("Interrupted by user. Total processed: %d", total_processed)
    finally:
        conn.close()
    log.info("Finished. Total thumbnails: %d", total_processed)


if __name__ == "__main__":
    main()