#!/usr/bin/env python3
r"""
Thumbnail generation pipeline for the photos database.

Selects photos whose ``thumbnail_path`` is NULL, writes a JPEG thumbnail that
fits inside ``MAX_SIZE`` (aspect ratio preserved) and records the thumbnail
path in the database.

Paths stored in the DB always use the native Tower1 form:
    /mnt/user/ZalohaVsechObrazku/thumbnails/{year}/{month}/{sha256}.jpg

Physical reads/writes go through the path that matches the current machine:
  - Tower1 (Unraid): /mnt/user/ZalohaVsechObrazku/thumbnails/...
  - tower (Unraid):  /mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku/thumbnails/...
  - Windows:         \\Tower1\ZalohaVsechObrazku\thumbnails\...

The same mapping is applied when reading source photos: DB paths are in
Tower1 form and the script remaps them onto the local mount.

Usage:
    python generate_thumbnails.py [--batch-size 1000] [--dry-run]

To cap the number of processed photos set MAX_PHOTOS below (0 = all).
"""

import argparse
import contextlib
import logging
import os
import platform
import shutil
import socket
import sys
import time
from pathlib import Path, PurePosixPath

import psycopg2
import psycopg2.extras
from dotenv import load_dotenv
from PIL import Image, ImageOps

# Look for .env next to the script first, then in the parent directory
# (project root); the first one found wins.
_here = Path(__file__).parent
for _env in (_here / ".env", _here.parent / ".env"):
    if _env.is_file():
        load_dotenv(_env)
        break

# ── Configuration ────────────────────────────────────────────────────────────

# Maximum number of photos to process (0 = all).
MAX_PHOTOS = 10

# If True, ALL thumbnails (files and DB paths) are wiped at the start of every
# run.  WARNING: this is destructive; NOTE(review): True together with
# MAX_PHOTOS = 10 looks like a test setup, confirm before a production run.
RESET = True

# If True, a copy of the original is stored next to each thumbnail as
# {sha256}_o.{ext} (testing / visual comparison only).
SAVE_ORIGINAL = True

# Bounding box the thumbnail must fit into; aspect ratio is preserved.
MAX_SIZE = (400, 400)
JPEG_QUALITY = 85
BATCH_SIZE = 1000

# Canonical prefixes as stored in the DB (native Tower1 paths).
DB_THUMBNAIL_BASE = "/mnt/user/ZalohaVsechObrazku/thumbnails"
DB_SOURCE_BASE = "/mnt/user/ZalohaVsechObrazku"

# Physical paths for the machine the script is running on.
if platform.system() == "Windows":
    LOCAL_THUMBNAIL_BASE = Path(r"\\Tower1\ZalohaVsechObrazku\thumbnails")
    LOCAL_SOURCE_BASE = Path(r"\\Tower1\ZalohaVsechObrazku")
else:
    hostname = socket.gethostname()
    if hostname == "Tower1":
        LOCAL_THUMBNAIL_BASE = Path("/mnt/user/ZalohaVsechObrazku/thumbnails")
        LOCAL_SOURCE_BASE = Path("/mnt/user/ZalohaVsechObrazku")
    else:
        # "tower" or any other Linux box: go through the remote mount.
        LOCAL_THUMBNAIL_BASE = Path("/mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku/thumbnails")
        LOCAL_SOURCE_BASE = Path("/mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku")

# ── Logging ──────────────────────────────────────────────────────────────────

LOG_FILE = Path(__file__).parent / "generate_thumbnails.log"

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(LOG_FILE, encoding="utf-8"),
    ],
)
log = logging.getLogger(__name__)

# ── Helpers ──────────────────────────────────────────────────────────────────

# Query for photos that still need a thumbnail.  It is split in two so an
# optional "exclude these ids" clause can be inserted between WHERE and
# ORDER BY.  The fragments are constants; only values go through parameters.
_PENDING_SQL_HEAD = """
    SELECT id, sha256_file, file_path, taken_at
    FROM photos
    WHERE thumbnail_path IS NULL
"""
_PENDING_SQL_EXCLUDE = """
      AND NOT (id = ANY(%s))
"""
_PENDING_SQL_TAIL = """
    ORDER BY
        -- 1) reálné fotky (>= 1 MB) jdou před drobky/testy
        (file_size < 1048576),
        -- 2) Apple + DateTimeOriginal jdou úplně první
        NOT (exif_raw->>'Image Make' = 'Apple'
             AND exif_raw ? 'EXIF DateTimeOriginal'),
        -- 3) pak ostatní s DateTimeOriginal
        NOT (exif_raw ? 'EXIF DateTimeOriginal'),
        -- 4) pak cokoli s exif_raw
        (exif_raw IS NULL),
        id
    LIMIT %s
"""


def get_conn():
    """Open a PostgreSQL connection.

    Settings come from DB_* environment variables, falling back to the
    libpq-style PG* variables and finally to the hard-coded defaults below.
    """
    return psycopg2.connect(
        host=os.getenv("DB_HOST") or os.getenv("PGHOST", "192.168.1.76"),
        port=int(os.getenv("DB_PORT") or os.getenv("PGPORT", 5432)),
        dbname=os.getenv("DB_NAME") or os.getenv("PGDATABASE", "fotky_buzalkovi"),
        user=os.getenv("DB_USER") or os.getenv("PGUSER", "vladimir.buzalka"),
        password=os.getenv("DB_PASSWORD") or os.getenv("PGPASSWORD", ""),
    )


def db_path_to_local(db_path: str) -> Path:
    """Map a DB path (native Tower1 form) onto the local mount.

    Paths outside DB_SOURCE_BASE are returned unchanged (as a Path).
    """
    # Require a path-component boundary so a sibling directory that merely
    # shares the prefix (e.g. ".../ZalohaVsechObrazku2") is not remapped.
    if db_path == DB_SOURCE_BASE or db_path.startswith(DB_SOURCE_BASE + "/"):
        relative = db_path[len(DB_SOURCE_BASE):]
        return LOCAL_SOURCE_BASE / relative.lstrip("/")
    return Path(db_path)


def _year_month(taken_at) -> tuple:
    """Return the (year, month) directory names for a capture timestamp.

    Both are "unknown" when *taken_at* is missing.
    """
    if taken_at:
        return str(taken_at.year), f"{taken_at.month:02d}"
    return "unknown", "unknown"


def thumbnail_db_path(sha256: str, taken_at) -> str:
    """Return the canonical thumbnail path to store in the DB (Tower1 form)."""
    year, month = _year_month(taken_at)
    return f"{DB_THUMBNAIL_BASE}/{year}/{month}/{sha256.strip()}.jpg"


def thumbnail_local_path(sha256: str, taken_at) -> Path:
    """Return the local filesystem path the thumbnail file is written to."""
    year, month = _year_month(taken_at)
    return LOCAL_THUMBNAIL_BASE / year / month / f"{sha256.strip()}.jpg"


def generate_thumbnail(source_path: Path, dest_path: Path) -> bool:
    """Write a JPEG thumbnail of *source_path* to *dest_path*.

    The image is scaled to fit inside MAX_SIZE with its aspect ratio kept.
    When SAVE_ORIGINAL is True the original is also copied next to the
    thumbnail with an ``_o`` suffix.

    Returns True on success; any failure raises (Pillow / OSError).
    """
    dest_path.parent.mkdir(parents=True, exist_ok=True)

    # Write to a temporary sibling and rename at the end.  A half-written
    # file must never appear under the final name, because process_batch()
    # treats an existing thumbnail file as finished work.
    tmp_path = dest_path.with_name(dest_path.name + ".tmp")
    try:
        with Image.open(source_path) as img:
            # Apply the EXIF orientation, otherwise phone/camera photos come
            # out rotated.
            img = ImageOps.exif_transpose(img)
            # Normalise everything except plain RGB / greyscale to RGB:
            # alpha and palette modes cannot be stored as JPEG, and neither
            # can modes such as I;16 or F.
            if img.mode not in ("RGB", "L"):
                img = img.convert("RGB")
            img.thumbnail(MAX_SIZE, Image.LANCZOS)
            # The format is explicit because the temp name has no .jpg suffix.
            img.save(tmp_path, "JPEG", quality=JPEG_QUALITY)
        os.replace(tmp_path, dest_path)
    except BaseException:
        # Clean up the partial file (also on Ctrl+C), then re-raise.
        with contextlib.suppress(OSError):
            tmp_path.unlink()
        raise

    # Copy of the original next to the thumbnail (testing only).
    if SAVE_ORIGINAL:
        original_dest = dest_path.with_name(f"{dest_path.stem}_o{source_path.suffix}")
        shutil.copy2(source_path, original_dest)
    return True


def reset_thumbnails(conn, dry_run: bool) -> None:
    """Delete every thumbnail file and set thumbnail_path to NULL in the DB.

    In dry-run mode nothing is deleted or updated; the actions are only logged.
    """
    log.warning("RESET=True — mažu existující thumbnaily.")

    # 1) Remove the thumbnail directory tree.
    if LOCAL_THUMBNAIL_BASE.exists():
        if dry_run:
            log.info("[DRY RUN] Would delete directory tree: %s", LOCAL_THUMBNAIL_BASE)
        else:
            log.info("Deleting directory tree: %s", LOCAL_THUMBNAIL_BASE)
            shutil.rmtree(LOCAL_THUMBNAIL_BASE, ignore_errors=True)
            # Errors are ignored above (best effort), but leftovers matter:
            # a surviving thumbnail is re-registered instead of regenerated.
            if LOCAL_THUMBNAIL_BASE.exists():
                log.warning(
                    "Thumbnail dir could not be fully removed; stale files may be reused: %s",
                    LOCAL_THUMBNAIL_BASE,
                )
    else:
        log.info("Thumbnail dir does not exist, skipping FS delete: %s", LOCAL_THUMBNAIL_BASE)

    # 2) Clear thumbnail_path in the DB.
    if dry_run:
        log.info("[DRY RUN] Would UPDATE photos SET thumbnail_path = NULL WHERE thumbnail_path IS NOT NULL")
    else:
        with conn.cursor() as cur:
            cur.execute("UPDATE photos SET thumbnail_path = NULL WHERE thumbnail_path IS NOT NULL")
            affected = cur.rowcount
        conn.commit()
        log.info("DB reset: cleared thumbnail_path on %d rows.", affected)


def _store_thumbnail_path(conn, photo_id, db_path: str) -> None:
    """Persist the thumbnail path for one photo and commit immediately."""
    with conn.cursor() as cur:
        cur.execute(
            "UPDATE photos SET thumbnail_path = %s WHERE id = %s",
            (db_path, photo_id),
        )
    conn.commit()


# ── Processing ───────────────────────────────────────────────────────────────


def process_batch(conn, batch_size: int, dry_run: bool, skip_ids=None) -> int:
    """Process up to *batch_size* photos that have no thumbnail yet.

    Args:
        conn: open psycopg2 connection.
        batch_size: maximum number of rows to fetch.
        dry_run: when True, no file is written and the DB is not updated.
        skip_ids: optional mutable set of photo ids to exclude from the query.
            Every row whose thumbnail_path stays NULL after this call
            (missing source, failed generation, any dry-run row) is added to
            it.  Passing the same set to the next call stops those rows from
            being selected again; without it they are re-selected forever.

    Returns:
        Number of photos counted as processed (generated, re-registered, or
        "would be generated" in dry-run mode).
    """
    if skip_ids is None:
        skip_ids = set()

    sql = _PENDING_SQL_HEAD
    params = []
    if skip_ids:
        sql += _PENDING_SQL_EXCLUDE
        params.append(list(skip_ids))
    sql += _PENDING_SQL_TAIL
    params.append(batch_size)

    with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        cur.execute(sql, params)
        rows = cur.fetchall()
    # End the implicit read transaction so the connection does not sit
    # "idle in transaction" while images are being resized.
    conn.commit()

    if not rows:
        return 0

    processed = 0
    for row in rows:
        photo_id = row["id"]
        sha256 = row["sha256_file"]
        source_db = row["file_path"]
        taken_at = row["taken_at"]

        db_path = thumbnail_db_path(sha256, taken_at)
        local_dest = thumbnail_local_path(sha256, taken_at)
        local_source = db_path_to_local(source_db)

        # Thumbnail already on disk: only record its path in the DB.
        if local_dest.exists():
            if dry_run:
                skip_ids.add(photo_id)
            else:
                _store_thumbnail_path(conn, photo_id, db_path)
            processed += 1
            continue

        # Source file is missing: nothing can be generated.
        if not local_source.is_file():
            log.warning("Source missing, skipping id=%d: %s (local: %s)", photo_id, source_db, local_source)
            skip_ids.add(photo_id)
            continue

        if dry_run:
            log.info("[DRY RUN] Would generate: %s -> %s (DB: %s)", local_source, local_dest, db_path)
            skip_ids.add(photo_id)
            processed += 1
            continue

        try:
            generate_thumbnail(local_source, local_dest)
            _store_thumbnail_path(conn, photo_id, db_path)
            processed += 1
        except Exception:
            # One bad image must not stop the batch; log it and move on.
            log.exception("Failed to generate thumbnail for id=%d: %s", photo_id, source_db)
            conn.rollback()
            skip_ids.add(photo_id)

    return processed


def main():
    """Command-line entry point: optional reset, then batches until done."""
    parser = argparse.ArgumentParser(description="Generate photo thumbnails")
    parser.add_argument("--batch-size", type=int, default=BATCH_SIZE)
    parser.add_argument("--dry-run", action="store_true", help="Don't write files or update DB")
    args = parser.parse_args()
    if args.batch_size < 1:
        # A negative LIMIT is an SQL error and 0 would look like "all done".
        parser.error("--batch-size must be >= 1")

    limit = MAX_PHOTOS

    log.info("=" * 60)
    log.info("Starting thumbnail generation")
    log.info(" batch_size=%d, dry_run=%s, limit=%s", args.batch_size, args.dry_run, limit or "all")
    log.info(" hostname=%s, platform=%s", socket.gethostname(), platform.system())
    log.info(" source base (local): %s", LOCAL_SOURCE_BASE)
    log.info(" thumbnail base (local): %s", LOCAL_THUMBNAIL_BASE)
    log.info(" thumbnail base (DB): %s", DB_THUMBNAIL_BASE)

    conn = get_conn()
    total_processed = 0
    batch_num = 0
    # Ids already attempted in this run whose thumbnail_path is still NULL.
    skipped_ids = set()

    try:
        if RESET:
            reset_thumbnails(conn, args.dry_run)

        while True:
            # With a limit set, shrink the batch to what is still allowed.
            remaining = args.batch_size
            if limit > 0:
                remaining = min(args.batch_size, limit - total_processed)
                if remaining <= 0:
                    log.info("Limit %d reached. Done.", limit)
                    break

            batch_num += 1
            skipped_before = len(skipped_ids)
            t0 = time.time()
            count = process_batch(conn, remaining, args.dry_run, skipped_ids)
            elapsed = time.time() - t0
            newly_attempted = len(skipped_ids) - skipped_before

            # Finished only when the query produced nothing new at all.  A
            # batch made up entirely of skipped rows returns 0 as well, but
            # there may still be workable rows behind it.
            if count == 0 and newly_attempted == 0:
                log.info("No more photos to process. Done.")
                break

            total_processed += count
            log.info(
                "Batch %d: processed %d thumbnails in %.1fs (total: %d)",
                batch_num, count, elapsed, total_processed,
            )
    except KeyboardInterrupt:
        log.info("Interrupted by user. Total processed: %d", total_processed)
    finally:
        conn.close()

    log.info("Finished. Total thumbnails: %d", total_processed)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""
FotkyBuzalkovi MCP server.

Exposes read-only tools for querying the fotky_buzalkovi PostgreSQL database.
"""

import contextlib
import json
import os
import re
import sys
from pathlib import Path

import psycopg2
import psycopg2.extras
from dotenv import load_dotenv
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent

# Load .env from the directory this script lives in.
load_dotenv(Path(__file__).parent / ".env")

DB_CONFIG = {
    "host": os.getenv("DB_HOST", "192.168.1.76"),
    "port": int(os.getenv("DB_PORT", 5432)),
    "user": os.getenv("DB_USER", "vladimir.buzalka"),
    "password": os.getenv("DB_PASSWORD", ""),
    "dbname": os.getenv("DB_NAME", "fotky_buzalkovi"),
}

# Row limits advertised in the "query" tool schema.
DEFAULT_ROW_LIMIT = 100
MAX_ROW_LIMIT = 500

# A write/DDL keyword at the start of any line.  \b keeps identifiers such as
# "created_at" or "updated_at" from being mistaken for CREATE / UPDATE.
_WRITE_KEYWORD_RE = re.compile(
    r"^\s*(INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE)\b",
    re.IGNORECASE | re.MULTILINE,
)


def get_conn():
    """Open a new connection using DB_CONFIG."""
    return psycopg2.connect(**DB_CONFIG)


@contextlib.contextmanager
def _readonly_connection():
    """Yield a connection in a READ ONLY session and always close it.

    psycopg2's own ``with connection:`` block only ends the transaction and
    leaves the connection open, so it is closed explicitly here.
    """
    conn = get_conn()
    try:
        # The server itself rejects writes, whatever the SQL text looks like.
        conn.set_session(readonly=True)
        yield conn
    finally:
        conn.close()


def run_query(sql: str, params=None, limit: int = 500):
    """Run a query and return ``(rows, description)``.

    *rows* is a list of dicts holding at most *limit* rows; *description* is
    the cursor description of the result set.
    """
    with _readonly_connection() as conn:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(sql, params)
            rows = cur.fetchmany(limit)
            description = cur.description
    return [dict(r) for r in rows], description


def _check_readonly(sql: str) -> None:
    """Raise ValueError when *sql* is obviously not a single read-only query.

    This is only a friendly early error.  Enforcement is the READ ONLY
    session in _readonly_connection(); the best protection is a database
    role that has SELECT privileges only.
    """
    match = _WRITE_KEYWORD_RE.search(sql)
    if match:
        raise ValueError(f"Pouze SELECT dotazy jsou povoleny. Nalezeno: {match.group(1).upper()}")
    # Allow one statement only: "...; COMMIT; DROP ..." would otherwise leave
    # the read-only transaction.  Trailing semicolons are fine.
    # NOTE: this also rejects a ';' inside a string literal.
    if ";" in sql.strip().rstrip("; \t\r\n"):
        raise ValueError("Pouze jeden dotaz je povolen (středník uvnitř dotazu).")


def _text(message: str) -> list:
    """Wrap a plain string as the MCP text-content response list."""
    return [TextContent(type="text", text=message)]


def _json(payload) -> list:
    """Serialise *payload* as pretty JSON and wrap it as an MCP response."""
    return _text(json.dumps(payload, ensure_ascii=False, default=str, indent=2))


# ---------------------------------------------------------------------------
# Server
# ---------------------------------------------------------------------------

server = Server("fotky-buzalkovi")


@server.list_tools()
async def list_tools() -> list[Tool]:
    """Describe the tools this server offers."""
    return [
        Tool(
            name="query",
            description=(
                "Spustí libovolný SELECT dotaz na databázi fotky_buzalkovi. "
                "Vrátí max. 500 řádků. Používej pro průzkum dat."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "sql": {
                        "type": "string",
                        "description": "SELECT dotaz (jen čtení, INSERT/UPDATE/DELETE nejsou povoleny)",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Max. počet vrácených řádků (default 100, max 500)",
                        "default": 100,
                    },
                },
                "required": ["sql"],
            },
        ),
        Tool(
            name="tables",
            description="Vrátí seznam všech tabulek v databázi s počty řádků.",
            inputSchema={"type": "object", "properties": {}},
        ),
        Tool(
            name="describe_table",
            description="Vrátí strukturu tabulky — sloupce, typy, nullable, default.",
            inputSchema={
                "type": "object",
                "properties": {
                    "table": {"type": "string", "description": "Název tabulky"},
                },
                "required": ["table"],
            },
        ),
        Tool(
            name="stats",
            description=(
                "Základní statistiky projektu: počty fotek, stav importu, "
                "přehled kamer, roky pořízení, chybějící data."
            ),
            inputSchema={"type": "object", "properties": {}},
        ),
    ]


def _tool_query(arguments: dict) -> list:
    """Handle the "query" tool: run one caller-supplied read-only query."""
    sql = arguments["sql"]
    _check_readonly(sql)
    requested = int(arguments.get("limit", DEFAULT_ROW_LIMIT))
    # Clamp to 1..MAX_ROW_LIMIT; zero or negative sizes make no sense here.
    limit = max(1, min(requested, MAX_ROW_LIMIT))
    rows, _ = run_query(sql, limit=limit)
    return _json(rows)


def _tool_tables(arguments: dict) -> list:
    """Handle the "tables" tool: public base tables with estimated row counts."""
    # pg_class is joined through its namespace as well as its name, so a
    # same-named relation in another schema cannot add or distort rows.
    sql = """
        SELECT
            t.table_name,
            c.reltuples::bigint AS est_rows
        FROM information_schema.tables t
        JOIN pg_namespace n ON n.nspname = t.table_schema
        JOIN pg_class c ON c.relname = t.table_name
                       AND c.relnamespace = n.oid
        WHERE t.table_schema = 'public'
          AND t.table_type = 'BASE TABLE'
        ORDER BY t.table_name
    """
    rows, _ = run_query(sql, limit=100)
    return _json(rows)


def _tool_describe_table(arguments: dict) -> list:
    """Handle the "describe_table" tool: columns of one public table."""
    table = arguments["table"]
    sql = """
        SELECT
            column_name,
            data_type,
            character_maximum_length,
            is_nullable,
            column_default
        FROM information_schema.columns
        WHERE table_schema = 'public'
          AND table_name = %s
        ORDER BY ordinal_position
    """
    rows, _ = run_query(sql, params=(table,), limit=200)
    if not rows:
        return _text(f"Tabulka '{table}' nenalezena.")
    return _json(rows)


def _tool_stats(arguments: dict) -> list:
    """Handle the "stats" tool: fixed summary queries over one connection."""
    count_queries = {
        "zaloha_obrazku_total": "SELECT COUNT(*) AS total FROM zaloha_obrazku",
        "zdrojove_soubory_total": "SELECT COUNT(*) AS total FROM zdrojove_soubory",
        "photos_total": "SELECT COUNT(*) AS total FROM photos",
        "photos_with_taken_at": "SELECT COUNT(*) AS total FROM photos WHERE taken_at IS NOT NULL",
        "photos_with_gps": "SELECT COUNT(*) AS total FROM photos WHERE gps_lat IS NOT NULL",
    }
    list_queries = {
        "top_cameras": """
            SELECT camera_model, COUNT(*) AS cnt
            FROM photos
            WHERE camera_model IS NOT NULL
            GROUP BY camera_model
            ORDER BY cnt DESC
            LIMIT 10
        """,
        "photos_by_year": """
            SELECT EXTRACT(YEAR FROM taken_at)::int AS rok, COUNT(*) AS cnt
            FROM photos
            WHERE taken_at IS NOT NULL
            GROUP BY rok
            ORDER BY rok
        """,
        "processing_status": """
            SELECT processing_status, COUNT(*) AS cnt
            FROM photos
            GROUP BY processing_status
            ORDER BY cnt DESC
        """,
    }

    results = {}
    with _readonly_connection() as conn:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            for key, sql in count_queries.items():
                cur.execute(sql)
                results[key] = cur.fetchone()["total"]
            for key, sql in list_queries.items():
                cur.execute(sql)
                results[key] = [dict(r) for r in cur.fetchall()]
    return _json(results)


_TOOL_HANDLERS = {
    "query": _tool_query,
    "tables": _tool_tables,
    "describe_table": _tool_describe_table,
    "stats": _tool_stats,
}


@server.call_tool()
async def call_tool(name: str, arguments: dict):
    """Dispatch a tool call; errors are returned as a text response."""
    handler = _TOOL_HANDLERS.get(name)
    if handler is None:
        return _text(f"Neznámý nástroj: {name}")
    try:
        return handler(arguments or {})
    except Exception as e:
        # Protocol boundary: report the failure to the client rather than
        # letting it take the server down.
        return _text(f"Chyba: {e}")


# ---------------------------------------------------------------------------
# Startup
# ---------------------------------------------------------------------------

async def main():
    """Serve MCP requests over stdio until the client disconnects."""
    async with stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            server.create_initialization_options(),
        )


if __name__ == "__main__":
    import asyncio
    asyncio.run(main())
#!/usr/bin/env python3
"""One-shot migration: add thumbnail_path column + partial index.

Exits with status 1 when the column or a valid index cannot be verified
afterwards, so callers and CI can detect a failed migration.
"""
import os
import sys
from pathlib import Path

import psycopg2
from dotenv import load_dotenv

load_dotenv(Path(__file__).parent / ".env")

INDEX_NAME = "idx_photos_thumbnail_path_null"

conn = psycopg2.connect(
    host=os.getenv("DB_HOST"),
    port=int(os.getenv("DB_PORT", 5432)),
    dbname=os.getenv("DB_NAME"),
    user=os.getenv("DB_USER"),
    password=os.getenv("DB_PASSWORD"),
    connect_timeout=10,
)
try:
    # CREATE INDEX CONCURRENTLY cannot run inside a transaction block, so
    # every statement has to run in autocommit mode.
    conn.autocommit = True
    with conn.cursor() as cur:
        print("Step 1: ALTER TABLE ...", flush=True)
        cur.execute("ALTER TABLE photos ADD COLUMN IF NOT EXISTS thumbnail_path VARCHAR(2000)")
        print(" Done.", flush=True)

        print("Step 2: CREATE INDEX CONCURRENTLY ...", flush=True)
        cur.execute(
            "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_photos_thumbnail_path_null "
            "ON photos (id) WHERE thumbnail_path IS NULL"
        )
        print(" Done.", flush=True)

        # Check the column in the schema the unqualified name "photos"
        # resolves to, not in any schema that has a table of that name.
        cur.execute(
            "SELECT column_name FROM information_schema.columns "
            "WHERE table_schema = current_schema() "
            "AND table_name='photos' AND column_name='thumbnail_path'"
        )
        column_exists = cur.fetchone() is not None
        print(f"Verified column exists: {column_exists}", flush=True)

        # An interrupted CONCURRENTLY build leaves an INVALID index behind,
        # and IF NOT EXISTS then skips it silently on the next run.
        # NOTE(review): matched by name only; assumes the index name is
        # unique across schemas.
        cur.execute(
            "SELECT i.indisvalid FROM pg_index i "
            "JOIN pg_class c ON c.oid = i.indexrelid "
            "WHERE c.relname = %s",
            (INDEX_NAME,),
        )
        index_row = cur.fetchone()
        index_valid = index_row is not None and bool(index_row[0])
        print(f"Verified index is valid: {index_valid}", flush=True)
finally:
    # Close on failure too; the cursor is closed by its own with-block.
    conn.close()

if not (column_exists and index_valid):
    print(
        "Migration FAILED verification. If the index is invalid, run "
        f"DROP INDEX CONCURRENTLY {INDEX_NAME} and re-run this script.",
        file=sys.stderr,
        flush=True,
    )
    sys.exit(1)

print("Migration complete.", flush=True)