notebookVb

This commit is contained in:
administrator
2026-05-29 06:01:55 +02:00
parent be49fe55f6
commit 29037554b8
4 changed files with 595 additions and 0 deletions
+313
View File
@@ -0,0 +1,313 @@
#!/usr/bin/env python3
r"""
Thumbnail generation pipeline for photos database.
Reads photos with no thumbnail, generates JPEG thumbnails (max 400x400, aspect ratio preserved), updates DB.
Cesty v DB se ukládají vždy v nativním Tower1 formátu:
/mnt/user/ZalohaVsechObrazku/thumbnails/{year}/{month}/{sha256}.jpg
Fyzické zápisy probíhají přes cestu odpovídající aktuálnímu prostředí:
- Tower1 (Unraid): /mnt/user/ZalohaVsechObrazku/thumbnails/...
- tower (Unraid): /mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku/thumbnails/...
- Windows: \\Tower1\ZalohaVsechObrazku\thumbnails\...
Stejný princip platí pro čtení zdrojových fotek — cesty v DB jsou v Tower1
formátu a skript je přemapuje na lokální mount.
Usage:
python generate_thumbnails.py [--batch-size 1000] [--dry-run]
Pro omezení počtu zpracovaných fotek nastav proměnnou MAX_PHOTOS níže (0 = všechny).
"""
import argparse
import logging
import os
import platform
import shutil
import socket
import sys
import time
from pathlib import Path, PurePosixPath
import psycopg2
import psycopg2.extras
from dotenv import load_dotenv
from PIL import Image, ImageOps
# Look for .env next to the script first, then in the parent directory
# (project root); only the first file found is loaded.
_here = Path(__file__).parent
for _env in (_here / ".env", _here.parent / ".env"):
    if _env.is_file():
        load_dotenv(_env)
        break

# ── Configuration ────────────────────────────────────────────────────────────
# NOTE(review): MAX_PHOTOS, RESET and SAVE_ORIGINAL look like test-run values.
# With RESET = True every run deletes the whole thumbnail tree and clears
# thumbnail_path in the DB before generating anything — confirm before running
# against production data.

# Maximum number of photos to process (0 = all)
MAX_PHOTOS = 10
# If True, all thumbnails (DB values and files) are deleted when the script starts
RESET = True
# If True, a copy of the original is stored next to each thumbnail as
# {sha256}_o.{ext} (only for testing / visual comparison)
SAVE_ORIGINAL = True
# Bounding box for thumbnails; the aspect ratio is preserved
MAX_SIZE = (400, 400)
JPEG_QUALITY = 85
BATCH_SIZE = 1000

# Canonical prefixes stored in the DB (native Tower1 paths)
DB_THUMBNAIL_BASE = "/mnt/user/ZalohaVsechObrazku/thumbnails"
DB_SOURCE_BASE = "/mnt/user/ZalohaVsechObrazku"

# Physical paths for the environment the script is running in
if platform.system() == "Windows":
    LOCAL_THUMBNAIL_BASE = Path(r"\\Tower1\ZalohaVsechObrazku\thumbnails")
    LOCAL_SOURCE_BASE = Path(r"\\Tower1\ZalohaVsechObrazku")
else:
    hostname = socket.gethostname()
    # Compare only the short host name, case-insensitively: gethostname() may
    # report "tower1" or a fully qualified name such as "Tower1.lan", and an
    # exact match would then wrongly select the remote-mount paths on Tower1.
    if hostname.split(".", 1)[0].lower() == "tower1":
        LOCAL_THUMBNAIL_BASE = Path("/mnt/user/ZalohaVsechObrazku/thumbnails")
        LOCAL_SOURCE_BASE = Path("/mnt/user/ZalohaVsechObrazku")
    else:
        # "tower" or any other Linux machine — access through the remote mount
        LOCAL_THUMBNAIL_BASE = Path("/mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku/thumbnails")
        LOCAL_SOURCE_BASE = Path("/mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku")

# ── Logging ──────────────────────────────────────────────────────────────────
# Log to stdout and to a file placed next to the script.
LOG_FILE = Path(__file__).parent / "generate_thumbnails.log"
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(LOG_FILE, encoding="utf-8"),
    ],
)
log = logging.getLogger(__name__)
# ── Pomocné funkce ───────────────────────────────────────────────────────────
def get_conn():
    """Open a PostgreSQL connection configured from environment variables.

    Every setting is looked up as DB_* first; if that is unset or empty the
    matching PG* variable is used, and finally a built-in default.
    """

    def setting(primary, fallback, default):
        # `or` (not a getenv default) so an empty DB_* value also falls through
        return os.getenv(primary) or os.getenv(fallback, default)

    params = {
        "host": setting("DB_HOST", "PGHOST", "192.168.1.76"),
        "port": int(setting("DB_PORT", "PGPORT", 5432)),
        "dbname": setting("DB_NAME", "PGDATABASE", "fotky_buzalkovi"),
        "user": setting("DB_USER", "PGUSER", "vladimir.buzalka"),
        "password": setting("DB_PASSWORD", "PGPASSWORD", ""),
    }
    return psycopg2.connect(**params)
def db_path_to_local(db_path: str) -> Path:
    """Convert a path stored in the DB (native Tower1 form) to a local path.

    Paths equal to DB_SOURCE_BASE or located below it are re-rooted under
    LOCAL_SOURCE_BASE; any other path is returned unchanged as a Path.

    The prefix is matched on a path-component boundary, so a sibling directory
    whose name merely starts with the base name (for example
    "/mnt/user/ZalohaVsechObrazku2/x.jpg") is not remapped.
    """
    if db_path == DB_SOURCE_BASE or db_path.startswith(DB_SOURCE_BASE + "/"):
        relative = db_path[len(DB_SOURCE_BASE):].lstrip("/")
        return LOCAL_SOURCE_BASE / relative
    return Path(db_path)
def _thumbnail_parts(sha256: str, taken_at):
    """Return (year, month, filename) locating a thumbnail below its base dir.

    Single source of truth for both the DB path and the physical path, so the
    two can never drift apart. Photos without taken_at go to "unknown/unknown".
    The hash is stripped of surrounding whitespace before use.
    """
    if taken_at:
        year, month = str(taken_at.year), f"{taken_at.month:02d}"
    else:
        year = month = "unknown"
    return year, month, f"{sha256.strip()}.jpg"


def thumbnail_db_path(sha256: str, taken_at) -> str:
    """Return the canonical thumbnail path to store in the DB (Tower1 form)."""
    year, month, filename = _thumbnail_parts(sha256, taken_at)
    return f"{DB_THUMBNAIL_BASE}/{year}/{month}/{filename}"


def thumbnail_local_path(sha256: str, taken_at) -> Path:
    """Return the local physical path the thumbnail file is written to."""
    year, month, filename = _thumbnail_parts(sha256, taken_at)
    return LOCAL_THUMBNAIL_BASE / year / month / filename
def generate_thumbnail(source_path: Path, dest_path: Path) -> bool:
    """Generate a JPEG thumbnail that preserves the aspect ratio.

    The image is scaled to fit within MAX_SIZE and saved with JPEG_QUALITY.
    If SAVE_ORIGINAL is True, the original is also copied next to the
    thumbnail with an "_o" suffix.

    The JPEG is first written to a temporary sibling file and then moved into
    place with os.replace(). A crash or error in the middle of the save
    therefore never leaves a truncated file at dest_path, which matters
    because process_batch treats any existing dest_path as a finished
    thumbnail.

    Args:
        source_path: local path of the source image.
        dest_path: local path of the thumbnail to create; parent directories
            are created as needed.

    Returns:
        True on success. Errors from Pillow or the filesystem propagate.
    """
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = dest_path.with_name(f"{dest_path.name}.tmp")
    try:
        with Image.open(source_path) as img:
            # Apply EXIF Orientation, otherwise phone/camera photos come out rotated
            img = ImageOps.exif_transpose(img)
            # Pillow's JPEG writer accepts only these modes; everything else
            # (RGBA, P, LA, and also PA, I, F, ...) is converted to RGB first.
            if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
                img = img.convert("RGB")
            img.thumbnail(MAX_SIZE, Image.LANCZOS)
            # Format is given explicitly, so the ".tmp" suffix does not matter
            img.save(tmp_path, "JPEG", quality=JPEG_QUALITY)
        os.replace(tmp_path, dest_path)
    except BaseException:
        # Remove the partial temp file, then let the caller see the error
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise

    # Copy of the original next to the thumbnail with an "_o" suffix (testing only)
    if SAVE_ORIGINAL:
        original_dest = dest_path.with_name(f"{dest_path.stem}_o{source_path.suffix}")
        shutil.copy2(source_path, original_dest)
    return True
def reset_thumbnails(conn, dry_run: bool) -> None:
    """Remove every thumbnail: the files on disk and thumbnail_path in the DB.

    With dry_run=True nothing is deleted or updated; the intended actions are
    only logged.
    """
    log.warning("RESET=True — mažu existující thumbnaily.")

    # 1) Filesystem: drop the whole thumbnail directory tree
    base_dir = LOCAL_THUMBNAIL_BASE
    if not base_dir.exists():
        log.info("Thumbnail dir does not exist, skipping FS delete: %s", base_dir)
    elif dry_run:
        log.info("[DRY RUN] Would delete directory tree: %s", base_dir)
    else:
        log.info("Deleting directory tree: %s", base_dir)
        shutil.rmtree(base_dir, ignore_errors=True)

    # 2) Database: set thumbnail_path back to NULL
    if dry_run:
        log.info("[DRY RUN] Would UPDATE photos SET thumbnail_path = NULL WHERE thumbnail_path IS NOT NULL")
        return

    with conn.cursor() as cursor:
        cursor.execute("UPDATE photos SET thumbnail_path = NULL WHERE thumbnail_path IS NOT NULL")
        cleared = cursor.rowcount
    conn.commit()
    log.info("DB reset: cleared thumbnail_path on %d rows.", cleared)
# ── Zpracování ───────────────────────────────────────────────────────────────
# IDs this process has already looked at but which still have
# thumbnail_path IS NULL afterwards: the source file was missing, generation
# failed, or the run is a --dry-run. Without excluding them, every batch would
# select the same rows again, and a batch made only of such rows would return
# 0 and make the caller stop although more photos are pending.
# NOTE: the set lives for the whole process and is sent with each query, so a
# dry run over a very large table produces a correspondingly large parameter.
_UNRESOLVED_IDS = set()

# assumes photos.id is an integer type (it is logged with %d) — TODO confirm
_PENDING_SQL = """
    SELECT id, sha256_file, file_path, taken_at
    FROM photos
    WHERE thumbnail_path IS NULL
      AND id <> ALL(%s::bigint[])
    ORDER BY
        -- 1) reálné fotky (>= 1 MB) jdou před drobky/testy
        (file_size < 1048576),
        -- 2) Apple + DateTimeOriginal jdou úplně první
        NOT (exif_raw->>'Image Make' = 'Apple'
             AND exif_raw ? 'EXIF DateTimeOriginal'),
        -- 3) pak ostatní s DateTimeOriginal
        NOT (exif_raw ? 'EXIF DateTimeOriginal'),
        -- 4) pak cokoli s exif_raw
        (exif_raw IS NULL),
        id
    LIMIT %s
"""


def _fetch_pending(conn, batch_size: int):
    """Fetch up to batch_size rows lacking a thumbnail, minus _UNRESOLVED_IDS."""
    with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        cur.execute(_PENDING_SQL, (sorted(_UNRESOLVED_IDS), batch_size))
        return cur.fetchall()


def _store_thumbnail_path(conn, photo_id, db_path: str) -> None:
    """Write thumbnail_path for one photo and commit immediately."""
    with conn.cursor() as cur:
        cur.execute(
            "UPDATE photos SET thumbnail_path = %s WHERE id = %s",
            (db_path, photo_id),
        )
    conn.commit()


def _process_row(conn, row, dry_run: bool) -> bool:
    """Handle one photo row; return True if it counts as processed."""
    photo_id = row["id"]
    sha256 = row["sha256_file"]
    source_db = row["file_path"]
    taken_at = row["taken_at"]
    db_path = thumbnail_db_path(sha256, taken_at)
    local_dest = thumbnail_local_path(sha256, taken_at)
    local_source = db_path_to_local(source_db)

    # Thumbnail already exists on disk — only record its path in the DB
    if local_dest.exists():
        if dry_run:
            # Nothing is written in a dry run, so remember the row instead
            _UNRESOLVED_IDS.add(photo_id)
        else:
            _store_thumbnail_path(conn, photo_id, db_path)
        return True

    # Source file does not exist
    if not local_source.is_file():
        log.warning("Source missing, skipping id=%d: %s (local: %s)", photo_id, source_db, local_source)
        _UNRESOLVED_IDS.add(photo_id)
        return False

    try:
        if dry_run:
            log.info("[DRY RUN] Would generate: %s -> %s (DB: %s)", local_source, local_dest, db_path)
            _UNRESOLVED_IDS.add(photo_id)
        else:
            generate_thumbnail(local_source, local_dest)
            _store_thumbnail_path(conn, photo_id, db_path)
        return True
    except Exception:
        log.exception("Failed to generate thumbnail for id=%d: %s", photo_id, source_db)
        conn.rollback()
        _UNRESOLVED_IDS.add(photo_id)
        return False


def process_batch(conn, batch_size: int, dry_run: bool) -> int:
    """Process up to batch_size photos that have no thumbnail yet.

    For every selected row the thumbnail is generated (or, if the file already
    exists on disk, just linked) and thumbnail_path is updated and committed
    per row. With dry_run=True nothing is written; actions are only logged.

    Rows that cannot be completed are remembered in _UNRESOLVED_IDS and left
    out of later selections. If a whole batch consists of such rows, the next
    batch is fetched right away, so a return value of 0 really means that no
    processable photo is left.

    Args:
        conn: open psycopg2 connection.
        batch_size: maximum number of rows selected per query.
        dry_run: if True, do not write files or update the DB.

    Returns:
        Number of photos processed (generated, linked, or — in a dry run —
        that would have been).
    """
    while True:
        rows = _fetch_pending(conn, batch_size)
        if not rows:
            return 0
        processed = sum(1 for row in rows if _process_row(conn, row, dry_run))
        if processed:
            return processed
        # Every row was skipped and is now excluded; this loop terminates
        # because each pass either returns or grows _UNRESOLVED_IDS.
# Entry point: parse CLI options, log the effective configuration, optionally
# reset existing thumbnails, then call process_batch repeatedly until it
# reports 0 processed photos, the MAX_PHOTOS limit is reached, or the user
# interrupts the run.
def main():
parser = argparse.ArgumentParser(description="Generate photo thumbnails")
parser.add_argument("--batch-size", type=int, default=BATCH_SIZE)
parser.add_argument("--dry-run", action="store_true", help="Don't write files or update DB")
args = parser.parse_args()
# The limit comes from the MAX_PHOTOS module constant, not from the command line
limit = MAX_PHOTOS
log.info("=" * 60)
log.info("Starting thumbnail generation")
log.info(" batch_size=%d, dry_run=%s, limit=%s", args.batch_size, args.dry_run, limit or "all")
log.info(" hostname=%s, platform=%s", socket.gethostname(), platform.system())
log.info(" source base (local): %s", LOCAL_SOURCE_BASE)
log.info(" thumbnail base (local): %s", LOCAL_THUMBNAIL_BASE)
log.info(" thumbnail base (DB): %s", DB_THUMBNAIL_BASE)
conn = get_conn()
total_processed = 0
batch_num = 0
try:
# RESET is a module-level switch; the dry-run flag is passed through to the reset
if RESET:
reset_thumbnails(conn, args.dry_run)
while True:
# If a limit is set, cap the batch size at the number of photos still allowed
remaining = args.batch_size
if limit > 0:
remaining = min(args.batch_size, limit - total_processed)
if remaining <= 0:
log.info("Limit %d reached. Done.", limit)
break
batch_num += 1
t0 = time.time()
count = process_batch(conn, remaining, args.dry_run)
elapsed = time.time() - t0
# A batch that processed nothing is treated as "nothing left to do"
if count == 0:
log.info("No more photos to process. Done.")
break
total_processed += count
log.info(
"Batch %d: processed %d thumbnails in %.1fs (total: %d)",
batch_num, count, elapsed, total_processed,
)
except KeyboardInterrupt:
# Ctrl+C ends the run without a traceback
log.info("Interrupted by user. Total processed: %d", total_processed)
finally:
# The connection is closed on every exit path
conn.close()
log.info("Finished. Total thumbnails: %d", total_processed)
# Run main() only when executed as a script, not when imported
if __name__ == "__main__":
main()
+232
View File
@@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""
FotkyBuzalkovi MCP Server
Poskytuje nástroje pro dotazování PostgreSQL databáze fotky_buzalkovi.
"""
import json
import os
import sys
from pathlib import Path
import psycopg2
import psycopg2.extras
from dotenv import load_dotenv
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent
# Load .env from the directory that contains this script
load_dotenv(Path(__file__).parent / ".env")

# Keyword arguments for psycopg2.connect(); each value can be overridden
# through the corresponding DB_* environment variable.
DB_CONFIG = dict(
    host=os.getenv("DB_HOST", "192.168.1.76"),
    port=int(os.getenv("DB_PORT", 5432)),
    user=os.getenv("DB_USER", "vladimir.buzalka"),
    password=os.getenv("DB_PASSWORD", ""),
    dbname=os.getenv("DB_NAME", "fotky_buzalkovi"),
)
def get_conn():
    """Open and return a new psycopg2 connection built from DB_CONFIG."""
    connection = psycopg2.connect(**DB_CONFIG)
    return connection
def run_query(sql: str, params=None, limit: int = 500):
    """Run a SELECT query and return its rows as a list of dicts.

    A fresh connection is opened for the call and always closed afterwards.
    (`with connection:` in psycopg2 only ends the transaction; it does not
    close the connection, so relying on it alone leaks one connection per
    call.) The session is switched to read-only so that the database itself
    rejects data-modifying statements.

    Args:
        sql: query text, with %s placeholders if params is given.
        params: optional query parameters passed to cursor.execute().
        limit: maximum number of rows fetched from the result.

    Returns:
        A tuple (rows, description): rows is a list of dicts, description is
        the cursor's column description.
    """
    conn = get_conn()
    try:
        # Must be set before the first statement opens a transaction
        conn.set_session(readonly=True)
        with conn, conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(sql, params)
            rows = cur.fetchmany(limit)
            return [dict(r) for r in rows], cur.description
    finally:
        conn.close()
# ---------------------------------------------------------------------------
# Server
# ---------------------------------------------------------------------------
server = Server("fotky-buzalkovi")


@server.list_tools()
async def list_tools() -> list[Tool]:
    """Describe the tools this server offers: query, tables, describe_table, stats."""

    def no_arguments():
        # Fresh schema object per tool that takes no input
        return {"type": "object", "properties": {}}

    query_schema = {
        "type": "object",
        "properties": {
            "sql": {
                "type": "string",
                "description": "SELECT dotaz (jen čtení, INSERT/UPDATE/DELETE nejsou povoleny)",
            },
            "limit": {
                "type": "integer",
                "description": "Max. počet vrácených řádků (default 100, max 500)",
                "default": 100,
            },
        },
        "required": ["sql"],
    }
    describe_schema = {
        "type": "object",
        "properties": {
            "table": {"type": "string", "description": "Název tabulky"},
        },
        "required": ["table"],
    }

    query_tool = Tool(
        name="query",
        description=(
            "Spustí libovolný SELECT dotaz na databázi fotky_buzalkovi. "
            "Vrátí max. 500 řádků. Používej pro průzkum dat."
        ),
        inputSchema=query_schema,
    )
    tables_tool = Tool(
        name="tables",
        description="Vrátí seznam všech tabulek v databázi s počty řádků.",
        inputSchema=no_arguments(),
    )
    describe_tool = Tool(
        name="describe_table",
        description="Vrátí strukturu tabulky — sloupce, typy, nullable, default.",
        inputSchema=describe_schema,
    )
    stats_tool = Tool(
        name="stats",
        description=(
            "Základní statistiky projektu: počty fotek, stav importu, "
            "přehled kamer, roky pořízení, chybějící data."
        ),
        inputSchema=no_arguments(),
    )
    return [query_tool, tables_tool, describe_tool, stats_tool]
@server.call_tool()
async def call_tool(name: str, arguments: dict):
    """Execute one tool call and return its result as text content.

    Args:
        name: tool name — "query", "tables", "describe_table" or "stats".
        arguments: tool arguments as described by the tool's input schema.

    Returns:
        A one-element list with a TextContent. Successful calls carry JSON;
        unknown tools, unknown tables and errors carry a plain-text message.
        Exceptions are not propagated — they are reported as "Chyba: ...".
    """
    import re  # function-scope import: this module does not import `re` at the top

    forbidden = ("INSERT", "UPDATE", "DELETE", "DROP", "TRUNCATE", "ALTER", "CREATE")

    def check_readonly(sql: str):
        # Guard — SELECT only. Keywords are matched as whole words anywhere in
        # the text. This no longer rejects identifiers such as "created_at" or
        # "updated_at" at the start of a line, and it does catch a second
        # statement on the same line ("SELECT 1; DELETE ...").
        # NOTE: this is a heuristic, not a security boundary; a keyword inside
        # a string literal or comment is rejected as well.
        for bad in forbidden:
            if re.search(rf"\b{bad}\b", sql, flags=re.IGNORECASE):
                raise ValueError(f"Pouze SELECT dotazy jsou povoleny. Nalezeno: {bad}")

    def as_text(payload):
        # All tools serialise the same way; default=str covers dates, decimals, etc.
        body = json.dumps(payload, ensure_ascii=False, default=str, indent=2)
        return [TextContent(type="text", text=body)]

    try:
        if name == "query":
            sql = arguments["sql"]
            check_readonly(sql)
            # Clamp to 0..500 so a negative value can never reach fetchmany()
            limit = max(0, min(int(arguments.get("limit", 100)), 500))
            rows, _ = run_query(sql, limit=limit)
            return as_text(rows)

        elif name == "tables":
            # Join through pg_namespace so that a relation with the same name
            # in another schema cannot be matched; reltuples is an estimate.
            sql = """
                SELECT
                    t.table_name,
                    c.reltuples::bigint AS est_rows
                FROM information_schema.tables t
                JOIN pg_class c ON c.relname = t.table_name
                JOIN pg_namespace n ON n.oid = c.relnamespace
                                   AND n.nspname = t.table_schema
                WHERE t.table_schema = 'public'
                  AND t.table_type = 'BASE TABLE'
                ORDER BY t.table_name
            """
            rows, _ = run_query(sql, limit=100)
            return as_text(rows)

        elif name == "describe_table":
            table = arguments["table"]
            sql = """
                SELECT
                    column_name,
                    data_type,
                    character_maximum_length,
                    is_nullable,
                    column_default
                FROM information_schema.columns
                WHERE table_schema = 'public'
                  AND table_name = %s
                ORDER BY ordinal_position
            """
            rows, _ = run_query(sql, params=(table,), limit=200)
            if not rows:
                return [TextContent(type="text", text=f"Tabulka '{table}' nenalezena.")]
            return as_text(rows)

        elif name == "stats":
            # Single-value counts, stored under the given result key
            count_queries = (
                ("zaloha_obrazku_total", "SELECT COUNT(*) AS total FROM zaloha_obrazku"),
                ("zdrojove_soubory_total", "SELECT COUNT(*) AS total FROM zdrojove_soubory"),
                ("photos_total", "SELECT COUNT(*) AS total FROM photos"),
                (
                    "photos_with_taken_at",
                    "SELECT COUNT(*) AS total FROM photos WHERE taken_at IS NOT NULL",
                ),
                (
                    "photos_with_gps",
                    "SELECT COUNT(*) AS total FROM photos WHERE gps_lat IS NOT NULL",
                ),
            )
            # Grouped breakdowns, stored as lists of row dicts
            list_queries = (
                (
                    "top_cameras",
                    """
                    SELECT camera_model, COUNT(*) AS cnt
                    FROM photos
                    WHERE camera_model IS NOT NULL
                    GROUP BY camera_model
                    ORDER BY cnt DESC
                    LIMIT 10
                    """,
                ),
                (
                    "photos_by_year",
                    """
                    SELECT EXTRACT(YEAR FROM taken_at)::int AS rok, COUNT(*) AS cnt
                    FROM photos
                    WHERE taken_at IS NOT NULL
                    GROUP BY rok
                    ORDER BY rok
                    """,
                ),
                (
                    "processing_status",
                    """
                    SELECT processing_status, COUNT(*) AS cnt
                    FROM photos
                    GROUP BY processing_status
                    ORDER BY cnt DESC
                    """,
                ),
            )

            results = {}
            conn = get_conn()
            try:
                with conn, conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                    for key, sql in count_queries:
                        cur.execute(sql)
                        results[key] = cur.fetchone()["total"]
                    for key, sql in list_queries:
                        cur.execute(sql)
                        results[key] = [dict(r) for r in cur.fetchall()]
            finally:
                # `with conn` only ends the transaction; close explicitly
                conn.close()
            return as_text(results)

        else:
            return [TextContent(type="text", text=f"Neznámý nástroj: {name}")]

    except Exception as e:
        return [TextContent(type="text", text=f"Chyba: {e}")]
# ---------------------------------------------------------------------------
# Spuštění
# ---------------------------------------------------------------------------
async def main():
    """Run the server on the stream pair provided by stdio_server()."""
    async with stdio_server() as streams:
        reader, writer = streams
        init_options = server.create_initialization_options()
        await server.run(reader, writer, init_options)


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
+7
View File
@@ -0,0 +1,7 @@
-- Migration: Add thumbnail_path column to photos table
-- Run: psql -h $PGHOST -d $PGDATABASE -U $PGUSER -f migrations/001_add_thumbnail_path.sql
-- Both statements use IF NOT EXISTS, so running the script again is harmless.
-- NOTE(review): CREATE INDEX CONCURRENTLY presumably cannot run inside a
-- transaction block — confirm before adding --single-transaction to psql.

-- Nullable column holding the thumbnail path; NULL means "no thumbnail yet"
ALTER TABLE photos ADD COLUMN IF NOT EXISTS thumbnail_path VARCHAR(2000);
-- Partial index over id restricted to rows that still lack a thumbnail
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_photos_thumbnail_path_null
ON photos (id) WHERE thumbnail_path IS NULL;
+43
View File
@@ -0,0 +1,43 @@
#!/usr/bin/env python3
"""One-shot migration: add thumbnail_path column + partial index."""
import os
import sys
from pathlib import Path
import psycopg2
from dotenv import load_dotenv
# Load connection settings from the .env file next to this script
load_dotenv(Path(__file__).parent / ".env")

conn = psycopg2.connect(
    host=os.getenv("DB_HOST"),
    port=int(os.getenv("DB_PORT", 5432)),
    dbname=os.getenv("DB_NAME"),
    user=os.getenv("DB_USER"),
    password=os.getenv("DB_PASSWORD"),
    connect_timeout=10,
)
try:
    # Each statement is committed on its own.
    # NOTE(review): presumably required because CREATE INDEX CONCURRENTLY
    # cannot run inside a transaction block — confirm.
    conn.autocommit = True
    with conn.cursor() as cur:
        print("Step 1: ALTER TABLE ...", flush=True)
        cur.execute("ALTER TABLE photos ADD COLUMN IF NOT EXISTS thumbnail_path VARCHAR(2000)")
        print(" Done.", flush=True)

        print("Step 2: CREATE INDEX CONCURRENTLY ...", flush=True)
        cur.execute(
            "CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_photos_thumbnail_path_null "
            "ON photos (id) WHERE thumbnail_path IS NULL"
        )
        print(" Done.", flush=True)

        # Verify that the column is now visible in the catalog
        cur.execute(
            "SELECT column_name FROM information_schema.columns "
            "WHERE table_name='photos' AND column_name='thumbnail_path'"
        )
        row = cur.fetchone()
        print(f"Verified column exists: {row is not None}", flush=True)
finally:
    # Close the connection even when one of the steps raises
    conn.close()
print("Migration complete.", flush=True)