w22
This commit is contained in:
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Deduplikace: zkontroluje obrázky ve vybrané složce oproti tabulce zaloha_obrazku.
|
||||
Pokud je obrázek bezpečně zazálohován (blake3 sedí + záložní soubor existuje), smaže ho.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog, messagebox
|
||||
from pathlib import Path
|
||||
import psycopg2
|
||||
import blake3
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()

# ── Database connection ─────────────────────────────────────────────────────


def _dsn_param(key: str, value: str | None) -> str | None:
    """Render one ``key='value'`` item of a libpq connection string.

    Returns None when *value* is unset so the caller can leave the key out
    instead of emitting the literal text ``None``. The value is always
    single-quoted, with backslashes and single quotes backslash-escaped, so
    values containing spaces or quotes do not corrupt the connection string.
    """
    if value is None:
        return None
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"{key}='{escaped}'"


# Connection string built from the DB_* environment variables. Host and port
# fall back to localhost:5432; any other variable that is unset is omitted.
DB_DSN = " ".join(
    item
    for item in (
        _dsn_param("host", os.getenv("DB_HOST", "localhost")),
        _dsn_param("port", os.getenv("DB_PORT", "5432")),
        _dsn_param("dbname", os.getenv("DB_NAME")),
        _dsn_param("user", os.getenv("DB_USER")),
        _dsn_param("password", os.getenv("DB_PASSWORD")),
    )
    if item is not None
)
|
||||
|
||||
# File suffixes (lower-case, including the leading dot) that count as images.
IMAGE_EXTENSIONS = {
    # common raster formats
    ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif", ".webp",
    # HEIF family
    ".heic", ".heif",
    # camera raw formats
    ".raw", ".cr2", ".nef", ".arw",
}
|
||||
|
||||
# Translation of Linux mount-point prefixes to the equivalent Windows UNC share.
LINUX_TO_WINDOWS = {
    "/mnt/user/ZalohaVsechObrazku": r"\\Tower1\ZalohaVsechObrazku",
    "/mnt/remotes/TOWER1.LAN_ZalohaVsechObrazku": r"\\Tower1\ZalohaVsechObrazku",
}


def linux_to_windows_path(linux_path: str) -> Path:
    """Translate a Linux backup path into its Windows UNC equivalent.

    Args:
        linux_path: Path string using ``/`` separators.

    Returns:
        The UNC path when *linux_path* is one of the known mount points in
        ``LINUX_TO_WINDOWS`` or lies beneath one; otherwise ``Path(linux_path)``
        unchanged.
    """
    for prefix, unc in LINUX_TO_WINDOWS.items():
        # Match only on a path-component boundary: a plain startswith() would
        # also map sibling directories such as "<prefix>Old/..." onto the share.
        if linux_path == prefix or linux_path.startswith(prefix + "/"):
            rest = linux_path[len(prefix):].replace("/", "\\")
            return Path(unc + rest)
    return Path(linux_path)
|
||||
|
||||
|
||||
def compute_blake3(path: Path) -> str:
    """Return the hexadecimal BLAKE3 digest of the file at *path*.

    The file is read in 1 MiB pieces so large files are never held in memory
    as a whole.
    """
    chunk_size = 1024 * 1024
    hasher = blake3.blake3()
    with open(path, "rb") as stream:
        # read() returns b"" at end of file, which ends the loop.
        while block := stream.read(chunk_size):
            hasher.update(block)
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def pick_folder() -> Path | None:
    """Show a directory chooser and return the selected folder.

    A hidden Tk root window is created only to host the dialog and is always
    destroyed afterwards, even if the dialog raises.

    Returns:
        The chosen folder as a Path, or None when no folder was chosen.
    """
    root = tk.Tk()
    try:
        # Hide the empty root window; only the dialog should be visible.
        root.withdraw()
        folder = filedialog.askdirectory(title="Vyber složku k deduplikaci")
    finally:
        root.destroy()
    return Path(folder) if folder else None
|
||||
|
||||
|
||||
def log(msg: str) -> None:
    """Write *msg* to standard output as one line and flush immediately."""
    print(msg, file=sys.stdout, flush=True)
|
||||
|
||||
|
||||
def _backup_is_verified(img: Path, cesta_zalohy: str, db_hash: str) -> bool:
    """Check that one backup candidate is a real, intact, separate copy of *img*.

    Args:
        img: The local image that is a candidate for deletion.
        cesta_zalohy: Backup path as stored in the database.
        db_hash: BLAKE3 hex digest stored in the database for that backup.

    Returns:
        True only when the backup file exists, is not the very same file as
        *img*, and its content hashes to *db_hash*. Each rejection is logged.
    """
    backup_path = linux_to_windows_path(cesta_zalohy)
    if not backup_path.exists():
        log(f" PŘESKOČEN (záložní soubor neexistuje): {cesta_zalohy}")
        return False

    # A file must never count as its own backup: if the scanned folder is the
    # backup location itself, deleting the image would destroy the only copy.
    if backup_path.samefile(img):
        log(f" PŘESKOČEN (záloha je tentýž soubor jako originál): {cesta_zalohy}")
        return False

    # Re-hash the backup so a corrupted backup file is not trusted.
    if compute_blake3(backup_path) != db_hash:
        log(f" PŘESKOČEN (blake3 zálohy nesedí s DB): {cesta_zalohy}")
        return False

    return True


def dedup(folder: Path) -> None:
    """Delete images under *folder* that have a verified backup.

    For every image file found recursively, rows of ``zaloha_obrazku`` with
    the same ``velikost`` (size) are fetched. The image is deleted only when
    one row's ``blake3_hash`` equals the image's hash and the backup file at
    ``cesta_zalohy`` passes ``_backup_is_verified``. Per-image failures are
    logged and counted; they do not stop the run. A summary is logged at the
    end.

    Args:
        folder: Root folder to scan.
    """
    # is_file() keeps directories that merely end in an image suffix out of the run.
    images = [
        p for p in folder.rglob("*")
        if p.suffix.lower() in IMAGE_EXTENSIONS and p.is_file()
    ]
    log(f"Nalezeno obrázků: {len(images)}")

    if not images:
        log("Žádné obrázky k zpracování.")
        return

    deleted = 0
    skipped = 0
    errors = 0

    conn = psycopg2.connect(DB_DSN)
    try:
        # Only SELECTs are issued. With autocommit one failed statement does
        # not leave the transaction aborted and break every later query.
        conn.autocommit = True
        with conn.cursor() as cur:
            for img in images:
                try:
                    size = img.stat().st_size

                    # 1. Look up backup candidates by file size (cheap pre-filter).
                    cur.execute(
                        "SELECT blake3_hash, cesta_zalohy, nazev_souboru FROM zaloha_obrazku "
                        "WHERE velikost = %s",
                        (size,)
                    )
                    rows = cur.fetchall()

                    if not rows:
                        log(f" PŘESKOČEN (žádná záloha se stejnou velikostí): {img.name}")
                        skipped += 1
                        continue

                    # 2. Hash the local file once for all candidates.
                    local_hash = compute_blake3(img)

                    # 3. Find the first candidate with a matching hash whose
                    #    backup file is verified on disk.
                    matched = False
                    for db_hash, cesta_zalohy, db_name in rows:
                        if local_hash != db_hash:
                            continue
                        if _backup_is_verified(img, cesta_zalohy, db_hash):
                            matched = True
                            break

                    if not matched:
                        log(f" PŘESKOČEN (žádná platná záloha neprošla): {img.name}")
                        skipped += 1
                        continue

                    img.unlink()
                    # db_name still refers to the row that matched (loop exited via break).
                    if img.name != db_name:
                        log(f" SMAZÁN [JINÉ JMÉNO]: {img.name} != záloha: {db_name} (binary identické)")
                    else:
                        log(f" SMAZÁN: {img}")
                    deleted += 1

                except Exception as e:
                    # Per-image boundary: report the failure and continue with the next file.
                    log(f" CHYBA ({img.name}): {e}")
                    errors += 1
    finally:
        conn.close()

    log("\n--- Hotovo ---")
    log(f"Smazáno: {deleted}")
    log(f"Přeskočeno: {skipped}")
    log(f"Chyby: {errors}")
|
||||
|
||||
|
||||
def _main() -> None:
    """Ask the user for a folder and deduplicate it.

    Exits with status 0 when no folder is selected.
    """
    chosen = pick_folder()
    if chosen:
        print(f"Zpracovávám složku: {chosen}")
        dedup(chosen)
        return

    print("Žádná složka nevybrána. Končím.")
    sys.exit(0)


if __name__ == "__main__":
    _main()
|
||||
@@ -1,4 +1,5 @@
|
||||
blake3
|
||||
python-dotenv
|
||||
exifread
|
||||
imagehash
|
||||
pandas
|
||||
|
||||
Reference in New Issue
Block a user