#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
FAST FILE HASH INDEXER – WINDOWS CLIENT (EXTERNAL DISKS)
- Mode: PHYSICAL BACKUP
- Hostname in DB = Disk Label (e.g., #HD015)
- Path in DB     = Relative path (e.g., /Movies/Film.mkv)
"""

import os
import platform
import socket
import sys
import time

import pymysql
from blake3 import blake3

# ==============================
# CONFIG
# ==============================
# Read buffer used while hashing. The file previously assigned CHUNK_SIZE
# twice (5 MB, then 4 MB); only the later 4 MB value was ever in effect, so
# that is the single definition kept here.
CHUNK_SIZE = 4 * 1024 * 1024  # 4 MB
PROGRESS_MIN_SIZE = 500 * 1024 * 1024  # 500 MB
PROGRESS_INTERVAL = 1.0  # seconds

EXCLUDED_DIRS = {"$RECYCLE.BIN", "System Volume Information", "RECYCLER", "msdownld.tmp"}

# --- Size limits ---
FILE_MIN_SIZE = 0
FILE_MAX_SIZE = 1024 * 1024 * 1024 * 1024  # 1TB

# --- Database settings ---
# NOTE(review): credentials are hard-coded in source; consider moving them to
# an environment variable or a config file kept out of version control.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

PRINT_SKIPPED = False  # True = also print files that were skipped

# ==============================
# SYSTEM INFO
# ==============================
# Physical PC name (console output only; the DB stores the disk label instead)
REAL_PC_HOSTNAME = socket.gethostname()
OS_NAME = platform.system()

# Case-folded copy of EXCLUDED_DIRS: Windows directory names are
# case-insensitive (e.g. "$Recycle.Bin" vs "$RECYCLE.BIN").
_EXCLUDED_DIRS_FOLDED = frozenset(name.casefold() for name in EXCLUDED_DIRS)


# ==============================
# FUNCTIONS
# ==============================
def _format_hms(seconds: float) -> str:
    """Format a non-negative duration in seconds as HH:MM:SS."""
    return time.strftime('%H:%M:%S', time.gmtime(seconds))


def compute_blake3(path: str) -> bytes:
    """Return the BLAKE3 digest of the file at *path*.

    The file is read in CHUNK_SIZE pieces. For files of at least
    PROGRESS_MIN_SIZE bytes a progress line is printed at most every
    PROGRESS_INTERVAL seconds, followed by a summary line when done.

    Any exception (stat, open, read, hash) is reported on the console with a
    "HASH ERROR" line and then re-raised to the caller.
    """
    try:
        # Inside the try so a failing stat is reported like any other error.
        total_size = os.path.getsize(path)
        show_progress = total_size >= PROGRESS_MIN_SIZE

        h = blake3()
        processed = 0
        # Monotonic clock: elapsed time must not jump with wall-clock changes.
        start_time = time.monotonic()
        last_report = start_time

        with open(path, "rb") as f:
            while True:
                chunk = f.read(CHUNK_SIZE)
                if not chunk:
                    break

                h.update(chunk)
                processed += len(chunk)

                if not show_progress:
                    continue

                now = time.monotonic()
                if now - last_report >= PROGRESS_INTERVAL:
                    elapsed = now - start_time
                    speed = processed / elapsed if elapsed > 0 else 0
                    percent = processed / total_size * 100
                    # Clamp at 0: if the file grew while being read, a negative
                    # ETA would make time.gmtime() raise on Windows.
                    remaining = max(total_size - processed, 0)
                    eta = remaining / speed if speed > 0 else 0

                    print(
                        f" ⏳ {percent:6.2f}% | "
                        f"{processed/1024/1024:8.1f} / {total_size/1024/1024:.1f} MB | "
                        f"{speed/1024/1024:6.1f} MB/s | "
                        f"ETA {_format_hms(eta)}",
                        flush=True
                    )
                    last_report = now

        if show_progress:
            total_time = time.monotonic() - start_time
            avg_speed = total_size / total_time if total_time > 0 else 0
            print(
                " ✅ DONE | "
                f"{total_size/1024/1024:.1f} MB | "
                f"avg {avg_speed/1024/1024:.1f} MB/s | "
                f"time {_format_hms(total_time)}",
                flush=True
            )

        return h.digest()

    except Exception as e:
        print(f"⚠️ HASH ERROR: {path} - {e}")
        raise


def get_drive_info():
    """Interactively ask for the drive letter and the disk ID.

    Returns:
        A ``(drive_root, disk_label)`` tuple, e.g. ``("E:\\", "#HD015")``.
        ``drive_root`` is an existing directory; ``disk_label`` is the value
        stored as ``host_name`` in the database (at least 2 characters).
    """
    print("\n💿 --- NASTAVENÍ SKENOVÁNÍ (EXTERNÍ DISK) ---")

    # 1. Drive letter: accept "E", "e:", "E:\" and similar spellings.
    while True:
        drive_input = input("📂 Zadejte písmeno disku ve Windows (např. 'E'): ").strip().upper()
        drive_letter = drive_input.replace(":", "").replace("\\", "").replace("/", "")

        if len(drive_letter) != 1 or not drive_letter.isalpha():
            print("❌ Neplatný formát.")
            continue

        drive_root = f"{drive_letter}:\\"
        if os.path.isdir(drive_root):
            break
        print(f"❌ Disk {drive_root} není dostupný.")

    # 2. Disk name -> HOST_NAME
    while True:
        disk_label = input("🏷️ Zadejte ID disku (bude uloženo jako 'host_name', např. '#HD015'): ").strip()
        if len(disk_label) >= 2:
            break
        print("❌ Název je příliš krátký.")

    return drive_root, disk_label


def size_allowed(size: int) -> bool:
    """Return True if *size* lies within [FILE_MIN_SIZE, FILE_MAX_SIZE].

    A limit set to ``None`` disables that bound.
    """
    if FILE_MIN_SIZE is not None and size < FILE_MIN_SIZE:
        return False
    if FILE_MAX_SIZE is not None and size > FILE_MAX_SIZE:
        return False
    return True


def _is_excluded_dir(name: str) -> bool:
    """Return True if directory *name* is in EXCLUDED_DIRS, ignoring case."""
    return name.casefold() in _EXCLUDED_DIRS_FOLDED


def _to_db_path(disk_path: str, scan_root: str) -> str:
    """Convert an absolute disk path to the rooted, slash-separated DB path.

    Example: ``E:\\Filmy\\Avatar.mkv`` with root ``E:\\`` -> ``/Filmy/Avatar.mkv``.

    Raises:
        ValueError: propagated from ``os.path.relpath`` (e.g. different drives).
    """
    rel_path = os.path.relpath(disk_path, scan_root)
    clean_path = rel_path.replace("\\", "/")
    if not clean_path.startswith("/"):
        clean_path = "/" + clean_path
    return clean_path


def _load_index(cur, disk_hostname: str) -> dict:
    """Load ``{full_path: (file_size, mtime_unix)}`` for one disk from the DB."""
    cur.execute(
        """
        SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
        FROM file_md5_index
        WHERE host_name = %s
        """,
        (disk_hostname,),
    )
    return {path: (size, mtime) for path, size, mtime in cur.fetchall()}


# ==============================
# MAIN
# ==============================
def main():
    """Scan the chosen drive and insert/update changed files in the DB index.

    A file is skipped when the DB already holds a row for the same disk label
    and path with an identical size and mtime (whole seconds); otherwise it is
    hashed with BLAKE3 and upserted into ``file_md5_index``.
    """
    print("🚀 BLAKE3 External Disk Indexer", flush=True)
    print(f"🖥 Running on PC: {REAL_PC_HOSTNAME}", flush=True)

    # Collect user input
    scan_root, disk_hostname = get_drive_info()

    print("✅ Konfigurace:")
    print(f" Zdroj (Windows) : {scan_root}")
    print(f" DB Hostname : {disk_hostname}")
    print(" DB Cesty : /Složka/Soubor...")

    try:
        db = pymysql.connect(**DB_CONFIG)
        cur = db.cursor()
    except Exception as e:
        print(f"❌ DB Connection failed: {e}")
        input("Enter pro konec...")
        return

    # try/finally so the cursor and connection are released even when the
    # scan aborts with an exception (e.g. a failed INSERT or Ctrl+C).
    try:
        print(f"📥 Načítám index pro disk: '{disk_hostname}'...", flush=True)

        # Look up rows by exact host_name only.
        # Map: { "/Folder/File.ext": (size, mtime) }
        indexed_map = _load_index(cur, disk_hostname)

        print(f"✅ Nalezeno {len(indexed_map):,} souborů v DB pro tento disk.", flush=True)
        print("======================================", flush=True)

        new_files = 0
        skipped = 0
        filtered = 0
        errors = 0
        seen_paths = set()

        # --- SCAN ---
        for root, dirs, files in os.walk(scan_root):
            # Prune system folders in place so os.walk does not descend into them.
            dirs[:] = [d for d in dirs if not _is_excluded_dir(d)]

            for fname in files:
                disk_path = os.path.join(root, fname)

                # 1. Stat (size, time)
                try:
                    stat = os.stat(disk_path)
                except OSError:
                    errors += 1
                    continue

                size = stat.st_size
                if not size_allowed(size):
                    filtered += 1
                    continue

                # 2. Build the clean DB path:
                #    E:\Filmy\Avatar.mkv -> /Filmy/Avatar.mkv
                try:
                    clean_path = _to_db_path(disk_path, scan_root)
                except ValueError:
                    errors += 1
                    continue

                if clean_path in seen_paths:
                    continue
                seen_paths.add(clean_path)

                mtime = int(stat.st_mtime)

                # === STRICT CHECK ===
                # Unchanged only if both size and mtime equal the DB row.
                if indexed_map.get(clean_path) == (size, mtime):
                    skipped += 1
                    if PRINT_SKIPPED:
                        print(f"⏭ SKIP {clean_path}", flush=True)
                    continue

                # === INSERT / UPDATE ===
                print("➕ NEW / UPDATED", flush=True)
                print(f" File: {clean_path}", flush=True)
                print(f" Size: {size:,} B", flush=True)

                try:
                    b3 = compute_blake3(disk_path)
                except Exception:
                    # compute_blake3 already printed the reason.
                    errors += 1
                    continue

                cur.execute(
                    """
                    INSERT INTO file_md5_index
                        (os_name, host_name, full_path, file_name, directory,
                         file_size, mtime, blake3)
                    VALUES (%s, %s, %s, %s, %s, %s, FROM_UNIXTIME(%s), %s)
                    ON DUPLICATE KEY UPDATE
                        file_size = VALUES(file_size),
                        mtime = VALUES(mtime),
                        blake3 = VALUES(blake3),
                        updated_at = CURRENT_TIMESTAMP
                    """,
                    (
                        OS_NAME,         # e.g. 'Windows' (where the scan ran)
                        disk_hostname,   # stored as host_name, e.g. '#HD015'
                        clean_path,      # stored as full_path, e.g. '/Filmy/Avatar.mkv'
                        fname,
                        os.path.dirname(clean_path),
                        size,
                        mtime,
                        b3,
                    ),
                )

                new_files += 1
                print(f" Hash: {b3.hex()}", flush=True)
                print("--------------------------------------", flush=True)

        print("======================================", flush=True)
        print(f"✅ Hotovo : {new_files}")
        print(f"⏭ Shoda : {skipped}")
        # Previously counted but never shown: files outside the size limits.
        print(f"🔍 Filtr : {filtered}")
        print(f"⚠️ Chyby : {errors}")
        print("🏁 Konec.")
    finally:
        cur.close()
        db.close()


if __name__ == "__main__":
    main()