tw22

Merge remote-tracking branch 'origin/master'
z230
2026-01-17 20:23:58 +01:00 · 2026-01-13 16:43:13 +01:00 · 2026-01-13 16:42:40 +01:00 · 2026-01-11 21:13:38 +01:00 · 2026-01-11 20:14:52 +01:00 · 2026-01-08 10:15:45 +01:00
7 changed files with 1149 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
 .venv/
 .idea/
--- a/Library/10
+++ b/Library/10
@@ -0,0 +1,200 @@
 import os
 import sys
 import pymysql
 import pymysql.cursors
 # ================= CONFIGURATION =================
 # --- SAFETY SWITCH ---
 # True  = LIST ONLY (nothing is deleted, the database is not changed)
 # False = LIVE MODE (deletes files and DB rows!)
 DRY_MODE = True
 # 1. MySQL access
 # NOTE(review): credentials are hard-coded in source control; consider loading
 # them from the environment or an untracked config file.
 DB_CONFIG = {
    'host': '192.168.1.76',
    'port': 3307,
    'user': 'root',
    'password': 'Vlado9674+',
    'db': 'torrents',
    'charset': 'utf8mb4',
    'autocommit': True
 }
 # Table name interpolated into the SQL statements of this script.
 TABULKA = "file_md5_index"
 # 2. Path mapping: convert_path() replaces a leading SERVER_PREFIX with LOCAL_PREFIX.
 SERVER_PREFIX = "/mnt/user/Library"
 # Raw string so the backslashes of the UNC path are taken literally
 LOCAL_PREFIX = r"\\tower1\#library"
 # ===============================================
 def get_connection():
    """Open a new pymysql connection from DB_CONFIG whose cursors return dict rows."""
    row_factory = pymysql.cursors.DictCursor
    return pymysql.connect(cursorclass=row_factory, **DB_CONFIG)
 def convert_path(db_path):
    """Translate a server-side (Linux) path into the local Windows path.

    Args:
        db_path: Path as stored in the database.

    Returns:
        The normalized path under LOCAL_PREFIX when db_path is SERVER_PREFIX
        itself or lies below it; None for an empty value or any other path.
    """
    if not db_path or not db_path.startswith(SERVER_PREFIX):
        return None
    remainder = db_path[len(SERVER_PREFIX):]
    # Require a separator right after the prefix so that a sibling directory
    # such as "<SERVER_PREFIX>Old/..." is not mistaken for a library path.
    prefix_ends_with_sep = SERVER_PREFIX.endswith(("/", "\\"))
    if remainder and remainder[0] not in "/\\" and not prefix_ends_with_sep:
        return None
    # Drop leading separators from the relative part so that join() keeps the prefix
    relative_path = remainder.lstrip("/").lstrip("\\")
    # Join the two parts and normalize the separators
    return os.path.normpath(os.path.join(LOCAL_PREFIX, relative_path))
 def step_1_mark_duplicates():
    """Mark TOWER rows whose blake3 hash also exists on SYNOLOGY.

    In DRY_MODE the matching rows are only counted and the database is left
    untouched; otherwise to_delete is set to 1 on them.

    Returns:
        Number of rows that would be (dry run) or were (live run) marked.

    Exits the process with status 1 when MySQL reports an error.
    """
    print(f"\n--- KROK 1: Hledání duplicit v DB (DRY_MODE={DRY_MODE}) ---")
    # Both branches take the host names from here, so the dry run previews exactly
    # the rows the live UPDATE touches (the dry run previously queried 'TOWER1').
    # NOTE(review): confirm 'TOWER' is the host_name actually stored in the table.
    hosts = ('TOWER', 'SYNOLOGY')
    conn = None
    try:
        conn = get_connection()
        with conn.cursor() as cursor:
            if DRY_MODE:
                # Dry run: only count what would be marked (no UPDATE).
                # COUNT(DISTINCT ...) counts a row with several SYNOLOGY twins once.
                sql = f"""
                SELECT COUNT(DISTINCT t1.id) AS pocet
                FROM {TABULKA} t1
                JOIN {TABULKA} t2 ON t1.blake3 = t2.blake3
                WHERE t1.host_name = %s
                  AND t2.host_name = %s
                  AND (t1.to_delete IS NULL OR t1.to_delete = 0);
                """
                cursor.execute(sql, hosts)
                affected = cursor.fetchone()['pocet']
                print(f"[DRY-RUN] Našel jsem {affected} shodných záznamů (DB nebude změněna).")
            else:
                # Live mode: perform the UPDATE
                sql = f"""
                UPDATE {TABULKA} t1
                JOIN {TABULKA} t2 ON t1.blake3 = t2.blake3
                SET t1.to_delete = 1
                WHERE t1.host_name = %s
                  AND t2.host_name = %s
                  AND (t1.to_delete IS NULL OR t1.to_delete = 0);
                """
                print("Provádím UPDATE záznamů v databázi...")
                cursor.execute(sql, hosts)
                affected = cursor.rowcount
                conn.commit()
                print(f"Hotovo. Označeno {affected} záznamů ke smazání.")
        return affected
    except pymysql.MySQLError as e:
        print(f"Chyba MySQL při označování: {e}")
        sys.exit(1)
    finally:
        # Release the connection on every path, including the sys.exit() above.
        if conn is not None:
            conn.close()
 def _delete_index_row(conn, db_id):
    """Delete one row of the index table by id and commit."""
    with conn.cursor() as del_cursor:
        del_cursor.execute(f"DELETE FROM {TABULKA} WHERE id = %s", (db_id,))
    conn.commit()
 def step_2_delete_files():
    """Delete the duplicate files from disk and their rows from the index table.

    In DRY_MODE the candidates are selected with the same JOIN as step 1 and
    only listed. In live mode the rows flagged to_delete = 1 are processed
    after an interactive confirmation: the file is removed (a file that is
    already gone is tolerated) and its row is deleted.

    Returns None. MySQL errors are reported on stdout, not raised.
    """
    print(f"\n--- KROK 2: Mazání souborů (DRY_MODE={DRY_MODE}) ---")
    conn = None
    try:
        conn = get_connection()
        with conn.cursor() as cursor:
            print("Stahuji seznam souborů...")
            if DRY_MODE:
                # Nothing was flagged in a dry run, so simulate with the JOIN itself.
                # DISTINCT lists a file once even if it has several SYNOLOGY twins.
                sql = f"""
                SELECT DISTINCT t1.id, t1.full_path
                FROM {TABULKA} t1
                JOIN {TABULKA} t2 ON t1.blake3 = t2.blake3
                WHERE t1.host_name = 'TOWER'
                  AND t2.host_name = 'SYNOLOGY'
                  AND (t1.to_delete IS NULL OR t1.to_delete = 0)
                """
            else:
                # Live mode: take what step 1 flagged
                sql = f"SELECT id, full_path FROM {TABULKA} WHERE host_name = 'TOWER' AND to_delete = 1"
            cursor.execute(sql)
            files_to_process = cursor.fetchall()
        count = len(files_to_process)
        print(f"Nalezeno {count} souborů.")
        if count == 0:
            print("Žádné soubory k zpracování. Konec.")
            return
        if not DRY_MODE:
            # Live mode asks for an explicit confirmation
            confirm = input(f"-> [POZOR] Opravdu chcete SMAZAT {count} souborů? (napište 'ano'): ")
            if confirm.lower() != 'ano':
                print("Operace zrušena.")
                return
        else:
            print("-" * 40)
            print("VÝPIS SOUBORŮ, KTERÉ BY BYLY SMAZÁNY:")
            print("-" * 40)
        deleted_counter = 0
        errors = 0
        for row in files_to_process:
            db_id = row['id']
            server_path = row['full_path']
            local_path = convert_path(server_path)
            if not local_path:
                print(f"[SKIP PATH] Nesedí prefix: {server_path}")
                continue
            if DRY_MODE:
                print(f"[DRY-RUN] Bylo by smazáno: {local_path}")
                deleted_counter += 1
                continue
            try:
                # Remove first and treat "already gone" as success; this avoids
                # the exists()/remove() race of a separate existence check.
                try:
                    os.remove(local_path)
                    print(f"[OK SMAZÁNO] {local_path}")
                except FileNotFoundError:
                    print(f"[NENÍ NA DISKU] Mažu jen z DB: {local_path}")
                _delete_index_row(conn, db_id)
                deleted_counter += 1
            except OSError as e:
                print(f"[CHYBA OS] {local_path}: {e}")
                errors += 1
            except pymysql.MySQLError as e:
                print(f"[CHYBA DB] ID {db_id}: {e}")
                # A failed row delete is an error too; include it in the summary.
                errors += 1
        print("-" * 30)
        if DRY_MODE:
            print(f"DRY RUN DOKONČEN. Zobrazena simulace pro {deleted_counter} souborů.")
        else:
            print(f"HOTOVO. Úspěšně smazáno: {deleted_counter}, Chyby: {errors}")
    except pymysql.MySQLError as e:
        print(f"Kritická chyba DB: {e}")
    finally:
        # Close the connection on every exit path (early returns and errors too).
        if conn is not None:
            conn.close()
 # Script entry point: run the marking step first, then the deletion/listing step.
 if __name__ == "__main__":
    step_1_mark_duplicates()
    step_2_delete_files()
--- a/ReadKulhavaPST.py
+++ b/ReadKulhavaPST.py
@@ -0,0 +1,92 @@
 import win32com.client
 import os
 # Path of the PST file that main() checks for and mounts in Outlook
 pst_path = r'd:\Dropbox\!!!Days\Downloads Z230\PST\tkulhava.pst'
 def main():
    """Mount the PST in Outlook, print the subject of every item, then detach it.

    Returns None. A missing PST file and any error raised while talking to
    Outlook are reported on stdout instead of being raised.
    """
    if not os.path.exists(pst_path):
        print(f"Error: File not found at {pst_path}")
        return
    try:
        # Connect to Outlook
        outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
        # 1. Add the PST to Outlook (this makes it visible in the sidebar)
        print(f"Mounting PST: {pst_path}...")
        outlook.AddStore(pst_path)
        # 2. Find the root folder of this PST by looking for 'tkulhava' in the
        #    folder names.
        pst_name = "tkulhava"  # derived from filename usually
        root_folder = None
        for folder in outlook.Folders:
            if pst_name.lower() in folder.Name.lower():
                root_folder = folder
                break
        # Fallback: take the last folder in the list if no name matched.
        # NOTE(review): the last folder is assumed to be the store just added;
        # if it is not, a different store gets walked and detached.
        if not root_folder:
            root_folder = outlook.Folders.GetLast()
        try:
            print(f"Successfully opened root folder: {root_folder.Name}")
            print("=" * 50)
            # 3. Start the recursive walk
            print_subjects_recursively(root_folder)
        finally:
            # 4. Cleanup: always detach the PST, even if the walk failed, so it
            #    does not stay mounted in the Outlook profile.
            outlook.RemoveStore(root_folder)
        print("\nDone. PST detached.")
    except Exception as e:
        print(f"An error occurred: {e}")
 def print_subjects_recursively(folder):
    """Print the subject of every item in folder, then descend into its subfolders.

    A folder header is printed only when the folder holds at least one item.
    Items that raise while being read are skipped silently; a folder that
    raises is reported with a "Skipping restricted folder" line.
    """
    try:
        has_items = folder.Items.Count > 0
        if has_items:
            print(f"\n--- Folder: {folder.Name} ---")
        for entry in folder.Items:
            try:
                kind = entry.Class
                # Class 43 is a standard MailItem; anything else is labelled
                # with its type (e.g. meeting requests, reports).
                if kind != 43:
                    line = f"[{type_name(kind)}] Subject: {entry.Subject}"
                else:
                    line = f"Subject: {entry.Subject}"
                print(line)
            except Exception:
                # Corrupted or unreadable item: move on to the next one
                continue
        # Depth-first recursion into the child folders
        for child in folder.Folders:
            print_subjects_recursively(child)
    except Exception as e:
        print(f"Skipping restricted folder '{folder.Name}': {e}")
 def type_name(class_id):
    """Return a short label for an Outlook item class id (used for non-mail items)."""
    if class_id == 53:
        label = "Meeting"
    elif class_id == 46:
        label = "Report"
    else:
        # Unknown class: fall back to the numeric id
        label = f"Type {class_id}"
    return label
 # Run the PST walk only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/ReadKulhavaSavePhotos.py
+++ b/ReadKulhavaSavePhotos.py
@@ -0,0 +1,142 @@
 import win32com.client
 import os
 import pathlib
 # --- CONFIGURATION ---
 # PST file to mount, and the directory the extracted pictures are written to
 pst_path = r'd:\Dropbox\!!!Days\Downloads Z230\PST\tkulhava.pst'
 output_dir = r'd:\Dropbox\!!!Days\Downloads Z230\PST\pictures'
 # Image extensions to look for (compared against the lower-cased attachment extension)
 IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tif', '.tiff'}
 def fix_encoding(text):
    """Repair text that was decoded as cp1252 although it was cp1250.

    Returns "" for a falsy input, and the input unchanged when it cannot be
    round-tripped through cp1252 -> cp1250.
    """
    if not text:
        return ""
    try:
        raw_bytes = text.encode('cp1252')
        repaired = raw_bytes.decode('cp1250')
    except Exception:
        # Not representable in cp1252 (or not valid cp1250): keep as is
        repaired = text
    return repaired
 def get_unique_filepath(directory, filename):
    """Return a path inside directory that does not exist yet.

    The filename is first reduced to alphanumerics plus ".", "_", "-" and
    space. If that name is taken, "_1", "_2", ... is appended to the stem
    until a free name is found.
    """
    allowed_extra = "._- "
    safe_name = "".join(ch for ch in filename if ch.isalnum() or ch in allowed_extra)
    base_dir = pathlib.Path(directory)
    candidate = base_dir / safe_name
    if not candidate.exists():
        return candidate
    # Name is taken: keep stem and extension, bump a numeric suffix
    stem, suffix = candidate.stem, candidate.suffix
    attempt = 1
    candidate = base_dir / f"{stem}_{attempt}{suffix}"
    while candidate.exists():
        attempt += 1
        candidate = base_dir / f"{stem}_{attempt}{suffix}"
    return candidate
 def process_item_attachments(item, save_folder):
    """Save every image attachment of item into save_folder.

    Attachments without a file name or with an extension outside
    IMAGE_EXTENSIONS are ignored. A failure on one attachment is printed and
    the remaining attachments are still processed.
    """
    try:
        if not (item.Attachments.Count > 0):
            return
        for attachment in item.Attachments:
            try:
                raw_name = getattr(attachment, 'FileName', '')
                if not raw_name:
                    continue
                # Attachment names can carry the same mis-decoded text as folder names
                clean_name = fix_encoding(raw_name)
                extension = os.path.splitext(clean_name)[1].lower()
                if extension not in IMAGE_EXTENSIONS:
                    continue
                target = get_unique_filepath(save_folder, clean_name)
                attachment.SaveAsFile(str(target))
                print(f"   [SAVED] {target.name}")
            except Exception as e:
                print(f"   [ERROR saving attachment]: {e}")
    except Exception:
        # Some items fail as soon as .Attachments is accessed; skip them quietly
        pass
 def scan_folder_recursively(folder, save_folder):
    """Process the attachments of every item in folder, then of all its subfolders.

    A progress line is printed for folders that contain items. A folder that
    raises is reported with a "Skipping folder" line and the walk continues
    with the caller's remaining folders.
    """
    try:
        display_name = fix_encoding(folder.Name)
        item_count = folder.Items.Count
        if item_count > 0:
            print(f"Scanning Folder: {display_name}...")
        # Items of this folder first ...
        for entry in folder.Items:
            process_item_attachments(entry, save_folder)
        # ... then depth-first into the children
        for child in folder.Folders:
            scan_folder_recursively(child, save_folder)
    except Exception as e:
        print(f"Skipping folder '{fix_encoding(folder.Name)}': {e}")
 def main():
    """Mount the PST in Outlook, save all picture attachments, then detach it.

    Creates output_dir when it does not exist. Returns None. A missing PST
    file and any error raised while talking to Outlook are reported on stdout
    instead of being raised.
    """
    # 1. Ensure the output directory exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(f"Created directory: {output_dir}")
    if not os.path.exists(pst_path):
        print(f"Error: PST file not found at {pst_path}")
        return
    try:
        # 2. Connect to Outlook
        outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
        print(f"Mounting PST: {pst_path}...")
        outlook.AddStore(pst_path)
        # 3. Find the PST root folder by name
        pst_name = "tkulhava"  # Usually derived from filename
        root_folder = None
        for folder in outlook.Folders:
            if pst_name.lower() in folder.Name.lower():
                root_folder = folder
                break
        # Fallback: take the last folder in the list if no name matched.
        # NOTE(review): the last folder is assumed to be the store just added;
        # if it is not, a different store gets scanned and detached.
        if not root_folder:
            root_folder = outlook.Folders.GetLast()
        try:
            print(f"Opened: {fix_encoding(root_folder.Name)}")
            print(f"Saving pictures to: {output_dir}")
            print("=" * 50)
            # 4. Start processing
            scan_folder_recursively(root_folder, output_dir)
        finally:
            # 5. Cleanup: always detach the PST, even if the scan failed, so it
            #    does not stay mounted in the Outlook profile.
            outlook.RemoveStore(root_folder)
        print("\nDone. PST detached.")
    except Exception as e:
        print(f"Critical Error: {e}")
 # Run the extraction only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/DuplicateTest.py
+++ b/DuplicateTest.py
@@ -0,0 +1,67 @@
 import pandas as pd
 from sqlalchemy import create_engine
 import time
 # --- CONFIGURATION ---
 # NOTE(review): credentials are hard-coded in source control; consider loading
 # them from the environment or an untracked config file.
 db_user = 'root'
 db_pass = 'Vlado9674+'
 db_host = '192.168.1.76'
 db_port = '3307'
 db_name = 'torrents'  # <--- FILL IN THE DATABASE NAME HERE
 # --- CONNECTION ---
 # NOTE(review): the password is interpolated into the URL unescaped and contains
 # a '+'; confirm the installed SQLAlchemy parses it as intended.
 connection_string = f'mysql+mysqlconnector://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}'
 engine = create_engine(connection_string)
 # SQL query - full_path is selected too, so that Pandas can show a sample path
 # WARNING (author's estimate): loading 5.8M full_path strings takes a lot of RAM (roughly 2-4 GB).
 query = """
 SELECT id, blake3, file_size, full_path 
 FROM file_md5_index FORCE INDEX (idx_full_path_prefix)
 WHERE host_name='Tower1' AND full_path LIKE '/mnt/user/#Library%'
 """
 # Flat script: load the rows into a DataFrame, group the duplicated hashes and
 # print the 20 groups that waste the most space. Both phases are timed.
 print("1. Začínám stahovat data z MySQL do RAM...")
 start_load = time.time()
 try:
    # Download the data
    df = pd.read_sql(query, engine)
    end_load = time.time()
    print(f"-> Data stažena za: {end_load - start_load:.2f} sekund")
    print(f"-> Počet řádků v paměti: {len(df)}")
    print("\n2. Začínám hledat duplicity (Pandas GroupBy)...")
    start_process = time.time()
    # Duplicate detection:
    # keep only the rows whose hash occurs more than once
    duplicity = df[df.duplicated(subset=['blake3'], keep=False)]
    if not duplicity.empty:
        # Group by hash
        vysledek = duplicity.groupby('blake3').agg({
            'file_size': 'first',  # File size (assumed equal for equal hashes)
            'id': 'count',  # Number of occurrences
            'full_path': lambda x: x.iloc[0]  # Sample: first path (author's note: faster than 'first')
        }).rename(columns={'id': 'pocet_kopii'})
        # Wasted space = size * (copies - 1); sort so the biggest waste comes first.
        # No extra filtering happens here: only rows with a duplicated hash reach this point.
        vysledek['celkove_plytvani'] = vysledek['file_size'] * (vysledek['pocet_kopii'] - 1)
        vysledek = vysledek.sort_values('celkove_plytvani', ascending=False)
        end_process = time.time()
        print(f"-> Zpracováno za: {end_process - start_process:.4f} sekund")
        print("\n--- TOP 20 NEJVĚTŠÍCH DUPLICIT ---")
        # Show the hash (index), number of copies, size of one file and a sample path
        print(vysledek[['pocet_kopii', 'file_size', 'full_path']].head(20))
        print(f"\nCelkem nalezeno {len(vysledek)} unikátních souborů, které mají duplicity.")
    else:
        print("Nebyly nalezeny žádné duplicity.")
 except Exception as e:
    # Any failure (connection, query, memory) ends up here with a generic hint
    print(f"\nCHYBA: {e}")
    print("Zkontrolujte prosím název databáze a jestli máte dost paměti RAM.")
--- a/WalkFilesOnBackupHDD/10
+++ b/WalkFilesOnBackupHDD/10
@@ -0,0 +1,295 @@
 #!/usr/bin/python3
 # -*- coding: utf-8 -*-
 """
 FAST FILE HASH INDEXER – WINDOWS CLIENT (EXTERNAL DISKS)
 - Mode: PHYSICAL BACKUP
 - Hostname in DB = Disk Label (e.g., #HD015)
 - Path in DB     = Relative path (e.g., /Movies/Film.mkv)
 """
 import os, time
 import pymysql
 import socket
 import platform
 import sys
 from blake3 import blake3
 # ==============================
 # CONFIG
 # ==============================
 # NOTE(review): CHUNK_SIZE is assigned again further down (4 MB); that later
 # assignment is the one in effect, so this 5 MB value is never used.
 CHUNK_SIZE = 5 * 1024 * 1024   # 5 MB
 PROGRESS_MIN_SIZE = 500 * 1024 * 1024  # 500 MB
 PROGRESS_INTERVAL = 1.0  # seconds
 # Directory names pruned from the os.walk() scan in main()
 EXCLUDED_DIRS = {"$RECYCLE.BIN", "System Volume Information", "RECYCLER", "msdownld.tmp"}
 # --- Size limits ---
 FILE_MIN_SIZE = 0
 FILE_MAX_SIZE = 1024 * 1024 * 1024* 1024  # 1TB
 # --- Database settings ---
 # NOTE(review): credentials are hard-coded in source control; consider loading
 # them from the environment or an untracked config file.
 DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
 }
 CHUNK_SIZE = 4 * 1024 * 1024  # 4 MB
 PRINT_SKIPPED = False  # True = also print the skipped files
 # ==============================
 # SYSTEM INFO
 # ==============================
 # Physical PC name (only printed to the console; the disk label goes into the DB)
 REAL_PC_HOSTNAME = socket.gethostname()
 OS_NAME = platform.system()
 # ==============================
 # FUNCTIONS
 # ==============================
 def compute_blake3(path: str) -> bytes:
    """Return the BLAKE3 digest of the file at path, read in CHUNK_SIZE pieces.

    Files of at least PROGRESS_MIN_SIZE bytes get a progress line at most every
    PROGRESS_INTERVAL seconds plus a summary line at the end. An exception
    raised while reading or hashing is printed and re-raised.
    """
    hasher = blake3()
    total_size = os.path.getsize(path)
    verbose = total_size >= PROGRESS_MIN_SIZE
    done_bytes = 0
    started = time.time()
    last_tick = started
    try:
        with open(path, "rb") as stream:
            # iter() with a sentinel stops at the first empty read (end of file)
            for block in iter(lambda: stream.read(CHUNK_SIZE), b""):
                hasher.update(block)
                done_bytes += len(block)
                if not verbose:
                    continue
                now = time.time()
                if now - last_tick < PROGRESS_INTERVAL:
                    continue
                elapsed = now - started
                speed = done_bytes / elapsed if elapsed > 0 else 0
                percent = done_bytes / total_size * 100
                eta = (total_size - done_bytes) / speed if speed > 0 else 0
                print(
                    f"   ⏳ {percent:6.2f}% | "
                    f"{done_bytes/1024/1024:8.1f} / {total_size/1024/1024:.1f} MB | "
                    f"{speed/1024/1024:6.1f} MB/s | "
                    f"ETA {time.strftime('%H:%M:%S', time.gmtime(eta))}",
                    flush=True
                )
                last_tick = now
        if verbose:
            total_time = time.time() - started
            avg_speed = total_size / total_time if total_time > 0 else 0
            print(
                f"   ✅ DONE | "
                f"{total_size/1024/1024:.1f} MB | "
                f"avg {avg_speed/1024/1024:.1f} MB/s | "
                f"time {time.strftime('%H:%M:%S', time.gmtime(total_time))}",
                flush=True
            )
        return hasher.digest()
    except Exception as e:
        print(f"⚠️ HASH ERROR: {path} - {e}")
        raise
 def get_drive_info():
    """Ask for a drive letter and a disk ID; return (drive_root, disk_label).

    Re-prompts until the letter names an existing directory root and the label
    has at least two characters. The label is what main() stores as host_name.
    """
    print("\n💿 --- NASTAVENÍ SKENOVÁNÍ (EXTERNÍ DISK) ---")
    # 1. Drive letter
    drive_root = None
    while drive_root is None:
        answer = input("📂 Zadejte písmeno disku ve Windows (např. 'E'): ").strip().upper()
        # Tolerate inputs such as "E:", "E:\" or "E/"
        letter = "".join(ch for ch in answer if ch not in ":\\/")
        if len(letter) != 1 or not letter.isalpha():
            print("❌ Neplatný formát.")
            continue
        candidate = f"{letter}:\\"
        if os.path.isdir(candidate):
            drive_root = candidate
        else:
            print(f"❌ Disk {candidate} není dostupný.")
    # 2. Disk name -> HOST_NAME
    label_prompt = "🏷️  Zadejte ID disku (bude uloženo jako 'host_name', např. '#HD015'): "
    disk_label = input(label_prompt).strip()
    while len(disk_label) < 2:
        print("❌ Název je příliš krátký.")
        disk_label = input(label_prompt).strip()
    return drive_root, disk_label
 def size_allowed(size: int) -> bool:
    """Return True when size lies within [FILE_MIN_SIZE, FILE_MAX_SIZE]; a None limit is not enforced."""
    below_min = FILE_MIN_SIZE is not None and size < FILE_MIN_SIZE
    if below_min:
        return False
    above_max = FILE_MAX_SIZE is not None and size > FILE_MAX_SIZE
    return not above_max
 # ==============================
 # MAIN
 # ==============================
 def main() -> None:
    """Index the files of one external disk into the file_md5_index table.

    Asks for the drive and disk label, loads the rows already stored for that
    label, walks the drive and hashes every file whose path, size or mtime is
    not already in the index, then inserts or updates its row.
    """
    print("🚀 BLAKE3 External Disk Indexer", flush=True)
    print(f"🖥 Running on PC: {REAL_PC_HOSTNAME}", flush=True)
    # Collect the inputs
    scan_root, disk_hostname = get_drive_info()
    print(f"✅ Konfigurace:")
    print(f"   Zdroj (Windows) : {scan_root}")
    print(f"   DB Hostname     : {disk_hostname}")
    print(f"   DB Cesty        : /Složka/Soubor...")
    try:
        db = pymysql.connect(**DB_CONFIG)
        cur = db.cursor()
    except Exception as e:
        print(f"❌ DB Connection failed: {e}")
        input("Enter pro konec...")
        return
    print(f"📥 Načítám index pro disk: '{disk_hostname}'...", flush=True)
    # === OPTIMIZATION: look up exactly by host_name ===
    cur.execute("""
        SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
        FROM file_md5_index
        WHERE host_name = %s
    """, (disk_hostname,))
    # Map: { "/Folder/File.ext": (size, mtime) }
    indexed_map = {row[0]: (row[1], row[2]) for row in cur.fetchall()}
    print(f"✅ Nalezeno {len(indexed_map):,} souborů v DB pro tento disk.", flush=True)
    print("======================================", flush=True)
    new_files = 0
    skipped = 0
    # NOTE(review): 'filtered' is counted below but never included in the summary.
    filtered = 0
    errors = 0
    seen_paths = set()
    # --- SCAN ---
    for root, dirs, files in os.walk(scan_root):
        # Skip system folders (in-place edit so os.walk does not descend into them)
        dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
        for fname in files:
            disk_path = os.path.join(root, fname)
            # 1. Stat (size, time)
            try:
                stat = os.stat(disk_path)
            except OSError:
                errors += 1
                continue
            size = stat.st_size
            if not size_allowed(size):
                filtered += 1
                continue
            # 2. Build the clean path for the DB
            # E:\Filmy\Avatar.mkv -> Filmy\Avatar.mkv
            try:
                rel_path = os.path.relpath(disk_path, scan_root)
            except ValueError:
                errors += 1
                continue
            # Normalize to Linux style: Filmy/Avatar.mkv
            clean_path = rel_path.replace("\\", "/")
            # Add the leading slash: /Filmy/Avatar.mkv
            if not clean_path.startswith("/"):
                clean_path = "/" + clean_path
            if clean_path in seen_paths:
                continue
            seen_paths.add(clean_path)
            mtime = int(stat.st_mtime)
            # === STRICT CHECK ===
            # Unchanged only if both size and whole-second mtime equal the indexed values
            is_match = False
            if clean_path in indexed_map:
                db_size, db_mtime = indexed_map[clean_path]
                if size == db_size and mtime == db_mtime:
                    is_match = True
            if is_match:
                skipped += 1
                if PRINT_SKIPPED:
                    print(f"⏭ SKIP {clean_path}", flush=True)
                continue
            # === INSERT / UPDATE ===
            print("➕ NEW / UPDATED", flush=True)
            print(f"   File: {clean_path}", flush=True)
            print(f"   Size: {size:,} B", flush=True)
            try:
                b3 = compute_blake3(disk_path)
            except Exception:
                errors += 1
                continue
            # NOTE(review): no commit is issued here; this relies on "autocommit" in DB_CONFIG.
            cur.execute("""
                INSERT INTO file_md5_index
                    (os_name, host_name, full_path, file_name, directory,
                     file_size, mtime, blake3)
                VALUES (%s, %s, %s, %s, %s, %s, FROM_UNIXTIME(%s), %s)
                ON DUPLICATE KEY UPDATE
                    file_size  = VALUES(file_size),
                    mtime      = VALUES(mtime),
                    blake3     = VALUES(blake3),
                    updated_at = CURRENT_TIMESTAMP
            """, (
                OS_NAME,  # e.g. 'Windows' (where the scan ran)
                disk_hostname,  # the disk label, e.g. '#HD015', is stored here
                clean_path,  # the relative path, e.g. '/Filmy/Avatar.mkv', is stored here
                fname,
                os.path.dirname(clean_path),
                size,
                mtime,
                b3,
            ))
            new_files += 1
            print(f"   Hash: {b3.hex()}", flush=True)
            print("--------------------------------------", flush=True)
    print("======================================", flush=True)
    print(f"✅ Hotovo : {new_files}")
    print(f"⏭ Shoda  : {skipped}")
    print(f"⚠️ Chyby  : {errors}")
    print("🏁 Konec.")
    cur.close()
    db.close()
    # input("\nStiskněte Enter pro ukončení...")
 # Run the indexer only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/WalkFilesOnBackupHDD/20
+++ b/WalkFilesOnBackupHDD/20
@@ -0,0 +1,351 @@
 #!/usr/bin/python3
 # -*- coding: utf-8 -*-
 r"""
 FAST FILE HASH INDEXER – WINDOWS CLIENT (HARDCODED CONFIG)
 - Mode: PHYSICAL BACKUP
 - Hostname in DB = Disk Label (e.g., #HD015)
 - Path in DB     = Relative path (e.g., /Movies/Film.mkv)
 """
 import os, time
 import pymysql
 import socket
 import platform
 import sys
 from blake3 import blake3
 # ==============================
 # ⚙️ USER CONFIGURATION
 # ==============================
 # Drive letter of the disk to scan; main() builds the scan root as "<letter>:\".
 DISK_DRIVE_LETTER = "f"  # (e.g., "E", "F", "P")
 # Label stored in the host_name column and used to select this disk's rows.
 DISK_HOSTNAME = "#HD16"  # (e.g., "#HD015")
 # 🔒 SAFETY SWITCH
 # True  = LIST ONLY (No DB changes). "Simulates" the run.
 # False = EXECUTE (Deletes and Inserts into DB).
 # NOTE: new/changed files are still hashed in dry-run mode; only the
 # DELETE and INSERT statements are skipped.
 DRY_RUN = False
 # ==============================
 # TECHNICAL CONFIG
 # ==============================
 # Number of bytes read per iteration while hashing a file.
 CHUNK_SIZE = 5 * 1024 * 1024  # 5 MB
 # Files at least this large print progress lines while being hashed.
 PROGRESS_MIN_SIZE = 500 * 1024 * 1024  # 500 MB
 # Minimum time between two progress lines for the same file.
 PROGRESS_INTERVAL = 1.0  # seconds
 # Directory names pruned from os.walk() in both scan phases.
 EXCLUDED_DIRS = {"$RECYCLE.BIN", "System Volume Information", "RECYCLER", "msdownld.tmp"}
 # --- File Size Limits ---
 # Inclusive bounds checked by size_allowed(); None disables a bound.
 FILE_MIN_SIZE = 0
 FILE_MAX_SIZE = 1024 * 1024*1024*1024  # 1TB
 # --- DB Config ---
 # Keyword arguments passed verbatim to pymysql.connect().
 # NOTE(review): root credentials are hard-coded in source control —
 # consider loading them from the environment or an untracked config file.
 DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
 }
 PRINT_SKIPPED = False  # Set True to see files that were already in DB
 # ==============================
 # SYSTEM INFO
 # ==============================
 # Name of the machine running the scan; only printed in the banner.
 REAL_PC_HOSTNAME = socket.gethostname()
 # Operating system name stored in the os_name column of each row.
 OS_NAME = platform.system()
 # ==============================
 # FUNCTIONS
 # ==============================
 def compute_blake3(path: str) -> bytes:
    """Return the BLAKE3 digest of the file at *path*.

    The file is read in CHUNK_SIZE pieces. Files of at least
    PROGRESS_MIN_SIZE bytes additionally print a progress line at most
    once per PROGRESS_INTERVAL seconds and a summary line when done.

    Args:
        path: Path of the file to hash.

    Returns:
        The raw digest bytes.

    Raises:
        Exception: Any error raised while sizing, opening or reading the
            file is printed with a "HASH ERROR" prefix and re-raised.
    """
    h = blake3()
    processed = 0
    start_time = time.time()
    last_report = start_time
    try:
        # Sized inside the try so that a file which disappeared or became
        # unreadable after the directory walk is reported like any other
        # hashing failure instead of failing without a message.
        total_size = os.path.getsize(path)
        show_progress = total_size >= PROGRESS_MIN_SIZE
        with open(path, "rb") as f:
            while True:
                chunk = f.read(CHUNK_SIZE)
                if not chunk:
                    break
                h.update(chunk)
                processed += len(chunk)
                if show_progress:
                    now = time.time()
                    if now - last_report >= PROGRESS_INTERVAL:
                        elapsed = now - start_time
                        speed = processed / elapsed if elapsed > 0 else 0
                        percent = processed / total_size * 100
                        # Clamp at zero: if the file grew while being read,
                        # a negative ETA could make time.gmtime() fail on
                        # some platforms.
                        remaining = max(total_size - processed, 0)
                        eta = remaining / speed if speed > 0 else 0
                        print(
                            f"   ⏳ {percent:6.2f}% | "
                            f"{processed / 1024 / 1024:8.1f} / {total_size / 1024 / 1024:.1f} MB | "
                            f"{speed / 1024 / 1024:6.1f} MB/s | "
                            f"ETA {time.strftime('%H:%M:%S', time.gmtime(eta))}",
                            flush=True
                        )
                        last_report = now
        if show_progress:
            total_time = time.time() - start_time
            avg_speed = total_size / total_time if total_time > 0 else 0
            print(
                f"   ✅ DONE | "
                f"{total_size / 1024 / 1024:.1f} MB | "
                f"avg {avg_speed / 1024 / 1024:.1f} MB/s | "
                f"time {time.strftime('%H:%M:%S', time.gmtime(total_time))}",
                flush=True
            )
        return h.digest()
    except Exception as e:
        # Log here, then let the caller decide how to count the failure.
        print(f"⚠️ HASH ERROR: {path} - {e}")
        raise
 def size_allowed(size: int) -> bool:
    """Return True when *size* lies within the configured size limits.

    FILE_MIN_SIZE and FILE_MAX_SIZE are inclusive bounds; a bound set to
    None is not enforced.
    """
    below_minimum = FILE_MIN_SIZE is not None and size < FILE_MIN_SIZE
    if below_minimum:
        return False
    above_maximum = FILE_MAX_SIZE is not None and size > FILE_MAX_SIZE
    return not above_maximum
 def normalize_db_path(scan_root, disk_path):
    r"""Convert a physical path under *scan_root* to the DB path format.

    The result is relative to *scan_root*, uses forward slashes and always
    starts with a slash, e.g. E:\Movies\File.mkv -> /Movies/File.mkv.

    Args:
        scan_root: Root directory of the scanned disk.
        disk_path: Path of a file located under *scan_root*.

    Returns:
        The normalized path, or None when *disk_path* cannot be expressed
        as a location inside *scan_root* (os.path.relpath raised
        ValueError, or the relative path climbs out of the root).
    """
    try:
        rel_path = os.path.relpath(disk_path, scan_root)
    except ValueError:
        return None
    # A relative path that starts by climbing to the parent directory is
    # not a location on the scanned disk and must not be stored as one.
    if rel_path == os.pardir or rel_path.startswith(os.pardir + os.sep):
        return None
    # Windows backslash to slash
    clean_path = rel_path.replace("\\", "/")
    # Ensure leading slash
    if not clean_path.startswith("/"):
        clean_path = "/" + clean_path
    return clean_path
 # ==============================
 # MAIN
 # ==============================
 def _load_indexed_map(cur):
    """Return {full_path: (file_size, mtime_unix)} for rows of DISK_HOSTNAME."""
    cur.execute("""
        SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
        FROM file_md5_index
        WHERE host_name = %s
    """, (DISK_HOSTNAME,))
    return {row[0]: (row[1], row[2]) for row in cur.fetchall()}

 def _collect_disk_paths(scan_root):
    """Return the normalized DB path of every file found under scan_root."""
    found = set()
    for root, dirs, files in os.walk(scan_root):
        # Prune excluded directories in place so os.walk does not descend.
        dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
        for fname in files:
            clean_path = normalize_db_path(scan_root, os.path.join(root, fname))
            if clean_path:
                found.add(clean_path)
    return found

 def _delete_missing(cur, indexed_map, paths_to_delete):
    """Delete the given paths from the DB in batches; return how many succeeded."""
    batch_size = 1000
    to_delete_list = list(paths_to_delete)
    deleted = 0
    for i in range(0, len(to_delete_list), batch_size):
        batch = to_delete_list[i: i + batch_size]
        format_strings = ','.join(['%s'] * len(batch))
        query = f"DELETE FROM file_md5_index WHERE host_name = %s AND full_path IN ({format_strings})"
        try:
            cur.execute(query, [DISK_HOSTNAME] + batch)
        except Exception as e:
            # Best effort: report the failed batch and keep going. Its rows
            # stay in the DB, so they also stay in the local map.
            print(f"❌ Error deleting batch: {e}")
            continue
        print(f"   ... deleted batch {i}-{i + len(batch)}")
        for p in batch:
            del indexed_map[p]
        deleted += len(batch)
    return deleted

 def _scan_and_update(cur, scan_root, indexed_map):
    """Hash new or changed files and upsert them; return the run counters.

    Returns a tuple (new_files, skipped, filtered, errors).
    """
    new_files = 0
    skipped = 0
    filtered = 0
    errors = 0
    seen_paths = set()
    for root, dirs, files in os.walk(scan_root):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
        for fname in files:
            disk_path = os.path.join(root, fname)
            try:
                stat = os.stat(disk_path)
            except OSError:
                errors += 1
                continue
            size = stat.st_size
            if not size_allowed(size):
                filtered += 1
                continue
            clean_path = normalize_db_path(scan_root, disk_path)
            if not clean_path:
                errors += 1
                continue
            if clean_path in seen_paths:
                continue
            seen_paths.add(clean_path)
            mtime = int(stat.st_mtime)
            # === MATCH CHECK ===
            # Unchanged size and mtime means the stored hash is still valid.
            known = indexed_map.get(clean_path)
            if known is not None and size == known[0] and mtime == known[1]:
                skipped += 1
                if PRINT_SKIPPED:
                    print(f"⏭ SKIP {clean_path}", flush=True)
                continue
            # === INSERT / UPDATE ===
            print("➕ NEW / UPDATED", flush=True)
            print(f"   File: {clean_path}", flush=True)
            print(f"   Size: {size:,} B", flush=True)
            try:
                b3 = compute_blake3(disk_path)
            except Exception:
                # compute_blake3 has already printed the failure.
                errors += 1
                continue
            if DRY_RUN:
                print(f"🛡️  [DRY RUN] Would INSERT/UPDATE: {clean_path}")
                print(f"   Hash: {b3.hex()}")
                new_files += 1
            else:
                cur.execute("""
                    INSERT INTO file_md5_index
                        (os_name, host_name, full_path, file_name, directory,
                        file_size, mtime, blake3)
                    VALUES (%s, %s, %s, %s, %s, %s, FROM_UNIXTIME(%s), %s)
                    ON DUPLICATE KEY UPDATE
                        file_size  = VALUES(file_size),
                        mtime      = VALUES(mtime),
                        blake3     = VALUES(blake3),
                        updated_at = CURRENT_TIMESTAMP
                """, (
                    OS_NAME,
                    DISK_HOSTNAME,
                    clean_path,
                    fname,
                    os.path.dirname(clean_path),
                    size,
                    mtime,
                    b3,
                ))
                new_files += 1
                print(f"   Hash: {b3.hex()}", flush=True)
            print("--------------------------------------", flush=True)
    return new_files, skipped, filtered, errors

 def main():
    """Synchronize the DB index of DISK_HOSTNAME with the disk contents.

    Phase 1 removes DB rows whose files no longer exist on the disk.
    Phase 2 hashes files that are new or whose size/mtime differ from the
    DB and upserts them. With DRY_RUN enabled the DELETE and INSERT
    statements are skipped and the actions are only printed.

    Returns early (after printing an error) when the drive is not present
    or the DB connection cannot be opened. The cursor and connection are
    closed even if the run is aborted by an exception.
    """
    print("🚀 BLAKE3 External Disk Indexer", flush=True)
    print(f"🖥 Running on PC: {REAL_PC_HOSTNAME}", flush=True)
    if DRY_RUN:
        print("🛡️  DRY RUN MODE ACTIVE: No changes will be made to DB.", flush=True)
    else:
        print("⚠️  LIVE MODE: Changes WILL be committed to DB.", flush=True)
    # Build root path
    scan_root = f"{DISK_DRIVE_LETTER}:\\"
    if not os.path.isdir(scan_root):
        print(f"❌ ERROR: Drive '{scan_root}' not found!")
        print("   Please check DISK_DRIVE_LETTER in config.")
        return
    print("✅ Config:")
    print(f"   Source (Win) : {scan_root}")
    print(f"   DB Hostname  : {DISK_HOSTNAME}")
    try:
        db = pymysql.connect(**DB_CONFIG)
        cur = db.cursor()
    except Exception as e:
        print(f"❌ DB Connection failed: {e}")
        return
    try:
        # === LOAD EXISTING DB RECORDS ===
        print(f"📥 Loading DB index for: '{DISK_HOSTNAME}'...", flush=True)
        indexed_map = _load_indexed_map(cur)
        print(f"✅ Found {len(indexed_map):,} files in DB for this disk.", flush=True)
        # =========================================================
        # PHASE 1: CLEANUP (DELETE MISSING FILES)
        # =========================================================
        print("======================================", flush=True)
        print("🧹 PHASE 1: Checking for deleted files...", flush=True)
        paths_to_delete = set(indexed_map) - _collect_disk_paths(scan_root)
        # In dry-run mode the summary reports what would have been deleted.
        deleted = len(paths_to_delete)
        if paths_to_delete:
            print(f"🗑️  Found {len(paths_to_delete):,} files to delete from DB.")
            if DRY_RUN:
                print("🛡️  [DRY RUN] Listing files to be deleted (No action taken):")
                for p in sorted(paths_to_delete)[:20]:  # Print first 20
                    print(f"   - {p}")
                if len(paths_to_delete) > 20:
                    print(f"   ... and {len(paths_to_delete) - 20} more.")
            else:
                deleted = _delete_missing(cur, indexed_map, paths_to_delete)
                if deleted == len(paths_to_delete):
                    print("✅ Cleanup complete.")
                else:
                    print(f"⚠️ Cleanup incomplete: {len(paths_to_delete) - deleted} files could not be deleted.")
        else:
            print("✅ No deleted files detected.")
        # =========================================================
        # PHASE 2: SCAN & UPDATE (HASHING)
        # =========================================================
        print("======================================", flush=True)
        print("🚀 PHASE 2: Scanning for changes & new files...", flush=True)
        new_files, skipped, filtered, errors = _scan_and_update(cur, scan_root, indexed_map)
        print("======================================", flush=True)
        print(f"✅ Processed  : {new_files}")
        print(f"⏭ Skipped    : {skipped}")
        print(f"🚫 Filtered   : {filtered}")
        print(f"🗑 Deleted    : {deleted} " + ("(DRY RUN)" if DRY_RUN else ""))
        print(f"⚠️ Errors     : {errors}")
        print("🏁 Done.")
    finally:
        # Release DB resources even when the run is aborted by an exception.
        try:
            cur.close()
        finally:
            db.close()
 # Run main() only when this file is executed directly as a script.
 if __name__ == "__main__":
    main()
Author	SHA1	Message	Date
vlado	50ee068af9	tw22	2026-01-17 20:23:58 +01:00
vladimir.buzalka	c30a582323	Merge remote-tracking branch 'origin/master'	2026-01-13 16:43:13 +01:00
vladimir.buzalka	01aa1249b9	z230	2026-01-13 16:42:40 +01:00
vlado	b74e180022	tw22	2026-01-11 21:13:38 +01:00
vlado	2037d1b887	tw22	2026-01-11 20:14:52 +01:00
vlado	6cdabc64b4	tw22	2026-01-08 10:15:45 +01:00
vladimir.buzalka	2aee823e87	z230	2026-01-06 10:09:51 +01:00
vladimir.buzalka	b61a8a5473	z230	2026-01-06 10:09:25 +01:00
vladimir.buzalka	83f2d0dafc	z230	2026-01-06 10:05:35 +01:00