#!/usr/bin/python3
# -*- coding: utf-8 -*-
r"""
FAST FILE HASH INDEXER – WINDOWS CLIENT (HARDCODED CONFIG)
- Mode: PHYSICAL BACKUP
- Hostname in DB = Disk Label (e.g., #HD015)
- Path in DB = Relative path (e.g., /Movies/Film.mkv)
"""

import os
import time
import socket
import platform
import sys
import hashlib

# NOTE: third-party dependencies (pymysql, blake3) are imported lazily inside
# the functions that need them, so the pure helpers in this module stay
# importable (and testable) on machines without those packages installed.

# ==============================
# ⚙️ USER CONFIGURATION
# ==============================
DISK_DRIVE_LETTER = "z"  # (e.g., "E", "F", "P")
DISK_HOSTNAME = "TW22"   # (e.g., "#HD015")

# 🔒 SAFETY SWITCH
DRY_RUN = False

# ==============================
# TECHNICAL CONFIG
# ==============================
CHUNK_SIZE = 5 * 1024 * 1024           # 5 MB read size while hashing
PROGRESS_MIN_SIZE = 500 * 1024 * 1024  # 500 MB: show progress for files at least this big
PROGRESS_INTERVAL = 1.0                # seconds between progress reports

EXCLUDED_DIRS = {"$RECYCLE.BIN", "System Volume Information", "RECYCLER", "msdownld.tmp"}

# --- File Size Limits ---
FILE_MIN_SIZE = 0
FILE_MAX_SIZE = 1024 * 1024 * 1024 * 1024  # 1TB

# --- DB Config ---
# SECURITY NOTE(review): credentials are hardcoded in source. Consider loading
# them from environment variables or a config file kept out of version control.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

PRINT_SKIPPED = False  # Set True to see files that were already in DB

# ==============================
# SYSTEM INFO
# ==============================
REAL_PC_HOSTNAME = socket.gethostname()
OS_NAME = platform.system()


# ==============================
# FUNCTIONS
# ==============================

def get_path_hash(path_str: str) -> bytes:
    """Calculates MD5 hash of the path and returns raw 16 bytes for BINARY(16)."""
    return hashlib.md5(path_str.encode('utf-8')).digest()


def compute_blake3(path: str) -> bytes:
    """Stream-hash the file at *path* with BLAKE3 and return the raw digest.

    Reads the file in CHUNK_SIZE pieces; for files of at least
    PROGRESS_MIN_SIZE bytes a throttled progress line (percent, speed, ETA)
    is printed every PROGRESS_INTERVAL seconds. Any I/O error is logged and
    re-raised so the caller can count it.
    """
    from blake3 import blake3  # lazy: third-party, only needed when hashing

    h = blake3()
    total_size = os.path.getsize(path)
    show_progress = total_size >= PROGRESS_MIN_SIZE
    processed = 0
    start_time = time.time()
    last_report = start_time

    try:
        with open(path, "rb") as f:
            while True:
                chunk = f.read(CHUNK_SIZE)
                if not chunk:
                    break
                h.update(chunk)
                processed += len(chunk)

                if show_progress:
                    now = time.time()
                    if now - last_report >= PROGRESS_INTERVAL:
                        elapsed = now - start_time
                        speed = processed / elapsed if elapsed > 0 else 0
                        percent = processed / total_size * 100
                        remaining = total_size - processed
                        eta = remaining / speed if speed > 0 else 0
                        print(
                            f" ⏳ {percent:6.2f}% | "
                            f"{processed / 1024 / 1024:8.1f} / {total_size / 1024 / 1024:.1f} MB | "
                            f"{speed / 1024 / 1024:6.1f} MB/s | "
                            f"ETA {time.strftime('%H:%M:%S', time.gmtime(eta))}",
                            flush=True
                        )
                        last_report = now

        if show_progress:
            total_time = time.time() - start_time
            avg_speed = total_size / total_time if total_time > 0 else 0
            print(
                f" ✅ DONE | "
                f"{total_size / 1024 / 1024:.1f} MB | "
                f"avg {avg_speed / 1024 / 1024:.1f} MB/s | "
                f"time {time.strftime('%H:%M:%S', time.gmtime(total_time))}",
                flush=True
            )

        return h.digest()
    except Exception as e:
        print(f"⚠️ HASH ERROR: {path} - {e}")
        raise


def size_allowed(size: int) -> bool:
    """Return True if *size* (bytes) falls within FILE_MIN_SIZE..FILE_MAX_SIZE."""
    if FILE_MIN_SIZE is not None and size < FILE_MIN_SIZE:
        return False
    if FILE_MAX_SIZE is not None and size > FILE_MAX_SIZE:
        return False
    return True


def normalize_db_path(scan_root, disk_path):
    r"""
    Converts a physical Windows path to the standardized DB format.
    E:\Movies\File.mkv -> /Movies/File.mkv

    Returns None if *disk_path* cannot be expressed relative to *scan_root*
    (e.g. paths on different drives).
    """
    try:
        rel_path = os.path.relpath(disk_path, scan_root)
    except ValueError:
        return None
    clean_path = rel_path.replace("\\", "/")
    if not clean_path.startswith("/"):
        clean_path = "/" + clean_path
    return clean_path


# ==============================
# MAIN (internal helpers)
# ==============================

def _connect_db():
    """Open the MySQL connection; return (db, cursor) or (None, None) on failure."""
    import pymysql  # lazy: third-party
    try:
        db = pymysql.connect(**DB_CONFIG)
        return db, db.cursor()
    except Exception as e:
        print(f"❌ DB Connection failed: {e}")
        return None, None


def _load_indexed_map(cur):
    """Load {full_path: (file_size, mtime_unix)} for this disk from the DB."""
    cur.execute("""
        SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
        FROM file_md5_index
        WHERE host_name = %s
    """, (DISK_HOSTNAME,))
    return {row[0]: (row[1], row[2]) for row in cur.fetchall()}


def _walk_files(scan_root):
    """Yield the absolute path of every file under *scan_root*, pruning EXCLUDED_DIRS."""
    for root, dirs, files in os.walk(scan_root):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
        for fname in files:
            yield os.path.join(root, fname)


def _phase1_cleanup(cur, indexed_map, scan_root):
    """Remove DB rows whose files no longer exist on disk.

    Mutates *indexed_map* (deleted entries are dropped) and returns the set
    of DB paths that were (or, in DRY_RUN, would be) deleted.
    """
    print("======================================", flush=True)
    print("🧹 PHASE 1: Checking for deleted files...", flush=True)

    current_disk_paths = set()
    for disk_path in _walk_files(scan_root):
        clean_path = normalize_db_path(scan_root, disk_path)
        if clean_path:
            current_disk_paths.add(clean_path)

    paths_to_delete = set(indexed_map.keys()) - current_disk_paths

    if not paths_to_delete:
        print("✅ No deleted files detected.")
        return paths_to_delete

    print(f"🗑️ Found {len(paths_to_delete):,} files to delete from DB.")
    if DRY_RUN:
        # Preview only the first 20 doomed paths.
        for p in sorted(list(paths_to_delete))[:20]:
            print(f" - {p}")
        return paths_to_delete

    # Delete by path_hash (computed locally via MD5) for index efficiency.
    batch_size = 500
    to_delete_list = list(paths_to_delete)
    for i in range(0, len(to_delete_list), batch_size):
        batch_paths = to_delete_list[i: i + batch_size]
        batch_hashes = [get_path_hash(p) for p in batch_paths]
        format_strings = ','.join(['%s'] * len(batch_hashes))
        query = f"DELETE FROM file_md5_index WHERE host_name = %s AND path_hash IN ({format_strings})"
        try:
            cur.execute(query, [DISK_HOSTNAME] + batch_hashes)
        except Exception as e:
            print(f"❌ Error deleting batch: {e}")

    for p in paths_to_delete:
        del indexed_map[p]
    print("✅ Cleanup complete.")
    return paths_to_delete


def _phase2_scan(cur, indexed_map, scan_root):
    """Hash new/changed files and upsert them into the DB.

    A file is skipped when its size and mtime match the DB record.
    Returns (new_files, skipped, errors).
    """
    print("======================================", flush=True)
    print("🚀 PHASE 2: Scanning for changes & new files...", flush=True)

    new_files = 0
    skipped = 0
    errors = 0
    seen_paths = set()

    for disk_path in _walk_files(scan_root):
        try:
            stat = os.stat(disk_path)
        except OSError:
            errors += 1
            continue

        size = stat.st_size
        if not size_allowed(size):
            continue

        clean_path = normalize_db_path(scan_root, disk_path)
        if not clean_path or clean_path in seen_paths:
            continue
        seen_paths.add(clean_path)
        mtime = int(stat.st_mtime)

        # Match Check: unchanged size + mtime means no re-hash needed.
        if clean_path in indexed_map:
            db_size, db_mtime = indexed_map[clean_path]
            if size == db_size and mtime == db_mtime:
                skipped += 1
                continue

        # Compute Hashes
        try:
            b3_hash = compute_blake3(disk_path)
            p_hash = get_path_hash(clean_path)  # Essential for MySQL 9 Unique Index
        except Exception:
            errors += 1
            continue

        if DRY_RUN:
            print(f"🛡️ [DRY RUN] NEW/UPDATE: {clean_path}")
            new_files += 1
        else:
            cur.execute("""
                INSERT INTO file_md5_index
                (os_name, host_name, full_path, path_hash, file_name, directory, file_size, mtime, blake3)
                VALUES (%s, %s, %s, %s, %s, %s, %s, FROM_UNIXTIME(%s), %s)
                ON DUPLICATE KEY UPDATE
                    file_size = VALUES(file_size),
                    mtime = VALUES(mtime),
                    blake3 = VALUES(blake3),
                    updated_at = CURRENT_TIMESTAMP
            """, (
                OS_NAME, DISK_HOSTNAME, clean_path, p_hash,
                os.path.basename(disk_path), os.path.dirname(clean_path),
                size, mtime, b3_hash
            ))
            new_files += 1
            print(f"➕ ADDED: {clean_path} | {b3_hash.hex()[:8]}...")

    return new_files, skipped, errors


# ==============================
# MAIN
# ==============================

def main():
    """Entry point: connect to the DB, purge deleted files, index new/changed ones."""
    print("🚀 BLAKE3 External Disk Indexer (MySQL 9 Compatible)", flush=True)
    print(f"🖥 Running on PC: {REAL_PC_HOSTNAME}", flush=True)
    if DRY_RUN:
        print("🛡️ DRY RUN MODE ACTIVE: No changes will be made to DB.", flush=True)
    else:
        print("⚠️ LIVE MODE: Changes WILL be committed to DB.", flush=True)

    scan_root = f"{DISK_DRIVE_LETTER}:\\"
    if not os.path.isdir(scan_root):
        print(f"❌ ERROR: Drive '{scan_root}' not found!")
        return

    db, cur = _connect_db()
    if db is None:
        return

    print(f"📥 Loading DB index for: '{DISK_HOSTNAME}'...", flush=True)
    indexed_map = _load_indexed_map(cur)
    print(f"✅ Found {len(indexed_map):,} files in DB for this disk.", flush=True)

    # PHASE 1: CLEANUP (DELETE MISSING FILES)
    paths_to_delete = _phase1_cleanup(cur, indexed_map, scan_root)

    # PHASE 2: SCAN & UPDATE (HASHING)
    new_files, skipped, errors = _phase2_scan(cur, indexed_map, scan_root)

    print("======================================", flush=True)
    print(f"✅ Processed : {new_files}")
    print(f"⏭ Skipped : {skipped}")
    print(f"🗑 Deleted : {len(paths_to_delete)}")
    print(f"⚠️ Errors : {errors}")
    print("🏁 Done.")

    cur.close()
    db.close()


if __name__ == "__main__":
    main()