Files
walkfiles/WalkFilesOnBackupHDD/10 WalkBackupHDD.py
2026-01-08 10:15:45 +01:00

295 lines
9.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
FAST FILE HASH INDEXER WINDOWS CLIENT (EXTERNAL DISKS)
- Mode: PHYSICAL BACKUP
- Hostname in DB = Disk Label (e.g., #HD015)
- Path in DB = Relative path (e.g., /Movies/Film.mkv)
"""
import os, time
import pymysql
import socket
import platform
import sys
from blake3 import blake3
# ==============================
# CONFIG
# ==============================
# Read size used while hashing. This is the single definition of CHUNK_SIZE;
# 4 MiB is the value that was actually in effect (a second, earlier 5 MB
# assignment was always overwritten by it).
CHUNK_SIZE = 4 * 1024 * 1024  # 4 MiB
# Files at least this large get progress lines while being hashed.
PROGRESS_MIN_SIZE = 500 * 1024 * 1024  # 500 MiB
# Minimum delay between two progress lines.
PROGRESS_INTERVAL = 1.0  # seconds
# Directory names that are pruned from the walk.
EXCLUDED_DIRS = {"$RECYCLE.BIN", "System Volume Information", "RECYCLER", "msdownld.tmp"}

# --- Size limits (inclusive); set either to None to disable that bound ---
FILE_MIN_SIZE = 0
FILE_MAX_SIZE = 1024 * 1024 * 1024 * 1024  # 1 TiB

# --- Database settings ---
# NOTE(review): the credentials below are hard-coded, and for the `root`
# account. They should be moved out of the source (environment variable or
# an untracked config file) and the password rotated.
DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

PRINT_SKIPPED = False  # True = also print files that were skipped
# ==============================
# SYSTEM INFO
# ==============================
# Physical name of this PC (console output only; the DB stores the disk
# label as host_name instead).
REAL_PC_HOSTNAME = socket.gethostname()
# Name of the OS the scan runs on; main() stores it in the os_name column.
OS_NAME = platform.system()
# ==============================
# FUNCTIONS
# ==============================
def compute_blake3(path: str) -> bytes:
    """Return the BLAKE3 digest of the file at *path*.

    The file is read in ``CHUNK_SIZE`` pieces. For files of at least
    ``PROGRESS_MIN_SIZE`` bytes a progress line is printed at most once every
    ``PROGRESS_INTERVAL`` seconds, followed by a summary line when hashing
    completes.

    Args:
        path: Path of the file to hash.

    Returns:
        The raw digest bytes.

    Raises:
        Exception: Any error raised while sizing, opening or reading the
            file is reported on stdout and then re-raised unchanged.
    """
    try:
        # Sizing happens inside the try so that a failure here is reported
        # the same way as a read failure.
        total_size = os.path.getsize(path)
        show_progress = total_size >= PROGRESS_MIN_SIZE
        hasher = blake3()
        processed = 0
        # Monotonic clock: elapsed time and speed must not be affected by
        # system clock adjustments during a long hash.
        start_time = time.monotonic()
        last_report = start_time

        with open(path, "rb") as f:
            while True:
                chunk = f.read(CHUNK_SIZE)
                if not chunk:
                    break
                hasher.update(chunk)
                processed += len(chunk)

                if not show_progress:
                    continue
                now = time.monotonic()
                if now - last_report < PROGRESS_INTERVAL:
                    continue

                elapsed = now - start_time
                speed = processed / elapsed if elapsed > 0 else 0
                percent = processed / total_size * 100 if total_size else 100.0
                # The file may have grown since it was sized; a negative
                # remainder would make time.gmtime() fail on Windows.
                remaining = max(total_size - processed, 0)
                eta = remaining / speed if speed > 0 else 0
                print(
                    f"{percent:6.2f}% | "
                    f"{processed/1024/1024:8.1f} / {total_size/1024/1024:.1f} MB | "
                    f"{speed/1024/1024:6.1f} MB/s | "
                    f"ETA {time.strftime('%H:%M:%S', time.gmtime(eta))}",
                    flush=True
                )
                last_report = now

        if show_progress:
            total_time = time.monotonic() - start_time
            avg_speed = total_size / total_time if total_time > 0 else 0
            print(
                f" ✅ DONE | "
                f"{total_size/1024/1024:.1f} MB | "
                f"avg {avg_speed/1024/1024:.1f} MB/s | "
                f"time {time.strftime('%H:%M:%S', time.gmtime(total_time))}",
                flush=True
            )
        return hasher.digest()
    except Exception as e:
        # Report here because the caller only counts the failure.
        print(f"⚠️ HASH ERROR: {path} - {e}")
        raise
def get_drive_info():
    """Ask the user for the drive to scan and the disk ID to store.

    Both prompts repeat until the answer is acceptable: the drive must be a
    single letter whose root directory exists, and the disk ID must be at
    least two characters long.

    Returns:
        A ``(drive_root, disk_label)`` tuple: the drive root (letter, colon
        and a trailing backslash) and the ID that is used as ``host_name``.
    """
    print("\n💿 --- NASTAVENÍ SKENOVÁNÍ (EXTERNÍ DISK) ---")

    # 1. Drive letter
    while True:
        answer = input("📂 Zadejte písmeno disku ve Windows (např. 'E'): ").strip().upper()
        # Accept "E", "E:", "E:\" and "E:/" alike by dropping the separators.
        letter = answer
        for separator in (":", "\\", "/"):
            letter = letter.replace(separator, "")

        if not (len(letter) == 1 and letter.isalpha()):
            print("❌ Neplatný formát.")
            continue

        drive_root = letter + ":\\"
        if os.path.isdir(drive_root):
            break
        print(f"❌ Disk {drive_root} není dostupný.")

    # 2. Disk name -> HOST_NAME
    label_prompt = "🏷️ Zadejte ID disku (bude uloženo jako 'host_name', např. '#HD015'): "
    disk_label = input(label_prompt).strip()
    while len(disk_label) < 2:
        print("❌ Název je příliš krátký.")
        disk_label = input(label_prompt).strip()

    return drive_root, disk_label
def size_allowed(size: int) -> bool:
    """Return True when *size* lies within the configured size limits.

    ``FILE_MIN_SIZE`` and ``FILE_MAX_SIZE`` are inclusive bounds; a bound set
    to ``None`` is not checked.
    """
    below_minimum = FILE_MIN_SIZE is not None and size < FILE_MIN_SIZE
    above_maximum = FILE_MAX_SIZE is not None and size > FILE_MAX_SIZE
    return not (below_minimum or above_maximum)
# ==============================
# MAIN
# ==============================
def _to_db_path(disk_path, scan_root):
    """Convert a path on the scanned disk to the '/Dir/File.ext' form stored in the DB.

    Raises ValueError when *disk_path* cannot be made relative to *scan_root*.
    """
    # E:\Filmy\Avatar.mkv -> Filmy\Avatar.mkv
    rel_path = os.path.relpath(disk_path, scan_root)
    # Normalise to forward slashes: Filmy/Avatar.mkv
    clean_path = rel_path.replace("\\", "/")
    # Add the leading slash: /Filmy/Avatar.mkv
    if not clean_path.startswith("/"):
        clean_path = "/" + clean_path
    return clean_path


def _scan_and_index(cur, scan_root, disk_hostname, indexed_map):
    """Walk *scan_root*, hash new or changed files and upsert them via *cur*.

    Returns a dict with the counters 'new', 'skipped', 'filtered' and 'errors'.
    """
    stats = {"new": 0, "skipped": 0, "filtered": 0, "errors": 0}
    # Windows directory names are case-insensitive, so compare case-folded
    # (e.g. "$Recycle.Bin" must match "$RECYCLE.BIN").
    excluded = {name.casefold() for name in EXCLUDED_DIRS}
    seen_paths = set()

    def on_walk_error(exc):
        # Without this callback os.walk silently skips directories it
        # cannot list, so their files would be missing without any trace.
        stats["errors"] += 1
        print(f"⚠️ WALK ERROR: {exc}", flush=True)

    for root, dirs, files in os.walk(scan_root, onerror=on_walk_error):
        # Prune system folders in place so os.walk does not descend into them.
        dirs[:] = [d for d in dirs if d.casefold() not in excluded]

        for fname in files:
            disk_path = os.path.join(root, fname)

            # 1. Stat (size, mtime)
            try:
                stat = os.stat(disk_path)
            except OSError:
                stats["errors"] += 1
                continue

            size = stat.st_size
            if not size_allowed(size):
                stats["filtered"] += 1
                continue

            # 2. Path in the form stored in the DB
            try:
                clean_path = _to_db_path(disk_path, scan_root)
            except ValueError:
                stats["errors"] += 1
                continue

            if clean_path in seen_paths:
                continue
            seen_paths.add(clean_path)

            mtime = int(stat.st_mtime)

            # === STRICT CHECK ===
            # A file is skipped only when both size and mtime match the DB row.
            if indexed_map.get(clean_path) == (size, mtime):
                stats["skipped"] += 1
                if PRINT_SKIPPED:
                    print(f"⏭ SKIP {clean_path}", flush=True)
                continue

            # === INSERT / UPDATE ===
            print(" NEW / UPDATED", flush=True)
            print(f" File: {clean_path}", flush=True)
            print(f" Size: {size:,} B", flush=True)

            try:
                b3 = compute_blake3(disk_path)
            except Exception:
                # compute_blake3 has already printed the reason.
                stats["errors"] += 1
                continue

            cur.execute("""
                INSERT INTO file_md5_index
                (os_name, host_name, full_path, file_name, directory,
                file_size, mtime, blake3)
                VALUES (%s, %s, %s, %s, %s, %s, FROM_UNIXTIME(%s), %s)
                ON DUPLICATE KEY UPDATE
                file_size = VALUES(file_size),
                mtime = VALUES(mtime),
                blake3 = VALUES(blake3),
                updated_at = CURRENT_TIMESTAMP
            """, (
                OS_NAME,        # e.g. 'Windows' (where the scan ran)
                disk_hostname,  # the disk ID, e.g. '#HD015'
                clean_path,     # e.g. '/Filmy/Avatar.mkv'
                fname,
                os.path.dirname(clean_path),
                size,
                mtime,
                b3,
            ))
            stats["new"] += 1
            print(f" Hash: {b3.hex()}", flush=True)
            print("--------------------------------------", flush=True)

    return stats


def main():
    """Index the files of an external disk into the ``file_md5_index`` table.

    Asks for the drive and the disk ID, loads the rows already stored for
    that ID, then walks the drive and hashes every file whose size or mtime
    differs from its stored row (or that has no row yet). A summary of the
    counters is printed at the end. The cursor and the connection are closed
    even when the scan is interrupted by an exception.
    """
    print("🚀 BLAKE3 External Disk Indexer", flush=True)
    print(f"🖥 Running on PC: {REAL_PC_HOSTNAME}", flush=True)

    # Collect the user inputs
    scan_root, disk_hostname = get_drive_info()
    print("✅ Konfigurace:")
    print(f" Zdroj (Windows) : {scan_root}")
    print(f" DB Hostname : {disk_hostname}")
    print(" DB Cesty : /Složka/Soubor...")

    try:
        db = pymysql.connect(**DB_CONFIG)
        cur = db.cursor()
    except Exception as e:
        print(f"❌ DB Connection failed: {e}")
        input("Enter pro konec...")
        return

    try:
        print(f"📥 Načítám index pro disk: '{disk_hostname}'...", flush=True)
        # Fetch only the rows of this disk, matched exactly by host_name.
        cur.execute("""
            SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
            FROM file_md5_index
            WHERE host_name = %s
        """, (disk_hostname,))
        # Map: { "/Dir/File.ext": (size, mtime) }
        indexed_map = {
            full_path: (file_size, db_mtime)
            for full_path, file_size, db_mtime in cur.fetchall()
        }
        print(f"✅ Nalezeno {len(indexed_map):,} souborů v DB pro tento disk.", flush=True)
        print("======================================", flush=True)

        stats = _scan_and_index(cur, scan_root, disk_hostname, indexed_map)
    finally:
        # Release the DB resources on every exit path.
        cur.close()
        db.close()

    print("======================================", flush=True)
    print(f"✅ Hotovo : {stats['new']}")
    print(f"⏭ Shoda : {stats['skipped']}")
    print(f"🚫 Filtr : {stats['filtered']}")
    print(f"⚠️ Chyby : {stats['errors']}")
    print("🏁 Konec.")


if __name__ == "__main__":
    main()