Compare commits: b0275928d2 ... master

7 commits

| Author | SHA1 | Date |
|---|---|---|
| | 782e7d3fb7 | |
| | a19281c3a4 | |
| | 0c94333abb | |
| | 51c77a8793 | |
| | f052362b31 | |
| | 773e67c9b6 | |
| | 4c420060ec | |
Mazání Library/20 MazáníPrádnýchAdresářů.py (Normal file, 38 lines)
@@ -0,0 +1,38 @@
import os


def cleanup_empty_folders(target_path, dry_run=True):
    # Check that the path exists
    if not os.path.exists(target_path):
        print(f"Chyba: Cesta {target_path} neexistuje.")
        return

    print(f"Prohledávám: {target_path}")
    if dry_run:
        print("POZOR: Spuštěno v režimu DRY RUN (nic se nemaže)\n")

    # topdown=False is the key - we start from the deepest folders
    for root, dirs, files in os.walk(target_path, topdown=False):
        for name in dirs:
            folder_path = os.path.join(root, name)

            # Check whether the folder is empty
            # listdir returns everything in the directory (including hidden files)
            if not os.listdir(folder_path):
                if dry_run:
                    print(f"[DRY RUN] Složka by byla smazána: {folder_path}")
                else:
                    try:
                        os.rmdir(folder_path)
                        print(f"Smazáno: {folder_path}")
                    except OSError as e:
                        print(f"Chyba při mazání {folder_path}: {e}")


if __name__ == "__main__":
    # Path to your network share
    # On Windows, use r"" (a raw string) in Python because of the backslashes
    path_to_clean = r"\\tower1\#library"

    # Run with dry_run=True first to see what would happen
    cleanup_empty_folders(path_to_clean, dry_run=True)
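A note on the traversal order: os.walk(..., topdown=False) visits the deepest directories first, so a chain of nested empty folders collapses in a single pass. A minimal self-contained sketch (the tree names are illustrative):

import os
import tempfile

base = tempfile.mkdtemp()
os.makedirs(os.path.join(base, "a", "b", "c"))  # a/b/c are all empty

# Bottom-up walk: "c" is removed first, which empties "b", then "a"
for root, dirs, files in os.walk(base, topdown=False):
    for name in dirs:
        p = os.path.join(root, name)
        if not os.listdir(p):
            os.rmdir(p)

print(os.listdir(base))  # [] - all three levels are gone in one pass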
@@ -15,11 +15,15 @@ import platform
 import sys
 from blake3 import blake3
 
+def get_path_hash(path_str: str) -> bytes:
+    """Calculates MD5 hash of the path and returns raw 16 bytes for BINARY(16)."""
+    return hashlib.md5(path_str.encode('utf-8')).digest()
+
 # ==============================
 # ⚙️ USER CONFIGURATION
 # ==============================
-DISK_DRIVE_LETTER = "p"  # (e.g., "E", "F", "P")
-DISK_HOSTNAME = "#HD05"  # (e.g., "#HD015")
+DISK_DRIVE_LETTER = "z"  # (e.g., "E", "F", "P")
+DISK_HOSTNAME = "TW22"  # (e.g., "#HD015")
 
 # 🔒 SAFETY SWITCH
 # True = LIST ONLY (No DB changes). "Simulates" the run.
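Why the path is hashed at all (an inference from the BINARY(16) comment and the MySQL 9 references below): full_path is too long and too variable to index directly under utf8mb4, so a fixed 16-byte MD5 digest serves as the key for the UNIQUE(host_name, path_hash) constraint. A quick sketch of the helper's contract:

import hashlib

def get_path_hash(path_str: str) -> bytes:
    # Same helper as in the hunk above: raw 16-byte digest, not hex text
    return hashlib.md5(path_str.encode('utf-8')).digest()

digest = get_path_hash("/Movies/Film.mkv")
print(len(digest))   # 16 - fits a BINARY(16) column exactly
print(digest.hex())  # hex form, usable as UNHEX('...') in manual queries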
WalkFilesOnBackupHDD/30 WalkBackupHDD.py (Normal file, 313 lines)
@@ -0,0 +1,313 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-

r"""
FAST FILE HASH INDEXER – WINDOWS CLIENT (HARDCODED CONFIG)
- Mode: PHYSICAL BACKUP
- Hostname in DB = Disk Label (e.g., #HD015)
- Path in DB = Relative path (e.g., /Movies/Film.mkv)
"""

import os
import time
import pymysql
import socket
import platform
import sys
import hashlib
from blake3 import blake3

# ==============================
# ⚙️ USER CONFIGURATION
# ==============================
DISK_DRIVE_LETTER = "z"  # (e.g., "E", "F", "P")
DISK_HOSTNAME = "TW22"  # (e.g., "#HD015")

# 🔒 SAFETY SWITCH
DRY_RUN = False

# ==============================
# TECHNICAL CONFIG
# ==============================
CHUNK_SIZE = 5 * 1024 * 1024  # 5 MB
PROGRESS_MIN_SIZE = 500 * 1024 * 1024  # 500 MB
PROGRESS_INTERVAL = 1.0  # seconds

EXCLUDED_DIRS = {"$RECYCLE.BIN", "System Volume Information", "RECYCLER", "msdownld.tmp"}

# --- File Size Limits ---
FILE_MIN_SIZE = 0
FILE_MAX_SIZE = 1024 * 1024 * 1024 * 1024  # 1TB

# --- DB Config ---
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

PRINT_SKIPPED = False  # Set True to see files that were already in DB

# ==============================
# SYSTEM INFO
# ==============================
REAL_PC_HOSTNAME = socket.gethostname()
OS_NAME = platform.system()


# ==============================
# FUNCTIONS
# ==============================

def get_path_hash(path_str: str) -> bytes:
    """Calculates MD5 hash of the path and returns raw 16 bytes for BINARY(16)."""
    return hashlib.md5(path_str.encode('utf-8')).digest()


def compute_blake3(path: str) -> bytes:
    h = blake3()
    total_size = os.path.getsize(path)
    show_progress = total_size >= PROGRESS_MIN_SIZE

    processed = 0
    start_time = time.time()
    last_report = start_time

    try:
        with open(path, "rb") as f:
            while True:
                chunk = f.read(CHUNK_SIZE)
                if not chunk:
                    break

                h.update(chunk)
                processed += len(chunk)

                if show_progress:
                    now = time.time()
                    if now - last_report >= PROGRESS_INTERVAL:
                        elapsed = now - start_time
                        speed = processed / elapsed if elapsed > 0 else 0
                        percent = processed / total_size * 100
                        remaining = total_size - processed
                        eta = remaining / speed if speed > 0 else 0

                        print(
                            f" ⏳ {percent:6.2f}% | "
                            f"{processed / 1024 / 1024:8.1f} / {total_size / 1024 / 1024:.1f} MB | "
                            f"{speed / 1024 / 1024:6.1f} MB/s | "
                            f"ETA {time.strftime('%H:%M:%S', time.gmtime(eta))}",
                            flush=True
                        )
                        last_report = now

        if show_progress:
            total_time = time.time() - start_time
            avg_speed = total_size / total_time if total_time > 0 else 0
            print(
                f" ✅ DONE | "
                f"{total_size / 1024 / 1024:.1f} MB | "
                f"avg {avg_speed / 1024 / 1024:.1f} MB/s | "
                f"time {time.strftime('%H:%M:%S', time.gmtime(total_time))}",
                flush=True
            )

        return h.digest()

    except Exception as e:
        print(f"⚠️ HASH ERROR: {path} - {e}")
        raise


def size_allowed(size: int) -> bool:
    if FILE_MIN_SIZE is not None and size < FILE_MIN_SIZE: return False
    if FILE_MAX_SIZE is not None and size > FILE_MAX_SIZE: return False
    return True


def normalize_db_path(scan_root, disk_path):
    r"""
    Converts a physical Windows path to the standardized DB format.
    E:\Movies\File.mkv -> /Movies/File.mkv
    """
    try:
        rel_path = os.path.relpath(disk_path, scan_root)
    except ValueError:
        return None

    clean_path = rel_path.replace("\\", "/")
    if not clean_path.startswith("/"):
        clean_path = "/" + clean_path

    return clean_path


# ==============================
# MAIN
# ==============================

def main():
    print("🚀 BLAKE3 External Disk Indexer (MySQL 9 Compatible)", flush=True)
    print(f"🖥 Running on PC: {REAL_PC_HOSTNAME}", flush=True)

    if DRY_RUN:
        print("🛡️ DRY RUN MODE ACTIVE: No changes will be made to DB.", flush=True)
    else:
        print("⚠️ LIVE MODE: Changes WILL be committed to DB.", flush=True)

    scan_root = f"{DISK_DRIVE_LETTER}:\\"

    if not os.path.isdir(scan_root):
        print(f"❌ ERROR: Drive '{scan_root}' not found!")
        return

    try:
        db = pymysql.connect(**DB_CONFIG)
        cur = db.cursor()
    except Exception as e:
        print(f"❌ DB Connection failed: {e}")
        return

    print(f"📥 Loading DB index for: '{DISK_HOSTNAME}'...", flush=True)

    # === LOAD EXISTING DB RECORDS ===
    # We load path_hash as well for precise deletion
    cur.execute("""
        SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
        FROM file_md5_index
        WHERE host_name = %s
    """, (DISK_HOSTNAME,))

    indexed_map = {row[0]: (row[1], row[2]) for row in cur.fetchall()}
    print(f"✅ Found {len(indexed_map):,} files in DB for this disk.", flush=True)

    # =========================================================
    # PHASE 1: CLEANUP (DELETE MISSING FILES)
    # =========================================================
    print("======================================", flush=True)
    print("🧹 PHASE 1: Checking for deleted files...", flush=True)

    current_disk_paths = set()
    for root, dirs, files in os.walk(scan_root):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
        for fname in files:
            disk_path = os.path.join(root, fname)
            clean_path = normalize_db_path(scan_root, disk_path)
            if clean_path:
                current_disk_paths.add(clean_path)

    paths_to_delete = set(indexed_map.keys()) - current_disk_paths

    if paths_to_delete:
        print(f"🗑️ Found {len(paths_to_delete):,} files to delete from DB.")
        if DRY_RUN:
            for p in sorted(list(paths_to_delete))[:20]:
                print(f" - {p}")
        else:
            # Delete using path_hash for index efficiency
            batch_size = 500
            to_delete_list = list(paths_to_delete)
            for i in range(0, len(to_delete_list), batch_size):
                batch_paths = to_delete_list[i: i + batch_size]
                # Map paths to their MD5 hashes
                batch_hashes = [get_path_hash(p) for p in batch_paths]

                format_strings = ','.join(['%s'] * len(batch_hashes))
                query = f"DELETE FROM file_md5_index WHERE host_name = %s AND path_hash IN ({format_strings})"

                try:
                    cur.execute(query, [DISK_HOSTNAME] + batch_hashes)
                except Exception as e:
                    print(f"❌ Error deleting batch: {e}")

            for p in paths_to_delete:
                del indexed_map[p]
            print("✅ Cleanup complete.")
    else:
        print("✅ No deleted files detected.")

    # =========================================================
    # PHASE 2: SCAN & UPDATE (HASHING)
    # =========================================================
    print("======================================", flush=True)
    print("🚀 PHASE 2: Scanning for changes & new files...", flush=True)

    new_files = 0
    skipped = 0
    errors = 0
    seen_paths = set()

    for root, dirs, files in os.walk(scan_root):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
        for fname in files:
            disk_path = os.path.join(root, fname)
            try:
                stat = os.stat(disk_path)
            except OSError:
                errors += 1
                continue

            size = stat.st_size
            if not size_allowed(size):
                continue

            clean_path = normalize_db_path(scan_root, disk_path)
            if not clean_path or clean_path in seen_paths:
                continue
            seen_paths.add(clean_path)

            mtime = int(stat.st_mtime)

            # Match Check
            if clean_path in indexed_map:
                db_size, db_mtime = indexed_map[clean_path]
                if size == db_size and mtime == db_mtime:
                    skipped += 1
                    continue

            # Compute Hashes
            try:
                b3_hash = compute_blake3(disk_path)
                p_hash = get_path_hash(clean_path)  # Essential for MySQL 9 Unique Index
            except Exception:
                errors += 1
                continue

            if DRY_RUN:
                print(f"🛡️ [DRY RUN] NEW/UPDATE: {clean_path}")
                new_files += 1
            else:
                cur.execute("""
                    INSERT INTO file_md5_index
                        (os_name, host_name, full_path, path_hash, file_name, directory,
                         file_size, mtime, blake3)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, FROM_UNIXTIME(%s), %s)
                    ON DUPLICATE KEY UPDATE
                        file_size = VALUES(file_size),
                        mtime = VALUES(mtime),
                        blake3 = VALUES(blake3),
                        updated_at = CURRENT_TIMESTAMP
                """, (
                    OS_NAME, DISK_HOSTNAME, clean_path, p_hash, fname,
                    os.path.dirname(clean_path), size, mtime, b3_hash
                ))
                new_files += 1
                print(f"➕ ADDED: {clean_path} | {b3_hash.hex()[:8]}...")

    print("======================================", flush=True)
    print(f"✅ Processed : {new_files}")
    print(f"⏭ Skipped   : {skipped}")
    print(f"🗑 Deleted   : {len(paths_to_delete)}")
    print(f"⚠️ Errors    : {errors}")
    print("🏁 Done.")

    cur.close()
    db.close()


if __name__ == "__main__":
    main()
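PHASE 1 reduces to a plain set difference between the DB snapshot and the disk walk; a reduced, in-memory model of that step (the sample paths are hypothetical):

indexed_map = {"/Movies/A.mkv": (100, 1), "/Movies/B.mkv": (200, 2)}  # loaded from DB
current_disk_paths = {"/Movies/A.mkv"}                                # collected by os.walk

paths_to_delete = set(indexed_map.keys()) - current_disk_paths
assert paths_to_delete == {"/Movies/B.mkv"}  # only rows that vanished from disk get deleted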
WalkFilesOnBackupHDD/40 TestPathNormalizedinTable.py (Normal file, 158 lines)
@@ -0,0 +1,158 @@
#!/opt/bin/python3
# -*- coding: utf-8 -*-

import pymysql
import hashlib
import posixpath
import unicodedata
from binascii import hexlify

# ============================================================
# CONFIG
# ============================================================

DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
}

HOST_FILTER = "tower1"  # None = all hosts
LIMIT = None  # e.g. 50000 for testing
SHOW_EXAMPLES = 20

# ============================================================
# CANONICAL PATH
# ============================================================

def canonical_path(path_str: str) -> str:
    if not path_str:
        return path_str

    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)

    return path_str


def md5_bytes(path_str: str) -> bytes:
    return hashlib.md5(path_str.encode("utf-8")).digest()


# ============================================================
# MAIN
# ============================================================

def main():

    db = pymysql.connect(**DB_CONFIG)
    cur = db.cursor(pymysql.cursors.SSCursor)

    sql = """
        SELECT id, full_path, path_hash
        FROM file_md5_index
    """

    params = []

    if HOST_FILTER:
        sql += " WHERE host_name = %s"
        params.append(HOST_FILTER)

    if LIMIT:
        sql += " LIMIT %s"
        params.append(LIMIT)

    cur.execute(sql, params)

    total = 0
    ok = 0
    path_change = 0
    hash_change = 0

    examples_path = []
    examples_hash = []

    for rec_id, full_path, stored_hash in cur:

        total += 1

        canonical = canonical_path(full_path)

        raw_hash = md5_bytes(full_path)
        canonical_hash = md5_bytes(canonical)

        # ---------------------------------------------------
        # CASE 1: fully OK
        # ---------------------------------------------------
        if full_path == canonical and stored_hash == canonical_hash:
            ok += 1

        # ---------------------------------------------------
        # CASE 2: path string would change
        # ---------------------------------------------------
        if full_path != canonical:
            path_change += 1

            if len(examples_path) < SHOW_EXAMPLES:
                examples_path.append((rec_id, full_path, canonical))

        # ---------------------------------------------------
        # CASE 3: hash would change
        # ---------------------------------------------------
        if stored_hash != canonical_hash:
            hash_change += 1

            if len(examples_hash) < SHOW_EXAMPLES:
                examples_hash.append(
                    (rec_id, full_path,
                     hexlify(stored_hash).decode(),
                     hexlify(canonical_hash).decode())
                )

        if total % 100000 == 0:
            print(f"Checked {total:,} rows...")

    # ============================================================
    # REPORT
    # ============================================================

    print("\n" + "=" * 70)
    print("AUDIT SUMMARY")
    print("=" * 70)

    print(f"Total rows checked               : {total:,}")
    print(f"OK (already canonical + hash OK) : {ok:,}")
    print(f"Paths that would change          : {path_change:,}")
    print(f"Hashes that would change         : {hash_change:,}")

    print("=" * 70)

    # ------------------------------------------------------------
    # SHOW EXAMPLES
    # ------------------------------------------------------------

    if examples_path:
        print("\n⚠ PATH CHANGE EXAMPLES:")
        for rec_id, old, new in examples_path:
            print(f"[id={rec_id}]")
            print("  DB :", old)
            print("  NEW:", new)
            print()

    if examples_hash:
        print("\n❌ HASH CHANGE EXAMPLES:")
        for rec_id, path, old_hash, new_hash in examples_hash:
            print(f"[id={rec_id}] {path}")
            print("  Stored :", old_hash)
            print("  New    :", new_hash)
            print()

    cur.close()
    db.close()


if __name__ == "__main__":
    main()
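What the audit is really catching: the same on-screen path can be stored in NFC or NFD Unicode form, and the two encode to different bytes, hence different MD5 digests. A short demonstration of that effect:

import hashlib
import unicodedata

nfd = unicodedata.normalize("NFD", "/mnt/user/Mazání")  # decomposed accents
nfc = unicodedata.normalize("NFC", nfd)                 # composed accents

print(nfd == nfc)  # False - visually identical, different code points
print(hashlib.md5(nfd.encode("utf-8")).hexdigest())
print(hashlib.md5(nfc.encode("utf-8")).hexdigest())     # differs from the line above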
WalkFilesOnBackupHDD/50 Onetimepathnormalization.py (Normal file, 188 lines)
@@ -0,0 +1,188 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
ONE-TIME MIGRATION: Normalize full_path (NFC, forward slashes) + recompute path_hash
- Targets ONLY one host_name (Tower1 by default)
- Safe with UNIQUE(host_name, path_hash)
- Handles collisions by skipping conflicting rows and logging them
- DRY_RUN supported
"""

import sys
import time
import hashlib
import posixpath
import unicodedata
import pymysql
from pymysql.err import IntegrityError

# =========================
# CONFIG
# =========================
HOST_TO_FIX = "Tower"      # <-- set your Unraid host_name exactly as stored in DB
DRY_RUN = True             # <-- first run True; then switch to False to apply
BATCH_SELECT_FETCH = 5000  # server-side cursor fetch size (streaming)
COMMIT_EVERY = 2000        # commit after N successful updates (when DRY_RUN=False)
LOG_EVERY = 50000          # progress print

DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": False,
}

# =========================
# CANONICALIZATION
# =========================
def canonical_path(path_str: str) -> str:
    if not path_str:
        return path_str
    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str


def md5_bytes(path_str: str) -> bytes:
    return hashlib.md5(path_str.encode("utf-8")).digest()  # 16 raw bytes for BINARY(16)


# =========================
# MAIN
# =========================
def main():
    print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] 🚀 Tower path_hash migration")
    print(f"Host: {HOST_TO_FIX}")
    print(f"DRY_RUN: {DRY_RUN}")
    sys.stdout.flush()

    db = pymysql.connect(**DB_CONFIG)

    # streaming cursor for reading
    read_cur = db.cursor(pymysql.cursors.SSCursor)
    read_cur.execute(
        """
        SELECT id, full_path, path_hash
        FROM file_md5_index
        WHERE host_name = %s
        """,
        (HOST_TO_FIX,),
    )

    # normal cursor for updates
    upd_cur = db.cursor()

    total = 0
    needs_change = 0
    updated_ok = 0
    collisions = 0
    other_errors = 0

    start = time.time()
    pending_commits = 0

    # Optional: make server-side cursor fetch a bit larger
    # (PyMySQL streams regardless; this just makes the loop smoother)
    # Not strictly necessary.

    while True:
        rows = read_cur.fetchmany(BATCH_SELECT_FETCH)
        if not rows:
            break

        for rec_id, full_path, stored_hash in rows:
            total += 1

            new_path = canonical_path(full_path)
            new_hash = md5_bytes(new_path)

            # already canonical & correct
            if new_path == full_path and new_hash == stored_hash:
                if total % LOG_EVERY == 0:
                    elapsed = time.time() - start
                    print(f"Checked {total:,} | needs_change {needs_change:,} | updated {updated_ok:,} | collisions {collisions:,} | {elapsed:.1f}s")
                    sys.stdout.flush()
                continue

            needs_change += 1

            if DRY_RUN:
                # in dry-run we just count; no DB writes
                continue

            # Update with collision handling via UNIQUE(host_name, path_hash)
            try:
                # Use a savepoint so a duplicate-key error doesn't kill the whole transaction
                upd_cur.execute("SAVEPOINT sp_one;")

                upd_cur.execute(
                    """
                    UPDATE file_md5_index
                    SET full_path = %s,
                        path_hash = %s
                    WHERE id = %s
                    """,
                    (new_path, new_hash, rec_id),
                )

                upd_cur.execute("RELEASE SAVEPOINT sp_one;")

                updated_ok += 1
                pending_commits += 1

                if pending_commits >= COMMIT_EVERY:
                    db.commit()
                    pending_commits = 0

            except IntegrityError as e:
                # Duplicate key = collision on (host_name, path_hash).
                # This means some OTHER row in the same host already has this new_hash.
                upd_cur.execute("ROLLBACK TO SAVEPOINT sp_one;")
                upd_cur.execute("RELEASE SAVEPOINT sp_one;")
                collisions += 1

                # Print a short line occasionally (avoid huge spam)
                if collisions <= 50 or collisions % 1000 == 0:
                    print(f"⚠ COLLISION id={rec_id} | {e}")
                    sys.stdout.flush()

            except Exception as e:
                upd_cur.execute("ROLLBACK TO SAVEPOINT sp_one;")
                upd_cur.execute("RELEASE SAVEPOINT sp_one;")
                other_errors += 1
                if other_errors <= 50 or other_errors % 1000 == 0:
                    print(f"❌ ERROR id={rec_id} | {e}")
                    sys.stdout.flush()

        if total % LOG_EVERY == 0:
            elapsed = time.time() - start
            print(f"Checked {total:,} | needs_change {needs_change:,} | updated {updated_ok:,} | collisions {collisions:,} | {elapsed:.1f}s")
            sys.stdout.flush()

    # finalize
    if not DRY_RUN:
        if pending_commits:
            db.commit()
        print("✅ Migration finished (committed).")
    else:
        print("⚠ DRY_RUN finished (no changes written).")

    elapsed = time.time() - start
    print("=" * 70)
    print(f"Total rows checked   : {total:,}")
    print(f"Rows needing change  : {needs_change:,}")
    print(f"Rows updated         : {updated_ok:,}")
    print(f"Collisions (skipped) : {collisions:,}")
    print(f"Other errors         : {other_errors:,}")
    print(f"Elapsed              : {elapsed:.1f}s")
    print("=" * 70)

    read_cur.close()
    upd_cur.close()
    db.close()


if __name__ == "__main__":
    main()
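The savepoint pattern above is what lets one duplicate-key failure skip a single row without discarding the rest of an uncommitted batch; in skeleton form (same table and columns as the script, an open cursor on an autocommit=False connection assumed):

from pymysql.err import IntegrityError

def try_update(cur, new_path, new_hash, rec_id):
    cur.execute("SAVEPOINT sp_one;")
    try:
        cur.execute(
            "UPDATE file_md5_index SET full_path = %s, path_hash = %s WHERE id = %s",
            (new_path, new_hash, rec_id),
        )
        cur.execute("RELEASE SAVEPOINT sp_one;")
        return True
    except IntegrityError:
        # Undo only this statement; earlier updates in the batch stay pending
        cur.execute("ROLLBACK TO SAVEPOINT sp_one;")
        cur.execute("RELEASE SAVEPOINT sp_one;")
        return False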
WalkFilesOnBackupHDD/51 testthoseneedchangewhetherok.py (Normal file, 146 lines)
@@ -0,0 +1,146 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import hashlib
import posixpath
import unicodedata
import pymysql
import time

# =========================
# CONFIG
# =========================

HOST_TO_CHECK = "Tower"
WINDOWS_UNC_BASE = r"\\tower"

DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
}

PRINT_FIRST_CHANGES = 20
LOG_EVERY = 5000

# =========================
# CANONICAL
# =========================

def canonical_path(path_str):
    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str


def md5_bytes(path_str):
    return hashlib.md5(path_str.encode("utf-8")).digest()


# =========================
# PATH MAP
# =========================

def linux_to_windows_unc(linux_path):
    rel = linux_path[len("/mnt/user/"):]
    return os.path.join(WINDOWS_UNC_BASE, *rel.split("/"))


# =========================
# MAIN
# =========================

def main():

    print("=" * 70)
    print("🔍 Tower Canonical Path SMB Verification")
    print(f"Host: {HOST_TO_CHECK}")
    print(f"UNC Base: {WINDOWS_UNC_BASE}")
    print("=" * 70)

    db = pymysql.connect(**DB_CONFIG)
    cur = db.cursor(pymysql.cursors.SSCursor)

    cur.execute("""
        SELECT id, full_path, path_hash
        FROM file_md5_index
        WHERE host_name = %s
    """, (HOST_TO_CHECK,))

    total = 0
    needs_change = 0
    exists_ok = 0
    missing = 0

    printed_changes = 0

    start = time.time()

    for rec_id, full_path, stored_hash in cur:

        total += 1

        new_path = canonical_path(full_path)
        new_hash = md5_bytes(new_path)

        # Already canonical
        if new_path == full_path and new_hash == stored_hash:
            continue

        needs_change += 1

        win_path = linux_to_windows_unc(new_path)
        exists = os.path.exists(win_path)

        if exists:
            exists_ok += 1
        else:
            missing += 1

        # ---- Print first examples ----
        if printed_changes < PRINT_FIRST_CHANGES:
            print("\n🔧 CHANGE DETECTED")
            print(f"ID       : {rec_id}")
            print(f"DB PATH  : {full_path}")
            print(f"NEW PATH : {new_path}")
            print(f"WIN PATH : {win_path}")
            print(f"Exists   : {exists}")
            printed_changes += 1

        # ---- Progress ----
        if total % LOG_EVERY == 0:
            elapsed = time.time() - start
            rate = total / elapsed if elapsed else 0

            print(
                f"📊 Checked {total:,} rows | "
                f"Needs change {needs_change:,} | "
                f"Exists {exists_ok:,} | "
                f"Missing {missing:,} | "
                f"{rate:,.0f} rows/sec"
            )

    # =========================
    # SUMMARY
    # =========================

    elapsed = time.time() - start

    print("\n" + "=" * 70)
    print("✅ FINAL SUMMARY")
    print("=" * 70)
    print(f"Total scanned    : {total:,}")
    print(f"Needs change     : {needs_change:,}")
    print(f"Exists on Tower  : {exists_ok:,}")
    print(f"Missing on Tower : {missing:,}")
    print(f"Runtime          : {elapsed:.1f}s")
    print("=" * 70)

    cur.close()
    db.close()


if __name__ == "__main__":
    main()
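How the Linux-to-UNC mapping behaves, on the script's own assumption that every DB path begins with /mnt/user/:

import os

WINDOWS_UNC_BASE = r"\\tower"

def linux_to_windows_unc(linux_path):
    rel = linux_path[len("/mnt/user/"):]
    return os.path.join(WINDOWS_UNC_BASE, *rel.split("/"))

print(linux_to_windows_unc("/mnt/user/Movies/Film.mkv"))
# On Windows this prints \\tower\Movies\Film.mkv
# Caveat (an observation, not stated in the source): paths that do not start
# with /mnt/user/ are silently mis-sliced, so their exists-checks would be wrong.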
WalkFilesOnBackupHDD/53 towerpathcorrection.py (Normal file, 139 lines)
@@ -0,0 +1,139 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
TOWER PATH NORMALIZATION MIGRATION
----------------------------------
✔ Normalizes full_path → NFC canonical
✔ Recalculates path_hash
✔ Uses two DB connections (streaming safe)
✔ Idempotent (safe to rerun)
✔ Production safe
"""

import pymysql
import hashlib
import posixpath
import unicodedata
import time

# =========================
# CONFIG
# =========================

HOST_TO_FIX = "tower1"
BATCH_FETCH = 5000
COMMIT_EVERY = 2000

DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": False,
}

# =========================
# CANONICALIZATION
# =========================

def canonical_path(path_str: str) -> str:
    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str


def md5_bytes(path_str: str) -> bytes:
    return hashlib.md5(path_str.encode("utf-8")).digest()


# =========================
# MAIN
# =========================

def main():

    print("=" * 70)
    print("🚀 TOWER PATH NORMALIZATION MIGRATION")
    print(f"Host: {HOST_TO_FIX}")
    print("=" * 70)

    start = time.time()

    # --- TWO CONNECTIONS ---
    db_read = pymysql.connect(**DB_CONFIG)
    db_write = pymysql.connect(**DB_CONFIG)

    read_cur = db_read.cursor(pymysql.cursors.SSCursor)
    write_cur = db_write.cursor()

    read_cur.execute("""
        SELECT id, full_path, path_hash
        FROM file_md5_index
        WHERE host_name = %s
    """, (HOST_TO_FIX,))

    total = 0
    updated = 0
    skipped = 0
    pending_commit = 0

    while True:

        rows = read_cur.fetchmany(BATCH_FETCH)

        if not rows:
            break

        for rec_id, full_path, stored_hash in rows:

            total += 1

            new_path = canonical_path(full_path)
            new_hash = md5_bytes(new_path)

            if new_path == full_path and new_hash == stored_hash:
                skipped += 1
                continue

            write_cur.execute("""
                UPDATE file_md5_index
                SET full_path = %s,
                    path_hash = %s
                WHERE id = %s
            """, (new_path, new_hash, rec_id))

            updated += 1
            pending_commit += 1

            if pending_commit >= COMMIT_EVERY:
                db_write.commit()
                pending_commit = 0

        print(
            f"Checked {total:,} | Updated {updated:,} | Skipped {skipped:,}"
        )

    if pending_commit:
        db_write.commit()

    elapsed = time.time() - start

    print("\n" + "=" * 70)
    print("✅ MIGRATION FINISHED")
    print("=" * 70)
    print(f"Total checked : {total:,}")
    print(f"Rows updated  : {updated:,}")
    print(f"Rows skipped  : {skipped:,}")
    print(f"Runtime       : {elapsed:.1f}s")
    print("=" * 70)

    read_cur.close()
    write_cur.close()
    db_read.close()
    db_write.close()


if __name__ == "__main__":
    main()
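Why two connections (per the docstring's "streaming safe" note): an SSCursor keeps an unbuffered result open on its connection, so the UPDATEs go through a second, independent connection instead of interleaving with the stream. Reduced to its essentials (DB_CONFIG as defined in the script):

import pymysql

db_read = pymysql.connect(**DB_CONFIG)   # owns the streaming SELECT
db_write = pymysql.connect(**DB_CONFIG)  # owns UPDATEs and commit batching

read_cur = db_read.cursor(pymysql.cursors.SSCursor)  # unbuffered, row by row
write_cur = db_write.cursor()                        # ordinary buffered cursor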
WalkFilesOnBackupHDD/54 53_aktualizovanýspolemhotovo.py (Normal file, 123 lines)
@@ -0,0 +1,123 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pymysql
import hashlib
import posixpath
import unicodedata
import time
from pymysql.err import IntegrityError

HOST_TO_FIX = "tower1"
BATCH_FETCH = 5000
COMMIT_EVERY = 2000

DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": False,
}

def canonical_path(path_str: str) -> str:
    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str


def md5_bytes(path_str: str) -> bytes:
    return hashlib.md5(path_str.encode("utf-8")).digest()


def main():

    print("🚀 PATH NORMALIZATION WITH TEMP FLAG")

    db_read = pymysql.connect(**DB_CONFIG)
    db_write = pymysql.connect(**DB_CONFIG)

    read_cur = db_read.cursor(pymysql.cursors.SSCursor)
    write_cur = db_write.cursor()

    read_cur.execute("""
        SELECT id, full_path, path_hash
        FROM file_md5_index
        WHERE host_name=%s
          AND path_corrected=0
    """, (HOST_TO_FIX,))

    total = updated = skipped = deleted = 0
    pending_commit = 0

    while True:

        rows = read_cur.fetchmany(BATCH_FETCH)
        if not rows:
            break

        for rec_id, full_path, stored_hash in rows:

            total += 1

            new_path = canonical_path(full_path)
            new_hash = md5_bytes(new_path)

            try:

                # already canonical → just mark it
                if new_path == full_path and new_hash == stored_hash:

                    write_cur.execute("""
                        UPDATE file_md5_index
                        SET path_corrected=1
                        WHERE id=%s
                    """, (rec_id,))

                    skipped += 1

                else:

                    write_cur.execute("""
                        UPDATE file_md5_index
                        SET full_path=%s,
                            path_hash=%s,
                            path_corrected=1
                        WHERE id=%s
                    """, (new_path, new_hash, rec_id))

                    updated += 1

            except IntegrityError as e:

                # duplicate canonical path → delete the row
                if e.args[0] == 1062:
                    write_cur.execute(
                        "DELETE FROM file_md5_index WHERE id=%s",
                        (rec_id,)
                    )
                    deleted += 1
                else:
                    raise

            pending_commit += 1

            if pending_commit >= COMMIT_EVERY:
                db_write.commit()
                pending_commit = 0

        print(f"Checked {total:,} | Updated {updated:,} | Skipped {skipped:,} | Deleted {deleted:,}")

    if pending_commit:
        db_write.commit()

    read_cur.close()
    write_cur.close()
    db_read.close()
    db_write.close()

    print("✅ FINISHED")


if __name__ == "__main__":
    main()
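What the path_corrected flag buys (a reading of the WHERE clause above): every visited row is either marked or deleted, so an interrupted migration resumes exactly where it stopped. In miniature, with an in-memory stand-in for the table:

rows = [
    {"id": 1, "path_corrected": 0},
    {"id": 2, "path_corrected": 1},  # finished by a previous run
    {"id": 3, "path_corrected": 0},
]

todo = [r for r in rows if r["path_corrected"] == 0]  # the WHERE path_corrected=0 filter
for r in todo:
    r["path_corrected"] = 1  # update and skip branches both set the flag

assert all(r["path_corrected"] == 1 for r in rows)  # a rerun finds nothing left to do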