#!/opt/bin/python3
# -*- coding: utf-8 -*-
# Source file (reconstructed from diff): WalkFilesOnBackupHDD/40 TestPathNormalizedinTable.py
#
# Read-only AUDIT: stream every row of file_md5_index (optionally filtered by
# host) and count how many stored paths / path hashes would change if the
# canonical form (forward slashes, posix-normalized, Unicode NFC) were applied.
# Prints a summary plus the first few example rows per category. No writes.

import hashlib
import posixpath
import unicodedata
from binascii import hexlify

# ============================================================
# CONFIG
# ============================================================

# NOTE(review): credentials are hard-coded in source — consider moving them to
# environment variables or a config file kept out of version control.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
}

HOST_FILTER = "tower"   # None = all hosts
LIMIT = None            # e.g. 50000 for testing
SHOW_EXAMPLES = 20      # max example rows printed per category

# ============================================================
# CANONICAL PATH
# ============================================================

def canonical_path(path_str: str) -> str:
    """Return the canonical form of *path_str*.

    Backslashes become forward slashes, the result is posix-normalized
    (collapses '//', './', '..'), then Unicode-normalized to NFC.
    Empty/None input is returned unchanged (normpath("") would yield ".").
    """
    if not path_str:
        return path_str

    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)

    return path_str


def md5_bytes(path_str: str) -> bytes:
    """MD5 of the UTF-8 encoded path as 16 raw bytes (matches BINARY(16))."""
    return hashlib.md5(path_str.encode("utf-8")).digest()

# ============================================================
# MAIN
# ============================================================

def main():
    # Imported lazily so the pure helpers above stay importable (and testable)
    # on machines without the MySQL driver installed.
    import pymysql

    db = pymysql.connect(**DB_CONFIG)
    # Server-side cursor: streams rows instead of loading the table into RAM.
    cur = db.cursor(pymysql.cursors.SSCursor)

    sql = """
        SELECT id, full_path, path_hash
        FROM file_md5_index
    """
    params = []

    if HOST_FILTER:
        sql += " WHERE host_name = %s"
        params.append(HOST_FILTER)

    if LIMIT:
        sql += " LIMIT %s"
        params.append(LIMIT)

    cur.execute(sql, params)

    total = 0
    ok = 0
    path_change = 0
    hash_change = 0

    examples_path = []
    examples_hash = []

    for rec_id, full_path, stored_hash in cur:
        total += 1

        canonical = canonical_path(full_path)
        canonical_hash = md5_bytes(canonical)
        # FIX: removed `raw_hash = md5_bytes(full_path)` — it was computed but
        # never read anywhere below (dead work on every row).

        # CASE 1: fully OK — path already canonical AND stored hash matches it.
        if full_path == canonical and stored_hash == canonical_hash:
            ok += 1

        # CASE 2: the path string itself would change.
        if full_path != canonical:
            path_change += 1
            if len(examples_path) < SHOW_EXAMPLES:
                examples_path.append((rec_id, full_path, canonical))

        # CASE 3: the stored hash would change.
        if stored_hash != canonical_hash:
            hash_change += 1
            if len(examples_hash) < SHOW_EXAMPLES:
                examples_hash.append(
                    (rec_id, full_path,
                     hexlify(stored_hash).decode(),
                     hexlify(canonical_hash).decode())
                )

        if total % 100000 == 0:
            print(f"Checked {total:,} rows...")

    # ============================================================
    # REPORT
    # ============================================================

    print("\n" + "=" * 70)
    print("AUDIT SUMMARY")
    print("=" * 70)

    print(f"Total rows checked               : {total:,}")
    print(f"OK (already canonical + hash OK) : {ok:,}")
    print(f"Paths that would change          : {path_change:,}")
    print(f"Hashes that would change         : {hash_change:,}")

    print("=" * 70)

    if examples_path:
        print("\n⚠ PATH CHANGE EXAMPLES:")
        for rec_id, old, new in examples_path:
            print(f"[id={rec_id}]")
            print(" DB :", old)
            print(" NEW:", new)
            print()

    if examples_hash:
        print("\nāŒ HASH CHANGE EXAMPLES:")
        for rec_id, path, old_hash, new_hash in examples_hash:
            print(f"[id={rec_id}] {path}")
            print(" Stored :", old_hash)
            print(" New    :", new_hash)
            print()

    cur.close()
    db.close()


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Source file (reconstructed from diff): WalkFilesOnBackupHDD/50 Onetimepathnormalization.py

"""
ONE-TIME MIGRATION: Normalize full_path (NFC, forward slashes) + recompute path_hash
- Targets ONLY one host_name (Tower by default)
- Safe with UNIQUE(host_name, path_hash)
- Handles collisions by skipping conflicting rows and logging them
- DRY_RUN supported
"""

import sys
import time
import hashlib
import posixpath
import unicodedata

# =========================
# CONFIG
# =========================
HOST_TO_FIX = "Tower"        # <-- set your Unraid host_name exactly as stored in DB
DRY_RUN = True               # <-- first run True; then switch to False to apply
BATCH_SELECT_FETCH = 5000    # server-side cursor fetch size (streaming)
COMMIT_EVERY = 2000          # commit after N successful updates (when DRY_RUN=False)
LOG_EVERY = 50000            # progress print interval

# NOTE(review): credentials hard-coded in source — move to env/config.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": False,
}

# =========================
# CANONICALIZATION
# =========================
def canonical_path(path_str: str) -> str:
    """Canonical form: backslashes -> '/', posix-normalized, Unicode NFC.

    Empty/None input is returned unchanged (normpath("") would yield ".").
    """
    if not path_str:
        return path_str
    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str


def md5_bytes(path_str: str) -> bytes:
    """MD5 of the UTF-8 path as 16 raw bytes (for BINARY(16))."""
    return hashlib.md5(path_str.encode("utf-8")).digest()

# =========================
# MAIN
# =========================
def main():
    # Imported lazily so the pure helpers stay importable without the driver.
    import pymysql
    from pymysql.err import IntegrityError

    print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] šŸš€ Tower path_hash migration")
    print(f"Host: {HOST_TO_FIX}")
    print(f"DRY_RUN: {DRY_RUN}")
    sys.stdout.flush()

    db = pymysql.connect(**DB_CONFIG)

    # Streaming cursor for reading (PyMySQL SSCursor: rows fetched on demand).
    read_cur = db.cursor(pymysql.cursors.SSCursor)
    read_cur.execute(
        """
        SELECT id, full_path, path_hash
        FROM file_md5_index
        WHERE host_name = %s
        """,
        (HOST_TO_FIX,),
    )

    # Normal cursor for the UPDATEs.
    upd_cur = db.cursor()

    total = 0
    needs_change = 0
    updated_ok = 0
    collisions = 0
    other_errors = 0

    start = time.time()
    pending_commits = 0

    def report():
        # FIX: progress printing was duplicated in two places, and in DRY_RUN
        # rows needing change `continue`d past it entirely — one shared helper
        # now runs for every LOG_EVERY-th row regardless of branch.
        elapsed = time.time() - start
        print(f"Checked {total:,} | needs_change {needs_change:,} | "
              f"updated {updated_ok:,} | collisions {collisions:,} | {elapsed:.1f}s")
        sys.stdout.flush()

    def discard_savepoint():
        # FIX: a deadlock / lock-wait timeout makes MySQL roll back the WHOLE
        # transaction, destroying the savepoint; "ROLLBACK TO SAVEPOINT" then
        # raises inside the except handler and masks the original error.
        # Cleanup is deliberately best-effort.
        try:
            upd_cur.execute("ROLLBACK TO SAVEPOINT sp_one;")
            upd_cur.execute("RELEASE SAVEPOINT sp_one;")
        except Exception:
            pass

    while True:
        rows = read_cur.fetchmany(BATCH_SELECT_FETCH)
        if not rows:
            break

        for rec_id, full_path, stored_hash in rows:
            total += 1

            new_path = canonical_path(full_path)
            new_hash = md5_bytes(new_path)

            # Anything not already canonical-with-correct-hash needs a change.
            if not (new_path == full_path and new_hash == stored_hash):
                needs_change += 1

                if not DRY_RUN:
                    # Savepoint so a duplicate-key error on
                    # UNIQUE(host_name, path_hash) doesn't kill the whole
                    # pending transaction.
                    try:
                        upd_cur.execute("SAVEPOINT sp_one;")
                        upd_cur.execute(
                            """
                            UPDATE file_md5_index
                            SET full_path = %s,
                                path_hash = %s
                            WHERE id = %s
                            """,
                            (new_path, new_hash, rec_id),
                        )
                        upd_cur.execute("RELEASE SAVEPOINT sp_one;")

                        updated_ok += 1
                        pending_commits += 1

                        if pending_commits >= COMMIT_EVERY:
                            db.commit()
                            pending_commits = 0

                    except IntegrityError as e:
                        # Duplicate key = some OTHER row of the same host
                        # already owns new_hash. Skip and log (rate-limited).
                        discard_savepoint()
                        collisions += 1
                        if collisions <= 50 or collisions % 1000 == 0:
                            print(f"⚠ COLLISION id={rec_id} | {e}")
                            sys.stdout.flush()

                    except Exception as e:
                        discard_savepoint()
                        other_errors += 1
                        if other_errors <= 50 or other_errors % 1000 == 0:
                            print(f"āŒ ERROR id={rec_id} | {e}")
                            sys.stdout.flush()

            if total % LOG_EVERY == 0:
                report()

    # Finalize: flush the last partial batch.
    if not DRY_RUN:
        if pending_commits:
            db.commit()
        print("āœ… Migration finished (committed).")
    else:
        print("⚠ DRY_RUN finished (no changes written).")

    elapsed = time.time() - start
    print("=" * 70)
    print(f"Total rows checked   : {total:,}")
    print(f"Rows needing change  : {needs_change:,}")
    print(f"Rows updated         : {updated_ok:,}")
    print(f"Collisions (skipped) : {collisions:,}")
    print(f"Other errors         : {other_errors:,}")
    print(f"Elapsed              : {elapsed:.1f}s")
    print("=" * 70)

    read_cur.close()
    upd_cur.close()
    db.close()


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Source file (reconstructed from diff): WalkFilesOnBackupHDD/51 testthoseneedchangewhetherok.py
#
# Read-only CHECK: for every row whose path/hash would change under
# canonicalization, verify over SMB (from a Windows box, via the \\tower UNC
# share) that the canonical path actually exists on disk. No DB writes.

import os
import hashlib
import posixpath
import unicodedata
import time

# =========================
# CONFIG
# =========================

HOST_TO_CHECK = "Tower"
WINDOWS_UNC_BASE = r"\\tower"

# NOTE(review): credentials hard-coded in source — move to env/config.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
}

PRINT_FIRST_CHANGES = 20
LOG_EVERY = 5000

# =========================
# CANONICAL
# =========================

def canonical_path(path_str):
    """Canonical form: backslashes -> '/', posix-normalized, Unicode NFC."""
    # FIX: empty-input guard added for consistency with the sibling migration
    # scripts — posixpath.normpath("") would otherwise return ".".
    if not path_str:
        return path_str
    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str


def md5_bytes(path_str):
    """MD5 of the UTF-8 path as 16 raw bytes (for BINARY(16))."""
    return hashlib.md5(path_str.encode("utf-8")).digest()

# =========================
# PATH MAP
# =========================

def linux_to_windows_unc(linux_path):
    """Map a Linux share path (/mnt/user/<share>/...) to its Windows UNC form.

    FIX: the original unconditionally sliced len("/mnt/user/") characters off
    ANY input, silently mangling paths that don't start with that prefix
    (e.g. "/other/a.txt" became "xt"). Non-prefixed paths are now mapped
    relative to the UNC base instead.
    """
    prefix = "/mnt/user/"
    if linux_path.startswith(prefix):
        rel = linux_path[len(prefix):]
    else:
        rel = linux_path.lstrip("/")
    return os.path.join(WINDOWS_UNC_BASE, *rel.split("/"))

# =========================
# MAIN
# =========================

def main():
    # Imported lazily so the pure helpers stay importable without the driver.
    import pymysql

    print("=" * 70)
    print("šŸ” Tower Canonical Path SMB Verification")
    print(f"Host: {HOST_TO_CHECK}")
    print(f"UNC Base: {WINDOWS_UNC_BASE}")
    print("=" * 70)

    db = pymysql.connect(**DB_CONFIG)
    # Server-side cursor: streams rows instead of buffering the whole table.
    cur = db.cursor(pymysql.cursors.SSCursor)

    cur.execute("""
        SELECT id, full_path, path_hash
        FROM file_md5_index
        WHERE host_name = %s
    """, (HOST_TO_CHECK,))

    total = 0
    needs_change = 0
    exists_ok = 0
    missing = 0

    printed_changes = 0

    start = time.time()

    for rec_id, full_path, stored_hash in cur:
        total += 1

        new_path = canonical_path(full_path)
        new_hash = md5_bytes(new_path)

        # Already canonical and hash correct — nothing to verify.
        if new_path == full_path and new_hash == stored_hash:
            continue

        needs_change += 1

        win_path = linux_to_windows_unc(new_path)
        exists = os.path.exists(win_path)

        if exists:
            exists_ok += 1
        else:
            missing += 1

        # ---- Print first examples ----
        if printed_changes < PRINT_FIRST_CHANGES:
            print("\nšŸ”§ CHANGE DETECTED")
            print(f"ID       : {rec_id}")
            print(f"DB PATH  : {full_path}")
            print(f"NEW PATH : {new_path}")
            print(f"WIN PATH : {win_path}")
            print(f"Exists   : {exists}")
            printed_changes += 1

        # ---- Progress ----
        if total % LOG_EVERY == 0:
            elapsed = time.time() - start
            rate = total / elapsed if elapsed else 0

            print(
                f"šŸ“Š Checked {total:,} rows | "
                f"Needs change {needs_change:,} | "
                f"Exists {exists_ok:,} | "
                f"Missing {missing:,} | "
                f"{rate:,.0f} rows/sec"
            )

    # =========================
    # SUMMARY
    # =========================

    elapsed = time.time() - start

    print("\n" + "=" * 70)
    print("āœ… FINAL SUMMARY")
    print("=" * 70)
    print(f"Total scanned    : {total:,}")
    print(f"Needs change     : {needs_change:,}")
    print(f"Exists on Tower  : {exists_ok:,}")
    print(f"Missing on Tower : {missing:,}")
    print(f"Runtime          : {elapsed:.1f}s")
    print("=" * 70)

    cur.close()
    db.close()


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Source file (reconstructed from diff): WalkFilesOnBackupHDD/53 towerpathcorrection.py

"""
TOWER PATH NORMALIZATION MIGRATION
----------------------------------
āœ” Normalizes full_path → NFC canonical
āœ” Recalculates path_hash
āœ” Uses two DB connections (streaming safe)
āœ” Idempotent (safe to rerun)
āœ” Skips UNIQUE(host_name, path_hash) collisions instead of aborting
"""

import hashlib
import posixpath
import unicodedata
import time

# =========================
# CONFIG
# =========================

HOST_TO_FIX = "tower"
BATCH_FETCH = 5000
COMMIT_EVERY = 2000

# NOTE(review): credentials hard-coded in source — move to env/config.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": False,
}

# =========================
# CANONICALIZATION
# =========================

def canonical_path(path_str: str) -> str:
    """Canonical form: backslashes -> '/', posix-normalized, Unicode NFC."""
    # FIX: empty-input guard added for consistency with the sibling scripts
    # (posixpath.normpath("") would otherwise return ".").
    if not path_str:
        return path_str
    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str


def md5_bytes(path_str: str) -> bytes:
    """MD5 of the UTF-8 path as 16 raw bytes (for BINARY(16))."""
    return hashlib.md5(path_str.encode("utf-8")).digest()

# =========================
# MAIN
# =========================

def main():
    # Imported lazily so the pure helpers stay importable without the driver.
    import pymysql
    from pymysql.err import IntegrityError

    print("=" * 70)
    print("šŸš€ TOWER PATH NORMALIZATION MIGRATION")
    print(f"Host: {HOST_TO_FIX}")
    print("=" * 70)

    start = time.time()

    # Two connections: the read side streams (SSCursor) while the write side
    # runs UPDATEs — a single connection cannot do both concurrently.
    db_read = pymysql.connect(**DB_CONFIG)
    db_write = pymysql.connect(**DB_CONFIG)

    read_cur = db_read.cursor(pymysql.cursors.SSCursor)
    write_cur = db_write.cursor()

    read_cur.execute("""
        SELECT id, full_path, path_hash
        FROM file_md5_index
        WHERE host_name = %s
    """, (HOST_TO_FIX,))

    total = 0
    updated = 0
    skipped = 0
    collisions = 0
    pending_commit = 0

    while True:

        rows = read_cur.fetchmany(BATCH_FETCH)

        if not rows:
            break

        for rec_id, full_path, stored_hash in rows:

            total += 1

            new_path = canonical_path(full_path)
            new_hash = md5_bytes(new_path)

            # Idempotence: already canonical with matching hash — skip.
            if new_path == full_path and new_hash == stored_hash:
                skipped += 1
                continue

            try:
                write_cur.execute("""
                    UPDATE file_md5_index
                    SET full_path = %s,
                        path_hash = %s
                    WHERE id = %s
                """, (new_path, new_hash, rec_id))
            except IntegrityError as e:
                # FIX: a UNIQUE(host_name, path_hash) collision previously
                # aborted the whole migration mid-batch (losing the pending
                # uncommitted updates). Count and skip instead, matching the
                # sibling script 50; InnoDB rolls back only the failed
                # statement, so the transaction stays usable.
                collisions += 1
                if collisions <= 50 or collisions % 1000 == 0:
                    print(f"⚠ COLLISION id={rec_id} | {e}")
                continue

            updated += 1
            pending_commit += 1

            if pending_commit >= COMMIT_EVERY:
                db_write.commit()
                pending_commit = 0

        print(
            f"Checked {total:,} | Updated {updated:,} | Skipped {skipped:,}"
        )

    # Flush the final partial batch.
    if pending_commit:
        db_write.commit()

    elapsed = time.time() - start

    print("\n" + "=" * 70)
    print("āœ… MIGRATION FINISHED")
    print("=" * 70)
    print(f"Total checked        : {total:,}")
    print(f"Rows updated         : {updated:,}")
    print(f"Rows skipped         : {skipped:,}")
    print(f"Collisions (skipped) : {collisions:,}")
    print(f"Runtime              : {elapsed:.1f}s")
    print("=" * 70)

    read_cur.close()
    write_cur.close()
    db_read.close()
    db_write.close()


if __name__ == "__main__":
    main()