#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TOWER PATH NORMALIZATION MIGRATION
----------------------------------
✔ Normalizes full_path → NFC canonical
✔ Recalculates path_hash
✔ Uses two DB connections (streaming safe)
✔ Idempotent (safe to rerun)
✔ Production safe

Rows of ``file_md5_index`` belonging to ``HOST_TO_FIX`` are streamed over one
connection and rewritten over a second one.  A row is only touched when its
stored path or hash differs from the canonical values, which is what makes a
rerun a no-op.
"""

import hashlib
import posixpath
import time
import unicodedata
from contextlib import closing

# =========================
# CONFIG
# =========================

HOST_TO_FIX = "tower"

BATCH_FETCH = 5000    # rows pulled from the streaming cursor per fetchmany()
COMMIT_EVERY = 2000   # updates accumulated before the write side commits

# NOTE(review): credentials (root user and password) are hard-coded in source
# control; consider loading them from the environment or a config file.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": False,
}

SELECT_SQL = """
    SELECT id, full_path, path_hash
    FROM file_md5_index
    WHERE host_name = %s
"""

UPDATE_SQL = """
    UPDATE file_md5_index
    SET full_path = %s,
        path_hash = %s
    WHERE id = %s
"""


# =========================
# CANONICALIZATION
# =========================

def canonical_path(path_str: str) -> str:
    """Return the canonical form of *path_str*.

    Backslashes become forward slashes, the result is collapsed with
    ``posixpath.normpath`` (duplicate/trailing slashes, ``.`` and ``..``
    segments), and the text is normalized to Unicode NFC.
    """
    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str


def md5_bytes(path_str: str) -> bytes:
    """Return the raw 16-byte MD5 digest of *path_str* encoded as UTF-8."""
    return hashlib.md5(path_str.encode("utf-8")).digest()


# =========================
# MAIN
# =========================

def _pending_change(full_path, stored_hash):
    """Return ``(new_path, new_hash)`` if the row must be rewritten, else None.

    A NULL ``full_path`` cannot be canonicalized; such rows are left alone
    instead of aborting the whole migration with an AttributeError.
    """
    if full_path is None:
        return None

    new_path = canonical_path(full_path)
    new_hash = md5_bytes(new_path)

    if new_path == full_path and new_hash == stored_hash:
        return None
    return new_path, new_hash


def _migrate_rows(read_cur, write_cur, db_write):
    """Stream rows from *read_cur* and rewrite non-canonical ones.

    Updates are issued through *write_cur* and committed on *db_write* every
    ``COMMIT_EVERY`` updates, plus once at the end for the remainder.

    Returns a ``(total, updated, skipped)`` tuple of row counts.
    """
    total = 0
    updated = 0
    skipped = 0
    pending_commit = 0

    while True:
        rows = read_cur.fetchmany(BATCH_FETCH)
        if not rows:
            break

        for rec_id, full_path, stored_hash in rows:
            total += 1

            change = _pending_change(full_path, stored_hash)
            if change is None:
                skipped += 1
                continue

            new_path, new_hash = change
            write_cur.execute(UPDATE_SQL, (new_path, new_hash, rec_id))
            updated += 1
            pending_commit += 1

            if pending_commit >= COMMIT_EVERY:
                db_write.commit()
                pending_commit = 0

        # One progress line per fetched batch, so a rerun that skips every
        # row still shows that it is advancing.
        print(
            f"Checked {total:,} | Updated {updated:,} | Skipped {skipped:,}"
        )

    if pending_commit:
        db_write.commit()

    return total, updated, skipped


def main() -> None:
    """Run the migration for ``HOST_TO_FIX`` and print a summary."""
    # Imported here so the pure helpers above stay importable (and testable)
    # on machines that do not have the database driver installed.
    import pymysql

    print("=" * 70)
    print("🚀 TOWER PATH NORMALIZATION MIGRATION")
    print(f"Host: {HOST_TO_FIX}")
    print("=" * 70)

    start = time.time()

    # --- TWO CONNECTIONS ---
    # closing() guarantees both connections are released even when the
    # migration fails part-way.  Updates that were not yet committed are then
    # never committed by this script (autocommit is off).
    with closing(pymysql.connect(**DB_CONFIG)) as db_read, \
            closing(pymysql.connect(**DB_CONFIG)) as db_write:
        read_cur = db_read.cursor(pymysql.cursors.SSCursor)
        write_cur = db_write.cursor()

        read_cur.execute(SELECT_SQL, (HOST_TO_FIX,))
        total, updated, skipped = _migrate_rows(read_cur, write_cur, db_write)

        # Cursors are closed explicitly only on success; on failure the
        # connections are closed directly instead of touching a cursor that
        # may still have an unread result set.
        read_cur.close()
        write_cur.close()

    elapsed = time.time() - start

    print("\n" + "=" * 70)
    print("✅ MIGRATION FINISHED")
    print("=" * 70)
    print(f"Total checked : {total:,}")
    print(f"Rows updated : {updated:,}")
    print(f"Rows skipped : {skipped:,}")
    print(f"Runtime : {elapsed:.1f}s")
    print("=" * 70)


if __name__ == "__main__":
    main()