#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Normalize stored file paths in the ``file_md5_index`` table.

For every row of host ``HOST_TO_FIX`` that still has ``path_corrected=0`` the
script computes the canonical form of ``full_path`` (forward slashes,
``posixpath.normpath``, Unicode NFC) together with the MD5 digest of that
canonical path, writes both back and sets ``path_corrected=1``.  If the update
is rejected as a duplicate entry (MySQL error 1062), the row is deleted
instead.

Because processed rows are flagged, an interrupted run can simply be started
again: only rows that are still unflagged are selected.
"""

import hashlib
import posixpath
import time
import unicodedata
from contextlib import closing

import pymysql
from pymysql.err import IntegrityError

# Only rows whose host_name equals this value are processed.
HOST_TO_FIX = "tower1"
# Number of rows pulled from the streaming read cursor per fetchmany() call.
BATCH_FETCH = 5000
# Number of processed rows after which the write connection is committed.
COMMIT_EVERY = 2000
# MySQL server error number for "Duplicate entry ... for key ...".
MYSQL_ER_DUP_ENTRY = 1062

# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a config file that is not under version control.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": False,
}


def canonical_path(path_str: str) -> str:
    """Return the canonical form of *path_str*.

    Backslashes are replaced by forward slashes, the result is passed through
    ``posixpath.normpath`` and finally normalized to Unicode NFC.
    """
    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str


def md5_bytes(path_str: str) -> bytes:
    """Return the raw 16-byte MD5 digest of *path_str* encoded as UTF-8."""
    return hashlib.md5(path_str.encode("utf-8")).digest()


def main() -> None:
    """Canonicalize the paths of all not-yet-corrected rows of ``HOST_TO_FIX``.

    Rows are read through an unbuffered ``SSCursor`` on one connection and
    modified through a second connection, which is committed every
    ``COMMIT_EVERY`` processed rows and once more at the end.

    Raises:
        IntegrityError: if an UPDATE fails with an integrity error other
            than a duplicate entry.
    """
    print("🚀 PATH NORMALIZATION WITH TEMP FLAG")

    # closing() guarantees that both connections are released on every exit
    # path, including a failure while opening the second one.  Work that has
    # not been committed yet is never committed by this script in that case.
    with closing(pymysql.connect(**DB_CONFIG)) as db_read:
        with closing(pymysql.connect(**DB_CONFIG)) as db_write:
            read_cur = db_read.cursor(pymysql.cursors.SSCursor)
            write_cur = db_write.cursor()

            read_cur.execute("""
                SELECT id, full_path, path_hash
                FROM file_md5_index
                WHERE host_name=%s
                  AND path_corrected=0
            """, (HOST_TO_FIX,))

            total = updated = skipped = deleted = 0
            pending_commit = 0

            while True:
                rows = read_cur.fetchmany(BATCH_FETCH)
                if not rows:
                    break

                for rec_id, full_path, stored_hash in rows:
                    total += 1

                    new_path = canonical_path(full_path)
                    new_hash = md5_bytes(new_path)

                    try:
                        if new_path == full_path and new_hash == stored_hash:
                            # Already canonical -> only set the flag.
                            write_cur.execute("""
                                UPDATE file_md5_index
                                SET path_corrected=1
                                WHERE id=%s
                            """, (rec_id,))
                            skipped += 1
                        else:
                            write_cur.execute("""
                                UPDATE file_md5_index
                                SET full_path=%s,
                                    path_hash=%s,
                                    path_corrected=1
                                WHERE id=%s
                            """, (new_path, new_hash, rec_id))
                            updated += 1
                    except IntegrityError as e:
                        if e.args[0] != MYSQL_ER_DUP_ENTRY:
                            raise
                        # Duplicate canonical path -> delete this row.
                        write_cur.execute(
                            "DELETE FROM file_md5_index WHERE id=%s",
                            (rec_id,)
                        )
                        deleted += 1

                    pending_commit += 1

                    if pending_commit >= COMMIT_EVERY:
                        db_write.commit()
                        pending_commit = 0

                        print(f"Checked {total:,} | Updated {updated:,} | Skipped {skipped:,} | Deleted {deleted:,}")

            # Flush the last, partially filled commit batch.
            if pending_commit:
                db_write.commit()

            # Cursors are closed explicitly only on the success path, where
            # the streamed result set has been read to the end.
            # NOTE(review): closing an SSCursor that still has unread rows
            # presumably has to drain them first - confirm against the pymysql
            # docs; on failure we therefore just close the connections.
            read_cur.close()
            write_cur.close()

    print("✅ FINISHED")


if __name__ == "__main__":
    main()