#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Normalize the ``full_path`` values stored in ``file_md5_index`` for one host.

Every row of ``HOST_TO_FIX`` whose ``path_corrected`` flag is 0 is read,
its path is canonicalized, the MD5 of the canonical path is recomputed and
the row is flagged ``path_corrected=1``.  Rows whose canonical form collides
with an already existing row (MySQL error 1062) are deleted.

Because processed rows are flagged, the script can simply be re-run after an
interruption: it picks up the rows that are still unflagged.
"""

import hashlib
import posixpath
import time
import unicodedata
from contextlib import closing

import pymysql
from pymysql.constants import ER
from pymysql.err import IntegrityError

HOST_TO_FIX = "tower1"
BATCH_FETCH = 5000    # rows pulled from the streaming read cursor per fetchmany()
COMMIT_EVERY = 2000   # processed rows between commits on the write connection

# NOTE(review): the database password is hard-coded in source control.
# Consider loading it from the environment or a config file kept out of the
# repository; it is left untouched here so the script keeps working as-is.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": False,
}

_SQL_SELECT_PENDING = """
    SELECT id, full_path, path_hash
    FROM file_md5_index
    WHERE host_name=%s
      AND path_corrected=0
"""

_SQL_MARK_CORRECTED = """
    UPDATE file_md5_index
    SET path_corrected=1
    WHERE id=%s
"""

_SQL_REWRITE_PATH = """
    UPDATE file_md5_index
    SET full_path=%s,
        path_hash=%s,
        path_corrected=1
    WHERE id=%s
"""

_SQL_DELETE_ROW = "DELETE FROM file_md5_index WHERE id=%s"


def canonical_path(path_str: str) -> str:
    """Return the canonical form of *path_str*.

    Backslashes become forward slashes, the result is collapsed with
    ``posixpath.normpath`` and finally Unicode-normalized to NFC.
    """
    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str


def md5_bytes(path_str: str) -> bytes:
    """Return the raw 16-byte MD5 digest of *path_str* encoded as UTF-8."""
    return hashlib.md5(path_str.encode("utf-8")).digest()


def _fix_row(write_cur, rec_id, full_path, stored_hash) -> str:
    """Canonicalize one row and report what happened to it.

    Returns ``"skipped"`` when path and hash were already canonical (the row
    is only flagged), ``"updated"`` when path/hash were rewritten, and
    ``"deleted"`` when the write hit a duplicate-key error and the row was
    removed instead.

    Raises:
        IntegrityError: for any integrity violation other than a duplicate
            entry.
    """
    new_path = canonical_path(full_path)
    new_hash = md5_bytes(new_path)

    try:
        if new_path == full_path and new_hash == stored_hash:
            # Already canonical -> just flag the row as processed.
            write_cur.execute(_SQL_MARK_CORRECTED, (rec_id,))
            return "skipped"

        write_cur.execute(_SQL_REWRITE_PATH, (new_path, new_hash, rec_id))
        return "updated"
    except IntegrityError as exc:
        if exc.args[0] != ER.DUP_ENTRY:
            raise
        # Duplicate canonical path -> another row already holds it
        # (presumably via a unique key on the path/hash), so drop this one.
        write_cur.execute(_SQL_DELETE_ROW, (rec_id,))
        return "deleted"


def main():
    """Stream all unflagged rows of ``HOST_TO_FIX`` and normalize them.

    Two connections are used: one holds an unbuffered (server-side) cursor
    that streams the SELECT, the other performs the writes, since a
    connection with an unfinished unbuffered result cannot run other
    statements.  Writes are committed every ``COMMIT_EVERY`` processed rows.

    Connections and cursors are closed even when an exception escapes;
    closing the write connection discards whatever was not yet committed.
    """
    print("🚀 PATH NORMALIZATION WITH TEMP FLAG")

    counts = {"updated": 0, "skipped": 0, "deleted": 0}
    total = 0
    pending_commit = 0

    with closing(pymysql.connect(**DB_CONFIG)) as db_read, \
            closing(pymysql.connect(**DB_CONFIG)) as db_write:
        with closing(db_read.cursor(pymysql.cursors.SSCursor)) as read_cur, \
                closing(db_write.cursor()) as write_cur:

            read_cur.execute(_SQL_SELECT_PENDING, (HOST_TO_FIX,))

            while True:
                rows = read_cur.fetchmany(BATCH_FETCH)
                if not rows:
                    break

                for rec_id, full_path, stored_hash in rows:
                    total += 1
                    outcome = _fix_row(write_cur, rec_id, full_path, stored_hash)
                    counts[outcome] += 1

                    pending_commit += 1
                    if pending_commit >= COMMIT_EVERY:
                        db_write.commit()
                        pending_commit = 0

                print(
                    f"Checked {total:,} | Updated {counts['updated']:,} | "
                    f"Skipped {counts['skipped']:,} | Deleted {counts['deleted']:,}"
                )

            # Flush the tail that did not reach a full COMMIT_EVERY batch.
            if pending_commit:
                db_write.commit()

    print("✅ FINISHED")


if __name__ == "__main__":
    main()