Files
walkfiles/WalkFilesOnBackupHDD/53 towerpathcorrection.py
2026-02-03 13:26:11 +01:00

140 lines
3.0 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TOWER PATH NORMALIZATION MIGRATION
----------------------------------
✔ Normalizes full_path → NFC canonical
✔ Recalculates path_hash
✔ Uses two DB connections (streaming safe)
✔ Idempotent (safe to rerun)
✔ Production safe
"""
import hashlib
import os
import posixpath
import time
import unicodedata

import pymysql
# =========================
# CONFIG
# =========================
# host_name value whose rows in file_md5_index are migrated.
HOST_TO_FIX = "tower"
# Number of rows pulled from the streaming read cursor per fetchmany() call.
BATCH_FETCH = 5000
# Number of updated rows after which the write connection commits.
COMMIT_EVERY = 2000


def load_db_config(env=None):
    """Return the keyword arguments passed to ``pymysql.connect``.

    Connection details may be overridden through the ``TOWER_DB_HOST``,
    ``TOWER_DB_PORT``, ``TOWER_DB_USER``, ``TOWER_DB_PASSWORD`` and
    ``TOWER_DB_NAME`` variables. Any variable that is not set falls back to
    the value this script has always used, so existing invocations keep
    working unchanged.

    Args:
        env: Mapping to read the overrides from. Defaults to ``os.environ``.

    Returns:
        A dict with the keys ``host``, ``port``, ``user``, ``password``,
        ``database``, ``charset`` and ``autocommit``.

    Raises:
        ValueError: If ``TOWER_DB_PORT`` is set but is not an integer.
    """
    if env is None:
        env = os.environ
    return {
        "host": env.get("TOWER_DB_HOST", "192.168.1.50"),
        "port": int(env.get("TOWER_DB_PORT", "3306")),
        "user": env.get("TOWER_DB_USER", "root"),
        # NOTE(review): the literal fallback keeps old invocations working,
        # but a root password does not belong in source control. Supply
        # TOWER_DB_PASSWORD, rotate this credential and drop the fallback.
        "password": env.get("TOWER_DB_PASSWORD", "Vlado9674+"),
        "database": env.get("TOWER_DB_NAME", "torrents"),
        "charset": "utf8mb4",
        # Commits are issued explicitly in batches by main(), so autocommit
        # must stay off and is deliberately not overridable.
        "autocommit": False,
    }


DB_CONFIG = load_db_config()
# =========================
# CANONICALIZATION
# =========================
def canonical_path(path_str: str) -> str:
    """Return the canonical form of *path_str* used for storage and hashing.

    Three steps are applied in order: backslashes become forward slashes,
    the result is collapsed with ``posixpath.normpath`` (duplicate slashes,
    ``.`` and ``..`` segments and a trailing slash are removed; an empty
    string becomes ``"."``), and the text is converted to Unicode NFC.

    Args:
        path_str: Path text as stored in the database.

    Returns:
        The normalized path string.
    """
    forward_slashed = path_str.replace("\\", "/")
    collapsed = posixpath.normpath(forward_slashed)
    return unicodedata.normalize("NFC", collapsed)
def md5_bytes(path_str: str) -> bytes:
    """Return the raw 16-byte MD5 digest of *path_str* encoded as UTF-8."""
    hasher = hashlib.md5()
    hasher.update(path_str.encode("utf-8"))
    return hasher.digest()
# =========================
# MAIN
# =========================
def main():
    """Normalize ``full_path`` and recompute ``path_hash`` for HOST_TO_FIX.

    Rows of ``file_md5_index`` whose ``host_name`` equals ``HOST_TO_FIX`` are
    streamed through an unbuffered cursor. Each path is passed through
    ``canonical_path`` and hashed with ``md5_bytes``; a row is rewritten only
    when the stored path or hash differs from the recomputed one, so a rerun
    over already-migrated rows updates nothing. Updates are committed every
    ``COMMIT_EVERY`` changed rows and once more at the end.

    On any error the uncommitted batch is rolled back and the exception is
    re-raised. Cursors and connections are closed on every exit path.

    Raises:
        pymysql.Error: If connecting, reading or writing fails.
    """
    banner = "=" * 70
    print(banner)
    print("🚀 TOWER PATH NORMALIZATION MIGRATION")
    print(f"Host: {HOST_TO_FIX}")
    print(banner)
    start = time.time()

    total = 0
    updated = 0
    skipped = 0
    pending_commit = 0

    db_read = db_write = read_cur = write_cur = None
    try:
        # Two connections: the unbuffered SSCursor keeps its result set open
        # on db_read for the whole scan, so the UPDATEs must travel over a
        # separate connection.
        db_read = pymysql.connect(**DB_CONFIG)
        db_write = pymysql.connect(**DB_CONFIG)
        read_cur = db_read.cursor(pymysql.cursors.SSCursor)
        write_cur = db_write.cursor()

        read_cur.execute(
            """
            SELECT id, full_path, path_hash
            FROM file_md5_index
            WHERE host_name = %s
            """,
            (HOST_TO_FIX,),
        )

        while True:
            rows = read_cur.fetchmany(BATCH_FETCH)
            if not rows:
                break
            for rec_id, full_path, stored_hash in rows:
                total += 1
                if full_path is None:
                    # A NULL path cannot be normalized; skip it instead of
                    # aborting the whole migration with an AttributeError.
                    skipped += 1
                    continue
                new_path = canonical_path(full_path)
                new_hash = md5_bytes(new_path)
                if new_path == full_path and new_hash == stored_hash:
                    # Already canonical with a matching hash: nothing to do.
                    skipped += 1
                    continue
                write_cur.execute(
                    """
                    UPDATE file_md5_index
                    SET full_path = %s,
                        path_hash = %s
                    WHERE id = %s
                    """,
                    (new_path, new_hash, rec_id),
                )
                updated += 1
                pending_commit += 1
                if pending_commit >= COMMIT_EVERY:
                    db_write.commit()
                    pending_commit = 0
                    print(
                        f"Checked {total:,} | Updated {updated:,} | Skipped {skipped:,}"
                    )

        # Flush the final, partially filled batch.
        if pending_commit:
            db_write.commit()
    except Exception:
        # Discard the uncommitted batch explicitly before propagating.
        if db_write is not None:
            db_write.rollback()
        raise
    finally:
        # Best-effort cleanup: one failing close() must not prevent the
        # remaining resources from being released.
        for resource in (read_cur, write_cur, db_read, db_write):
            if resource is None:
                continue
            try:
                resource.close()
            except pymysql.Error as exc:
                print(f"Warning: cleanup failed: {exc}")

    elapsed = time.time() - start
    print("\n" + banner)
    print("✅ MIGRATION FINISHED")
    print(banner)
    print(f"Total checked : {total:,}")
    print(f"Rows updated : {updated:,}")
    print(f"Rows skipped : {skipped:,}")
    print(f"Runtime : {elapsed:.1f}s")
    print(banner)
# Run the migration only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()