This commit is contained in:
2026-02-04 16:07:10 +01:00
parent a19281c3a4
commit 782e7d3fb7

View File

@@ -0,0 +1,123 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import os
import posixpath
import time
import unicodedata

import pymysql
from pymysql.err import IntegrityError
# Only rows whose host_name equals this value are processed.
HOST_TO_FIX = "tower1"

# Number of rows pulled from the streaming read cursor per fetchmany() call.
BATCH_FETCH = 5000

# Number of processed rows after which the write connection is committed.
COMMIT_EVERY = 2000

# Connection parameters passed verbatim to pymysql.connect().
# SECURITY: the password used to live only in source control. It can now be
# supplied through the TORRENTS_DB_PASSWORD environment variable; the literal
# below is kept solely as a backward-compatible fallback and should be
# rotated and removed once every caller sets the variable.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    "database": "torrents",
    "charset": "utf8mb4",
    # Transactions are committed explicitly, in batches, by main().
    "autocommit": False,
}
def canonical_path(path_str: str) -> str:
    """Return the canonical form of *path_str*.

    Three steps are applied in order: backslashes become forward slashes,
    the result is collapsed with ``posixpath.normpath`` (removing ``.``,
    resolving ``..`` and squeezing repeated separators), and the text is
    normalized to Unicode NFC.
    """
    forward_slashed = "/".join(path_str.split("\\"))
    collapsed = posixpath.normpath(forward_slashed)
    return unicodedata.normalize("NFC", collapsed)
def md5_bytes(path_str: str) -> bytes:
    """Return the raw 16-byte MD5 digest of *path_str* encoded as UTF-8."""
    hasher = hashlib.md5()
    hasher.update(path_str.encode("utf-8"))
    return hasher.digest()
# MySQL server error code ER_DUP_ENTRY: a unique-key constraint was violated.
_ER_DUP_ENTRY = 1062


def _normalize_row(write_cur, rec_id, full_path, stored_hash):
    """Bring one file_md5_index row into canonical form.

    Args:
        write_cur: Cursor on the write connection used for UPDATE/DELETE.
        rec_id: Value of the row's ``id`` column.
        full_path: Path currently stored in the row.
        stored_hash: Hash currently stored in the row's ``path_hash`` column.

    Returns:
        ``"skipped"`` when path and hash were already canonical (only the
        flag is set), ``"updated"`` when path and hash were rewritten, or
        ``"deleted"`` when the rewrite hit a duplicate-key error and the row
        was removed instead.

    Raises:
        IntegrityError: For any integrity error other than a duplicate key.
    """
    new_path = canonical_path(full_path)
    new_hash = md5_bytes(new_path)
    try:
        # Already canonical -> just mark the row as processed.
        if new_path == full_path and new_hash == stored_hash:
            write_cur.execute(
                """
                UPDATE file_md5_index
                SET path_corrected=1
                WHERE id=%s
                """,
                (rec_id,),
            )
            return "skipped"

        write_cur.execute(
            """
            UPDATE file_md5_index
            SET full_path=%s,
                path_hash=%s,
                path_corrected=1
            WHERE id=%s
            """,
            (new_path, new_hash, rec_id),
        )
        return "updated"
    except IntegrityError as e:
        if e.args[0] != _ER_DUP_ENTRY:
            raise
        # Duplicate canonical path -> delete this row.
        write_cur.execute(
            "DELETE FROM file_md5_index WHERE id=%s",
            (rec_id,),
        )
        return "deleted"


def _process_rows(db_read, db_write):
    """Stream the uncorrected rows for HOST_TO_FIX and normalize each one.

    Rows are read through an unbuffered ``SSCursor`` on *db_read* and written
    through a regular cursor on *db_write*; the write connection is committed
    every COMMIT_EVERY rows and once more at the end.
    """
    read_cur = db_read.cursor(pymysql.cursors.SSCursor)
    write_cur = db_write.cursor()

    read_cur.execute(
        """
        SELECT id, full_path, path_hash
        FROM file_md5_index
        WHERE host_name=%s
          AND path_corrected=0
        """,
        (HOST_TO_FIX,),
    )

    total = 0
    counts = {"updated": 0, "skipped": 0, "deleted": 0}
    pending_commit = 0

    def progress_line():
        return (
            f"Checked {total:,} | Updated {counts['updated']:,} | "
            f"Skipped {counts['skipped']:,} | Deleted {counts['deleted']:,}"
        )

    while True:
        rows = read_cur.fetchmany(BATCH_FETCH)
        if not rows:
            break

        for rec_id, full_path, stored_hash in rows:
            total += 1
            counts[_normalize_row(write_cur, rec_id, full_path, stored_hash)] += 1

            pending_commit += 1
            if pending_commit >= COMMIT_EVERY:
                db_write.commit()
                pending_commit = 0
                print(progress_line())

    if pending_commit:
        db_write.commit()

    # Report the final totals; the periodic line above only fires on a full
    # commit batch, so the tail of the run would otherwise go unreported.
    print(progress_line())

    # Cursors are closed only on the success path. On failure the caller
    # closes the connections, which is sufficient to release them.
    read_cur.close()
    write_cur.close()


def main():
    """Normalize stored paths for HOST_TO_FIX and flag them as corrected.

    Opens separate read and write connections from DB_CONFIG, processes every
    row of ``file_md5_index`` with ``path_corrected=0`` for the host, and
    always closes both connections, even when processing raises.
    """
    print("🚀 PATH NORMALIZATION WITH TEMP FLAG")

    db_read = pymysql.connect(**DB_CONFIG)
    try:
        db_write = pymysql.connect(**DB_CONFIG)
        try:
            _process_rows(db_read, db_write)
        finally:
            db_write.close()
    finally:
        db_read.close()

    print("✅ FINISHED")
# Run the normalization only when executed as a script, not when imported.
if __name__ == "__main__":
    main()