z230
This commit is contained in:
158
WalkFilesOnBackupHDD/40 TestPathNormalizedinTable.py
Normal file
158
WalkFilesOnBackupHDD/40 TestPathNormalizedinTable.py
Normal file
@@ -0,0 +1,158 @@
|
||||
#!/opt/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pymysql
|
||||
import hashlib
|
||||
import posixpath
|
||||
import unicodedata
|
||||
from binascii import hexlify
|
||||
|
||||
# ============================================================
|
||||
# CONFIG
|
||||
# ============================================================
|
||||
|
||||
import os

# Connection settings passed verbatim to pymysql.connect().
# NOTE(review): credentials should not live in source control. The password can
# be supplied through the TORRENTS_DB_PASSWORD environment variable; the literal
# fallback keeps the script working unchanged until the secret is rotated.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    "database": "torrents",
    "charset": "utf8mb4",
}

HOST_FILTER = "tower"   # None = all hosts
LIMIT = None            # e.g. 50000 for testing
SHOW_EXAMPLES = 20      # max number of example rows kept per report section
|
||||
|
||||
# ============================================================
|
||||
# CANONICAL PATH
|
||||
# ============================================================
|
||||
|
||||
def canonical_path(path_str: str) -> str:
    """Return the canonical form of a stored path.

    Backslashes become forward slashes, the result is collapsed with
    ``posixpath.normpath`` and finally Unicode-normalized to NFC.
    Falsy input (empty string or ``None``) is handed back untouched.
    """
    if path_str:
        forward_slashed = path_str.replace("\\", "/")
        collapsed = posixpath.normpath(forward_slashed)
        return unicodedata.normalize("NFC", collapsed)
    return path_str
|
||||
|
||||
def md5_bytes(path_str: str) -> bytes:
    """Return the raw 16-byte MD5 digest of ``path_str`` encoded as UTF-8."""
    digest = hashlib.md5()
    digest.update(path_str.encode("utf-8"))
    return digest.digest()
|
||||
|
||||
# ============================================================
|
||||
# MAIN
|
||||
# ============================================================
|
||||
|
||||
def main():
    """Audit stored paths and hashes against their canonical form (read-only).

    Streams ``id``, ``full_path`` and ``path_hash`` from ``file_md5_index``
    (optionally restricted by ``HOST_FILTER`` and ``LIMIT``), recomputes the
    canonical path and its MD5, and prints how many rows are already
    consistent, how many paths would change and how many hashes would change,
    followed by up to ``SHOW_EXAMPLES`` examples of each kind.
    """
    sql = """
        SELECT id, full_path, path_hash
        FROM file_md5_index
    """
    params = []

    if HOST_FILTER:
        sql += " WHERE host_name = %s"
        params.append(HOST_FILTER)

    if LIMIT:
        sql += " LIMIT %s"
        params.append(LIMIT)

    total = ok = path_change = hash_change = 0
    examples_path = []
    examples_hash = []

    db = pymysql.connect(**DB_CONFIG)
    try:
        # Unbuffered cursor: rows are read as we iterate instead of being
        # loaded into memory all at once.
        cur = db.cursor(pymysql.cursors.SSCursor)
        try:
            cur.execute(sql, params)

            for rec_id, full_path, stored_hash in cur:
                total += 1

                canonical = canonical_path(full_path)
                canonical_hash = md5_bytes(canonical)

                path_differs = full_path != canonical
                hash_differs = stored_hash != canonical_hash

                # CASE 1: fully OK
                if not path_differs and not hash_differs:
                    ok += 1

                # CASE 2: path string would change
                if path_differs:
                    path_change += 1
                    if len(examples_path) < SHOW_EXAMPLES:
                        examples_path.append((rec_id, full_path, canonical))

                # CASE 3: hash would change
                if hash_differs:
                    hash_change += 1
                    if len(examples_hash) < SHOW_EXAMPLES:
                        examples_hash.append(
                            (
                                rec_id,
                                full_path,
                                hexlify(stored_hash).decode(),
                                hexlify(canonical_hash).decode(),
                            )
                        )

                if total % 100000 == 0:
                    print(f"Checked {total:,} rows...")
        finally:
            cur.close()
    finally:
        # Release the connection even when the scan fails part-way.
        db.close()

    # ============================================================
    # REPORT
    # ============================================================
    print("\n" + "=" * 70)
    print("AUDIT SUMMARY")
    print("=" * 70)

    print(f"Total rows checked : {total:,}")
    print(f"OK (already canonical + hash OK) : {ok:,}")
    print(f"Paths that would change : {path_change:,}")
    print(f"Hashes that would change : {hash_change:,}")

    print("=" * 70)

    # ------------------------------------------------------------
    # SHOW EXAMPLES
    # ------------------------------------------------------------
    if examples_path:
        print("\n⚠ PATH CHANGE EXAMPLES:")
        for rec_id, old, new in examples_path:
            print(f"[id={rec_id}]")
            print(" DB :", old)
            print(" NEW:", new)
            print()

    if examples_hash:
        print("\n❌ HASH CHANGE EXAMPLES:")
        for rec_id, path, old_hash, new_hash in examples_hash:
            print(f"[id={rec_id}] {path}")
            print(" Stored :", old_hash)
            print(" New :", new_hash)
            print()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
188
WalkFilesOnBackupHDD/50 Onetimepathnormalization.py
Normal file
188
WalkFilesOnBackupHDD/50 Onetimepathnormalization.py
Normal file
@@ -0,0 +1,188 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
ONE-TIME MIGRATION: Normalize full_path (NFC, forward slashes) + recompute path_hash
|
||||
- Targets ONLY one host_name (set by HOST_TO_FIX; Tower by default)
|
||||
- Safe with UNIQUE(host_name, path_hash)
|
||||
- Handles collisions by skipping conflicting rows and logging them
|
||||
- DRY_RUN supported
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
import hashlib
|
||||
import posixpath
|
||||
import unicodedata
|
||||
import pymysql
|
||||
from pymysql.err import IntegrityError
|
||||
|
||||
# =========================
|
||||
# CONFIG
|
||||
# =========================
|
||||
import os

HOST_TO_FIX = "Tower"          # <-- set your Unraid host_name exactly as stored in DB
DRY_RUN = True                 # <-- first run True; then switch to False to apply
BATCH_SELECT_FETCH = 5000      # rows pulled from the streaming cursor per fetchmany()
COMMIT_EVERY = 2000            # commit after N successful updates (when DRY_RUN=False)
LOG_EVERY = 50000              # progress print

# Connection settings passed verbatim to pymysql.connect().
# NOTE(review): credentials should not live in source control. The password can
# be supplied through the TORRENTS_DB_PASSWORD environment variable; the literal
# fallback keeps the script working unchanged until the secret is rotated.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": False,
}
|
||||
|
||||
# =========================
|
||||
# CANONICALIZATION
|
||||
# =========================
|
||||
def canonical_path(path_str: str) -> str:
    """Canonicalize a path: forward slashes, normpath-collapsed, NFC.

    Empty or ``None`` input is returned as-is.
    """
    if not path_str:
        return path_str
    return unicodedata.normalize(
        "NFC",
        posixpath.normpath(path_str.replace("\\", "/")),
    )
|
||||
|
||||
def md5_bytes(path_str: str) -> bytes:
    """Hash ``path_str`` (UTF-8) with MD5 and return the 16 raw bytes for BINARY(16)."""
    encoded = path_str.encode("utf-8")
    hasher = hashlib.md5(encoded)
    return hasher.digest()
|
||||
|
||||
# =========================
|
||||
# MAIN
|
||||
# =========================
|
||||
def main():
    """Normalize ``full_path`` and recompute ``path_hash`` for ``HOST_TO_FIX``.

    Rows are streamed with an unbuffered cursor on one connection while the
    UPDATEs go through a second connection. Sending a new statement on the
    connection that still has an unbuffered result in flight makes PyMySQL
    throw away the unread rows, which would end the migration after the first
    fetched batch.

    With ``DRY_RUN`` set nothing is written; rows needing a change are only
    counted. Otherwise each UPDATE runs inside a savepoint, duplicate-key
    collisions and other per-row errors are counted and logged, and work is
    committed every ``COMMIT_EVERY`` successful updates.
    """
    print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] 🚀 Tower path_hash migration")
    print(f"Host: {HOST_TO_FIX}")
    print(f"DRY_RUN: {DRY_RUN}")
    sys.stdout.flush()

    total = needs_change = updated_ok = collisions = other_errors = 0
    pending_commits = 0
    start = time.time()

    db_read = pymysql.connect(**DB_CONFIG)
    db_write = None
    try:
        upd_cur = None
        if not DRY_RUN:
            # Dedicated connection for writes (see docstring).
            db_write = pymysql.connect(**DB_CONFIG)
            upd_cur = db_write.cursor()

        # streaming cursor for reading
        read_cur = db_read.cursor(pymysql.cursors.SSCursor)
        read_cur.execute(
            """
            SELECT id, full_path, path_hash
            FROM file_md5_index
            WHERE host_name = %s
            """,
            (HOST_TO_FIX,),
        )

        while True:
            rows = read_cur.fetchmany(BATCH_SELECT_FETCH)
            if not rows:
                break

            for rec_id, full_path, stored_hash in rows:
                total += 1

                new_path = canonical_path(full_path)
                new_hash = md5_bytes(new_path)

                if new_path != full_path or new_hash != stored_hash:
                    needs_change += 1

                    # in dry-run we just count; no DB writes
                    if not DRY_RUN:
                        try:
                            # Savepoint so one failed row can be undone
                            # without discarding the rest of the batch.
                            upd_cur.execute("SAVEPOINT sp_one;")
                            upd_cur.execute(
                                """
                                UPDATE file_md5_index
                                SET full_path = %s,
                                    path_hash = %s
                                WHERE id = %s
                                """,
                                (new_path, new_hash, rec_id),
                            )
                            upd_cur.execute("RELEASE SAVEPOINT sp_one;")
                        except IntegrityError as e:
                            # Duplicate key = collision on (host_name, path_hash):
                            # some OTHER row of this host already has new_hash.
                            upd_cur.execute("ROLLBACK TO SAVEPOINT sp_one;")
                            upd_cur.execute("RELEASE SAVEPOINT sp_one;")
                            collisions += 1

                            # Print a short line occasionally (avoid huge spam)
                            if collisions <= 50 or collisions % 1000 == 0:
                                print(f"⚠ COLLISION id={rec_id} | {e}")
                                sys.stdout.flush()
                        except Exception as e:
                            upd_cur.execute("ROLLBACK TO SAVEPOINT sp_one;")
                            upd_cur.execute("RELEASE SAVEPOINT sp_one;")
                            other_errors += 1
                            if other_errors <= 50 or other_errors % 1000 == 0:
                                print(f"❌ ERROR id={rec_id} | {e}")
                                sys.stdout.flush()
                        else:
                            updated_ok += 1
                            pending_commits += 1
                            # Commit outside the per-row handlers so a commit
                            # failure is not mistaken for a row-level error.
                            if pending_commits >= COMMIT_EVERY:
                                db_write.commit()
                                pending_commits = 0

                # Single progress report for every path through the loop,
                # including dry-run rows that need a change.
                if total % LOG_EVERY == 0:
                    elapsed = time.time() - start
                    print(f"Checked {total:,} | needs_change {needs_change:,} | updated {updated_ok:,} | collisions {collisions:,} | {elapsed:.1f}s")
                    sys.stdout.flush()

        # finalize
        if not DRY_RUN:
            if pending_commits:
                db_write.commit()
            print("✅ Migration finished (committed).")
        else:
            print("⚠ DRY_RUN finished (no changes written).")

        read_cur.close()
        if upd_cur is not None:
            upd_cur.close()
    finally:
        # Always release both connections, also when the run aborts.
        if db_write is not None:
            db_write.close()
        db_read.close()

    elapsed = time.time() - start
    print("=" * 70)
    print(f"Total rows checked : {total:,}")
    print(f"Rows needing change : {needs_change:,}")
    print(f"Rows updated : {updated_ok:,}")
    print(f"Collisions (skipped) : {collisions:,}")
    print(f"Other errors : {other_errors:,}")
    print(f"Elapsed : {elapsed:.1f}s")
    print("=" * 70)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
146
WalkFilesOnBackupHDD/51 testthoseneedchangewhetherok.py
Normal file
146
WalkFilesOnBackupHDD/51 testthoseneedchangewhetherok.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import hashlib
|
||||
import posixpath
|
||||
import unicodedata
|
||||
import pymysql
|
||||
import time
|
||||
|
||||
# =========================
|
||||
# CONFIG
|
||||
# =========================
|
||||
|
||||
HOST_TO_CHECK = "Tower"           # host_name value selected from file_md5_index
WINDOWS_UNC_BASE = r"\\tower"     # UNC root the Linux paths are mapped onto

# Connection settings passed verbatim to pymysql.connect().
# NOTE(review): credentials should not live in source control. The password can
# be supplied through the TORRENTS_DB_PASSWORD environment variable; the literal
# fallback keeps the script working unchanged until the secret is rotated.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    "database": "torrents",
    "charset": "utf8mb4",
}

PRINT_FIRST_CHANGES = 20   # how many changed rows are printed in detail
LOG_EVERY = 5000           # progress line every N scanned rows
|
||||
|
||||
# =========================
|
||||
# CANONICAL
|
||||
# =========================
|
||||
|
||||
def canonical_path(path_str):
    """Return the canonical form of a stored path.

    Backslashes become forward slashes, the result is collapsed with
    ``posixpath.normpath`` and Unicode-normalized to NFC.

    Empty or ``None`` input is returned unchanged, matching the guard used by
    the sibling normalization scripts. Without it ``posixpath.normpath("")``
    yields ``"."`` and ``None`` raises ``AttributeError``.
    """
    if not path_str:
        return path_str

    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str
|
||||
|
||||
def md5_bytes(path_str):
    """Return the raw 16-byte MD5 digest of ``path_str`` encoded as UTF-8."""
    utf8_path = path_str.encode("utf-8")
    return hashlib.md5(utf8_path).digest()
|
||||
|
||||
# =========================
|
||||
# PATH MAP
|
||||
# =========================
|
||||
|
||||
def linux_to_windows_unc(linux_path, unc_base=None, prefix="/mnt/user/"):
    """Map a Linux share path to the corresponding Windows UNC path.

    Args:
        linux_path: Forward-slash path as stored in the database.
        unc_base: UNC root to join onto; defaults to ``WINDOWS_UNC_BASE``.
        prefix: Mount prefix removed from ``linux_path`` before joining.

    Returns:
        The UNC path built with Windows path rules (``ntpath``), so the
        result is the same whatever OS the script runs on.

    A path that does not start with ``prefix`` is no longer truncated by a
    fixed number of characters; it is mapped relative to the filesystem root.
    """
    import ntpath  # local import: Windows join semantics on any host OS

    if unc_base is None:
        unc_base = WINDOWS_UNC_BASE

    if linux_path.startswith(prefix):
        rel = linux_path[len(prefix):]
    else:
        # Not under the expected mount point: keep every path component
        # instead of slicing off an arbitrary leading chunk.
        rel = linux_path.lstrip("/")

    return ntpath.join(unc_base, *rel.split("/"))
|
||||
|
||||
# =========================
|
||||
# MAIN
|
||||
# =========================
|
||||
|
||||
def main():
    """Check over SMB that canonicalized paths exist for ``HOST_TO_CHECK``.

    Streams ``id``, ``full_path`` and ``path_hash`` from ``file_md5_index``,
    and for every row whose canonical path or hash differs from the stored
    value maps the canonical path to a UNC path and tests it with
    ``os.path.exists``. Prints the first ``PRINT_FIRST_CHANGES`` changed
    rows, a progress line every ``LOG_EVERY`` rows and a final summary.
    Nothing is written to the database.
    """
    print("=" * 70)
    print("🔍 Tower Canonical Path SMB Verification")
    print(f"Host: {HOST_TO_CHECK}")
    print(f"UNC Base: {WINDOWS_UNC_BASE}")
    print("=" * 70)

    total = needs_change = exists_ok = missing = 0
    printed_changes = 0

    db = pymysql.connect(**DB_CONFIG)
    try:
        # Unbuffered cursor: rows are read while iterating.
        cur = db.cursor(pymysql.cursors.SSCursor)
        try:
            cur.execute("""
                SELECT id, full_path, path_hash
                FROM file_md5_index
                WHERE host_name = %s
            """, (HOST_TO_CHECK,))

            start = time.time()

            for rec_id, full_path, stored_hash in cur:
                total += 1

                new_path = canonical_path(full_path)
                new_hash = md5_bytes(new_path)

                # Already canonical
                if new_path == full_path and new_hash == stored_hash:
                    continue

                needs_change += 1

                win_path = linux_to_windows_unc(new_path)
                exists = os.path.exists(win_path)

                if exists:
                    exists_ok += 1
                else:
                    missing += 1

                # ---- Print first examples ----
                if printed_changes < PRINT_FIRST_CHANGES:
                    print("\n🔧 CHANGE DETECTED")
                    print(f"ID : {rec_id}")
                    print(f"DB PATH : {full_path}")
                    print(f"NEW PATH : {new_path}")
                    print(f"WIN PATH : {win_path}")
                    print(f"Exists : {exists}")
                    printed_changes += 1

                # ---- Progress ----
                # Only reached for rows that need a change; canonical rows
                # leave the loop body early via `continue` above.
                if total % LOG_EVERY == 0:
                    elapsed = time.time() - start
                    rate = total / elapsed if elapsed else 0

                    print(
                        f"📊 Checked {total:,} rows | "
                        f"Needs change {needs_change:,} | "
                        f"Exists {exists_ok:,} | "
                        f"Missing {missing:,} | "
                        f"{rate:,.0f} rows/sec"
                    )
        finally:
            cur.close()
    finally:
        # Release the connection even when the scan aborts part-way.
        db.close()

    # =========================
    # SUMMARY
    # =========================
    elapsed = time.time() - start

    print("\n" + "=" * 70)
    print("✅ FINAL SUMMARY")
    print("=" * 70)
    print(f"Total scanned : {total:,}")
    print(f"Needs change : {needs_change:,}")
    print(f"Exists on Tower : {exists_ok:,}")
    print(f"Missing on Tower : {missing:,}")
    print(f"Runtime : {elapsed:.1f}s")
    print("=" * 70)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
139
WalkFilesOnBackupHDD/53 towerpathcorrection.py
Normal file
139
WalkFilesOnBackupHDD/53 towerpathcorrection.py
Normal file
@@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
TOWER PATH NORMALIZATION MIGRATION
|
||||
----------------------------------
|
||||
✔ Normalizes full_path → NFC canonical
|
||||
✔ Recalculates path_hash
|
||||
✔ Uses two DB connections (streaming safe)
|
||||
✔ Idempotent (safe to rerun)
|
||||
✔ Production safe
|
||||
"""
|
||||
|
||||
import pymysql
|
||||
import hashlib
|
||||
import posixpath
|
||||
import unicodedata
|
||||
import time
|
||||
|
||||
# =========================
|
||||
# CONFIG
|
||||
# =========================
|
||||
|
||||
import os

HOST_TO_FIX = "tower"    # host_name value whose rows are migrated
BATCH_FETCH = 5000       # rows pulled from the streaming cursor per fetchmany()
COMMIT_EVERY = 2000      # commit after this many executed updates

# Connection settings passed verbatim to pymysql.connect().
# NOTE(review): credentials should not live in source control. The password can
# be supplied through the TORRENTS_DB_PASSWORD environment variable; the literal
# fallback keeps the script working unchanged until the secret is rotated.
DB_CONFIG = {
    "host": "192.168.1.50",
    "port": 3306,
    "user": "root",
    "password": os.environ.get("TORRENTS_DB_PASSWORD", "Vlado9674+"),
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": False,
}
|
||||
|
||||
# =========================
|
||||
# CANONICALIZATION
|
||||
# =========================
|
||||
|
||||
def canonical_path(path_str: str) -> str:
    """Return the canonical form of a stored path.

    Backslashes become forward slashes, the result is collapsed with
    ``posixpath.normpath`` and Unicode-normalized to NFC.

    Empty or ``None`` input is returned unchanged, matching the guard used by
    the sibling normalization scripts. Without it an empty path would be
    turned into ``"."`` by ``posixpath.normpath`` (and then written back by
    the migration), and ``None`` would raise ``AttributeError``.
    """
    if not path_str:
        return path_str

    path_str = path_str.replace("\\", "/")
    path_str = posixpath.normpath(path_str)
    path_str = unicodedata.normalize("NFC", path_str)
    return path_str
|
||||
|
||||
def md5_bytes(path_str: str) -> bytes:
    """Compute the MD5 of the UTF-8 encoded path and return its raw digest bytes."""
    md5 = hashlib.md5()
    md5.update(path_str.encode("utf-8"))
    return md5.digest()
|
||||
|
||||
# =========================
|
||||
# MAIN
|
||||
# =========================
|
||||
|
||||
def main():
    """Normalize ``full_path`` and recompute ``path_hash`` for ``HOST_TO_FIX``.

    Rows are streamed from one connection with an unbuffered cursor and
    updated through a second connection, committing every ``COMMIT_EVERY``
    updates. Rows that are already canonical with a matching hash are skipped.

    An UPDATE rejected with an integrity error is counted as a collision,
    logged and skipped instead of aborting the run.
    NOTE(review): the sibling one-time migration documents a
    UNIQUE(host_name, path_hash) key on this table; confirm against the schema.
    """
    print("=" * 70)
    print("🚀 TOWER PATH NORMALIZATION MIGRATION")
    print(f"Host: {HOST_TO_FIX}")
    print("=" * 70)

    start = time.time()

    total = updated = skipped = collisions = 0
    pending_commit = 0

    # --- TWO CONNECTIONS ---
    db_read = pymysql.connect(**DB_CONFIG)
    db_write = None
    try:
        db_write = pymysql.connect(**DB_CONFIG)

        read_cur = db_read.cursor(pymysql.cursors.SSCursor)
        write_cur = db_write.cursor()

        read_cur.execute("""
            SELECT id, full_path, path_hash
            FROM file_md5_index
            WHERE host_name = %s
        """, (HOST_TO_FIX,))

        while True:
            rows = read_cur.fetchmany(BATCH_FETCH)
            if not rows:
                break

            for rec_id, full_path, stored_hash in rows:
                total += 1

                new_path = canonical_path(full_path)
                new_hash = md5_bytes(new_path)

                if new_path == full_path and new_hash == stored_hash:
                    skipped += 1
                    continue

                try:
                    write_cur.execute("""
                        UPDATE file_md5_index
                        SET full_path = %s,
                            path_hash = %s
                        WHERE id = %s
                    """, (new_path, new_hash, rec_id))
                except pymysql.err.IntegrityError as exc:
                    # InnoDB rolls back only the failing statement on a
                    # duplicate-key error, so earlier updates in the open
                    # transaction stay intact and the run can continue.
                    collisions += 1
                    # Print a short line occasionally (avoid huge spam)
                    if collisions <= 50 or collisions % 1000 == 0:
                        print(f"⚠ COLLISION id={rec_id} | {exc}")
                    continue

                updated += 1
                pending_commit += 1

                if pending_commit >= COMMIT_EVERY:
                    db_write.commit()
                    pending_commit = 0

            # One progress line per fetched batch.
            print(
                f"Checked {total:,} | Updated {updated:,} | Skipped {skipped:,}"
            )

        if pending_commit:
            db_write.commit()

        read_cur.close()
        write_cur.close()
    finally:
        # Always release both connections, also when the run aborts.
        if db_write is not None:
            db_write.close()
        db_read.close()

    elapsed = time.time() - start

    print("\n" + "=" * 70)
    print("✅ MIGRATION FINISHED")
    print("=" * 70)
    print(f"Total checked : {total:,}")
    print(f"Rows updated : {updated:,}")
    print(f"Rows skipped : {skipped:,}")
    print(f"Collisions (skipped) : {collisions:,}")
    print(f"Runtime : {elapsed:.1f}s")
    print("=" * 70)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user