This commit is contained in:
2026-02-12 11:29:41 +01:00
parent bd4aede24a
commit c71655cec2

161
compare_recovery.py Normal file
View File

@@ -0,0 +1,161 @@
"""
Compare original Dropbox folder with recovery folder — file by file.
Checks:
1. Files in original but missing in recovery
2. Files in recovery but missing in original
3. Size mismatches
4. Content mismatches (BLAKE3 hash)
Usage: python compare_recovery.py
"""
import os
import sys
import time
from indexer.config import ROOT_PATH
from indexer.hasher import blake3_file
# Hard-coded location of the recovered copy; main() compares this tree
# against ROOT_PATH (imported from indexer.config).
RECOVERY_DIR = r"U:\recovery"
def scan_dir(root: str) -> dict:
    """Walk *root* recursively and index every file found beneath it.

    Args:
        root: Directory to scan.

    Returns:
        A dict mapping each file's path relative to *root* (always using
        "/" as the separator, so keys from two different trees can be
        compared directly) to ``{"size": <bytes>, "full_path": <path>}``.
        Files whose size cannot be read are left out of the result.
    """
    result = {}
    for dirpath, _dirs, files in os.walk(root):
        for fn in files:
            full = os.path.join(dirpath, fn)
            try:
                size = os.path.getsize(full)
            except OSError:
                # Best-effort scan: skip entries that cannot be stat'ed
                # (permission denied, broken link, file vanished mid-walk).
                # PermissionError is an OSError subclass, so one class suffices.
                continue
            # Normalise only the platform separator. Replacing a literal "\\"
            # would corrupt POSIX filenames that legitimately contain one.
            rel = os.path.relpath(full, root).replace(os.sep, "/")
            result[rel] = {"size": size, "full_path": full}
    return result
def _print_section(title, items, limit, fmt):
    """Print a titled listing of at most *limit* items, noting how many were cut.

    Nothing is printed when *items* is empty. *fmt* turns one item into the
    text shown on its line.
    """
    if not items:
        return
    print(f"\n--- {title} ({len(items)}) ---")
    for item in items[:limit]:
        print(f" {fmt(item)}")
    if len(items) > limit:
        print(f" ... and {len(items) - limit} more")


def main():
    """Compare the original tree (ROOT_PATH) with the recovery tree (RECOVERY_DIR).

    Scans both directories, reports files present on only one side, then
    compares every common file first by size and, when sizes match, by the
    hash returned from ``blake3_file``. A summary and a final verdict are
    printed to stdout.

    Exits with status 1 if either directory does not exist.
    """
    original_dir = ROOT_PATH.rstrip("\\/")
    recovery_dir = RECOVERY_DIR.rstrip("\\/")

    print("=" * 70)
    print("COMPARE: Original vs Recovery")
    print(f" Original : {original_dir}")
    print(f" Recovery : {recovery_dir}")
    print("=" * 70)

    if not os.path.isdir(original_dir):
        print(f"ERROR: Original dir not found: {original_dir}")
        sys.exit(1)
    if not os.path.isdir(recovery_dir):
        print(f"ERROR: Recovery dir not found: {recovery_dir}")
        sys.exit(1)

    # ── 1. Scan both directories ──
    print("\n[1/3] Scanning original...")
    orig = scan_dir(original_dir)
    print(f" {len(orig)} files")
    print("[2/3] Scanning recovery...")
    recov = scan_dir(recovery_dir)
    print(f" {len(recov)} files")

    orig_paths = set(orig)
    recov_paths = set(recov)
    missing_in_recovery = sorted(orig_paths - recov_paths)
    extra_in_recovery = sorted(recov_paths - orig_paths)
    common = sorted(orig_paths & recov_paths)

    # ── 2. Report missing / extra ──
    print(f"\n{'='*70}")
    print(f" Common files : {len(common)}")
    print(f" Missing in recovery : {len(missing_in_recovery)}")
    print(f" Extra in recovery : {len(extra_in_recovery)}")
    print(f"{'='*70}")

    _print_section(
        "Missing in recovery",
        missing_in_recovery,
        50,
        lambda p: f"{p} ({orig[p]['size']} bytes)",
    )
    _print_section(
        "Extra in recovery",
        extra_in_recovery,
        50,
        lambda p: f"{p} ({recov[p]['size']} bytes)",
    )

    # ── 3. Compare common files: size + hash ──
    print(f"\n[3/3] Comparing {len(common)} common files (size + BLAKE3)...")
    size_mismatch = []
    hash_mismatch = []
    hash_ok = 0
    errors = 0
    # Monotonic clock: elapsed time must not be affected by wall-clock jumps.
    start = time.monotonic()

    for i, p in enumerate(common, 1):
        o = orig[p]
        r = recov[p]
        if o["size"] != r["size"]:
            # Different sizes already prove a difference; skip the hashing.
            size_mismatch.append((p, o["size"], r["size"]))
        else:
            try:
                same = blake3_file(o["full_path"]) == blake3_file(r["full_path"])
            except Exception as e:
                # Per-file boundary: the exception types raised by blake3_file
                # are not visible here, so record the failure and keep going.
                errors += 1
                print(f" ERROR hashing {p}: {e}")
            else:
                if same:
                    hash_ok += 1
                else:
                    hash_mismatch.append(p)

        # No `continue` above, so the progress line is emitted on every
        # 2000th file regardless of how that file's comparison turned out.
        if i % 2000 == 0:
            elapsed = time.monotonic() - start
            print(f" [{i}/{len(common)}] ok={hash_ok} size_diff={len(size_mismatch)} "
                  f"hash_diff={len(hash_mismatch)} errors={errors} elapsed={elapsed:.0f}s")

    elapsed = time.monotonic() - start

    # ── Summary ──
    print(f"\n{'='*70}")
    print(f"COMPARISON COMPLETE ({elapsed:.0f}s)")
    print(f"{'='*70}")
    print(f" Original files : {len(orig)}")
    print(f" Recovery files : {len(recov)}")
    print(f" Missing in recovery : {len(missing_in_recovery)}")
    print(f" Extra in recovery : {len(extra_in_recovery)}")
    print(f" Size matches + hash OK: {hash_ok}")
    print(f" Size mismatches : {len(size_mismatch)}")
    print(f" Hash mismatches : {len(hash_mismatch)}")
    print(f" Errors : {errors}")

    _print_section(
        "Size mismatches",
        size_mismatch,
        20,
        lambda m: f"{m[0]} orig={m[1]} recov={m[2]}",
    )
    _print_section("Hash mismatches", hash_mismatch, 20, str)

    # ── Verdict ──
    content_ok = not size_mismatch and not hash_mismatch and errors == 0
    if content_ok and not missing_in_recovery and not extra_in_recovery:
        print("\n✓ PERFECT MATCH — recovery is identical to original.")
    elif content_ok:
        print(f"\n✓ All {hash_ok} common files match. "
              f"({len(missing_in_recovery)} missing, {len(extra_in_recovery)} extra)")
    else:
        # Make the failing outcome explicit instead of ending without a verdict.
        print(f"\n✗ DIFFERENCES FOUND — {len(size_mismatch)} size mismatches, "
              f"{len(hash_mismatch)} hash mismatches, {errors} errors.")
    print(f"{'='*70}")
# Script entry point: run the comparison only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()