""" Compare original Dropbox folder with recovery folder — file by file. Checks: 1. Files in original but missing in recovery 2. Files in recovery but missing in original 3. Size mismatches 4. Content mismatches (BLAKE3 hash) Usage: python compare_recovery.py """ import os import sys import time from indexer.config import ROOT_PATH from indexer.hasher import blake3_file RECOVERY_DIR = r"U:\recovery" def scan_dir(root: str) -> dict: """Walk directory, return {relative_path: {size, full_path}}.""" result = {} for dirpath, _dirs, files in os.walk(root): for fn in files: full = os.path.join(dirpath, fn) try: size = os.path.getsize(full) except (OSError, PermissionError): continue rel = os.path.relpath(full, root).replace("\\", "/") result[rel] = {"size": size, "full_path": full} return result def main(): original_dir = ROOT_PATH.rstrip("\\/") recovery_dir = RECOVERY_DIR.rstrip("\\/") print("=" * 70) print("COMPARE: Original vs Recovery") print(f" Original : {original_dir}") print(f" Recovery : {recovery_dir}") print("=" * 70) if not os.path.isdir(original_dir): print(f"ERROR: Original dir not found: {original_dir}") sys.exit(1) if not os.path.isdir(recovery_dir): print(f"ERROR: Recovery dir not found: {recovery_dir}") sys.exit(1) # ── 1. Scan both directories ── print("\n[1/3] Scanning original...") orig = scan_dir(original_dir) print(f" {len(orig)} files") print("[2/3] Scanning recovery...") recov = scan_dir(recovery_dir) print(f" {len(recov)} files") orig_paths = set(orig.keys()) recov_paths = set(recov.keys()) missing_in_recovery = sorted(orig_paths - recov_paths) extra_in_recovery = sorted(recov_paths - orig_paths) common = sorted(orig_paths & recov_paths) # ── 2. Report missing / extra ── print(f"\n{'='*70}") print(f" Common files : {len(common)}") print(f" Missing in recovery : {len(missing_in_recovery)}") print(f" Extra in recovery : {len(extra_in_recovery)}") print(f"{'='*70}") if missing_in_recovery: print(f"\n--- Missing in recovery ({len(missing_in_recovery)}) ---") for p in missing_in_recovery[:50]: print(f" {p} ({orig[p]['size']} bytes)") if len(missing_in_recovery) > 50: print(f" ... and {len(missing_in_recovery) - 50} more") if extra_in_recovery: print(f"\n--- Extra in recovery ({len(extra_in_recovery)}) ---") for p in extra_in_recovery[:50]: print(f" {p} ({recov[p]['size']} bytes)") if len(extra_in_recovery) > 50: print(f" ... and {len(extra_in_recovery) - 50} more") # ── 3. Compare common files: size + hash ── print(f"\n[3/3] Comparing {len(common)} common files (size + BLAKE3)...") size_mismatch = [] hash_mismatch = [] hash_ok = 0 errors = 0 start = time.time() for i, p in enumerate(common, 1): o = orig[p] r = recov[p] if o["size"] != r["size"]: size_mismatch.append((p, o["size"], r["size"])) continue # Same size → compare BLAKE3 hash try: h_orig = blake3_file(o["full_path"]) h_recov = blake3_file(r["full_path"]) except Exception as e: errors += 1 print(f" ERROR hashing {p}: {e}") continue if h_orig != h_recov: hash_mismatch.append(p) else: hash_ok += 1 if i % 2000 == 0: elapsed = time.time() - start print(f" [{i}/{len(common)}] ok={hash_ok} size_diff={len(size_mismatch)} " f"hash_diff={len(hash_mismatch)} errors={errors} elapsed={elapsed:.0f}s") elapsed = time.time() - start # ── Summary ── print(f"\n{'='*70}") print(f"COMPARISON COMPLETE ({elapsed:.0f}s)") print(f"{'='*70}") print(f" Original files : {len(orig)}") print(f" Recovery files : {len(recov)}") print(f" Missing in recovery : {len(missing_in_recovery)}") print(f" Extra in recovery : {len(extra_in_recovery)}") print(f" Size matches + hash OK: {hash_ok}") print(f" Size mismatches : {len(size_mismatch)}") print(f" Hash mismatches : {len(hash_mismatch)}") print(f" Errors : {errors}") if size_mismatch: print(f"\n--- Size mismatches ({len(size_mismatch)}) ---") for p, os_, rs in size_mismatch[:20]: print(f" {p} orig={os_} recov={rs}") if hash_mismatch: print(f"\n--- Hash mismatches ({len(hash_mismatch)}) ---") for p in hash_mismatch[:20]: print(f" {p}") if not missing_in_recovery and not extra_in_recovery and not size_mismatch and not hash_mismatch and errors == 0: print("\n✓ PERFECT MATCH — recovery is identical to original.") elif not size_mismatch and not hash_mismatch and errors == 0: print(f"\n✓ All {hash_ok} common files match. " f"({len(missing_in_recovery)} missing, {len(extra_in_recovery)} extra)") print(f"{'='*70}") if __name__ == "__main__": main()