162 lines
5.2 KiB
Python
162 lines
5.2 KiB
Python
"""
|
|
Compare original Dropbox folder with recovery folder — file by file.

Checks:
1. Files in original but missing in recovery
2. Files in recovery but missing in original
3. Size mismatches
4. Content mismatches (BLAKE3 hash)

Usage: python compare_recovery.py
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
from indexer.config import ROOT_PATH
|
|
from indexer.hasher import blake3_file
|
|
|
|
# Root of the recovered copy; main() compares its contents against ROOT_PATH.
RECOVERY_DIR = r"U:\recovery"
|
|
|
|
|
|
def scan_dir(root: str) -> dict:
    """Walk ``root`` recursively and index every file found.

    Args:
        root: Directory to scan.

    Returns:
        A dict mapping each file's path relative to ``root`` (with
        backslashes replaced by forward slashes, so results from two roots
        can be compared) to ``{"size": <bytes>, "full_path": <joined path>}``.

    Files whose size cannot be read and directories that cannot be listed
    are still left out of the result, but a warning is written to stderr
    for each of them so that an incomplete scan is visible to the operator.
    """

    def _warn_unlistable(err: OSError) -> None:
        # os.walk() swallows listing errors unless an onerror hook is given.
        print(f"  WARNING: cannot list {err.filename}: {err}", file=sys.stderr)

    result = {}
    for dirpath, _dirs, files in os.walk(root, onerror=_warn_unlistable):
        for fn in files:
            full = os.path.join(dirpath, fn)
            try:
                size = os.path.getsize(full)
            except OSError as err:  # PermissionError is a subclass of OSError
                print(f"  WARNING: cannot stat {full}: {err}", file=sys.stderr)
                continue
            rel = os.path.relpath(full, root).replace("\\", "/")
            result[rel] = {"size": size, "full_path": full}
    return result
|
|
|
|
|
|
def _print_listing(title: str, items: list, limit: int, render) -> None:
    """Print a titled listing of at most ``limit`` items plus an overflow count.

    Args:
        title: Section name shown in the ``--- title (count) ---`` header.
        items: Entries to list; nothing is printed when it is empty.
        limit: Maximum number of entries printed in full.
        render: Callable turning one entry into the text shown on its line.
    """
    if not items:
        return
    print(f"\n--- {title} ({len(items)}) ---")
    for item in items[:limit]:
        print(f" {render(item)}")
    if len(items) > limit:
        print(f" ... and {len(items) - limit} more")


def main():
    """Compare ROOT_PATH against RECOVERY_DIR and print a report.

    Steps:
      1. Scan both trees with ``scan_dir``.
      2. Report files present on only one side.
      3. For files present on both sides compare sizes and, when the sizes
         agree, BLAKE3 hashes computed with ``blake3_file``.

    Exits with status 1 (via ``sys.exit``) when either directory is missing.
    Otherwise returns ``None`` after printing the summary and a verdict.
    """
    original_dir = ROOT_PATH.rstrip("\\/")
    recovery_dir = RECOVERY_DIR.rstrip("\\/")

    print("=" * 70)
    print("COMPARE: Original vs Recovery")
    print(f" Original : {original_dir}")
    print(f" Recovery : {recovery_dir}")
    print("=" * 70)

    if not os.path.isdir(original_dir):
        print(f"ERROR: Original dir not found: {original_dir}")
        sys.exit(1)
    if not os.path.isdir(recovery_dir):
        print(f"ERROR: Recovery dir not found: {recovery_dir}")
        sys.exit(1)

    # ── 1. Scan both directories ──
    print("\n[1/3] Scanning original...")
    orig = scan_dir(original_dir)
    print(f" {len(orig)} files")

    print("[2/3] Scanning recovery...")
    recov = scan_dir(recovery_dir)
    print(f" {len(recov)} files")

    missing_in_recovery = sorted(orig.keys() - recov.keys())
    extra_in_recovery = sorted(recov.keys() - orig.keys())
    common = sorted(orig.keys() & recov.keys())

    # ── 2. Report missing / extra ──
    print(f"\n{'='*70}")
    print(f" Common files : {len(common)}")
    print(f" Missing in recovery : {len(missing_in_recovery)}")
    print(f" Extra in recovery : {len(extra_in_recovery)}")
    print(f"{'='*70}")

    _print_listing(
        "Missing in recovery",
        missing_in_recovery,
        50,
        lambda p: f"{p} ({orig[p]['size']} bytes)",
    )
    _print_listing(
        "Extra in recovery",
        extra_in_recovery,
        50,
        lambda p: f"{p} ({recov[p]['size']} bytes)",
    )

    # ── 3. Compare common files: size + hash ──
    print(f"\n[3/3] Comparing {len(common)} common files (size + BLAKE3)...")
    size_mismatch = []
    hash_mismatch = []
    hash_ok = 0
    errors = 0
    # Monotonic clock: elapsed time must not jump if the wall clock is adjusted.
    start = time.monotonic()

    for i, p in enumerate(common, 1):
        o = orig[p]
        r = recov[p]

        if o["size"] != r["size"]:
            # Different sizes already prove a difference; skip the hashing.
            size_mismatch.append((p, o["size"], r["size"]))
        else:
            # Same size → compare BLAKE3 hash
            try:
                h_orig = blake3_file(o["full_path"])
                h_recov = blake3_file(r["full_path"])
            except Exception as e:
                # Deliberately broad: one unreadable file must not abort the
                # whole comparison; it is counted and reported instead.
                errors += 1
                print(f" ERROR hashing {p}: {e}")
            else:
                if h_orig != h_recov:
                    hash_mismatch.append(p)
                else:
                    hash_ok += 1

        # No `continue` above, so the progress line is emitted on every
        # 2000th file even when that file mismatched or failed to hash.
        if i % 2000 == 0:
            elapsed = time.monotonic() - start
            print(f" [{i}/{len(common)}] ok={hash_ok} size_diff={len(size_mismatch)} "
                  f"hash_diff={len(hash_mismatch)} errors={errors} elapsed={elapsed:.0f}s")

    elapsed = time.monotonic() - start

    # ── Summary ──
    print(f"\n{'='*70}")
    print(f"COMPARISON COMPLETE ({elapsed:.0f}s)")
    print(f"{'='*70}")
    print(f" Original files : {len(orig)}")
    print(f" Recovery files : {len(recov)}")
    print(f" Missing in recovery : {len(missing_in_recovery)}")
    print(f" Extra in recovery : {len(extra_in_recovery)}")
    print(f" Size matches + hash OK: {hash_ok}")
    print(f" Size mismatches : {len(size_mismatch)}")
    print(f" Hash mismatches : {len(hash_mismatch)}")
    print(f" Errors : {errors}")

    # Mismatch listings are capped at 20 entries; the helper adds an
    # "... and N more" line so the truncation is visible.
    _print_listing(
        "Size mismatches",
        size_mismatch,
        20,
        lambda row: f"{row[0]} orig={row[1]} recov={row[2]}",
    )
    _print_listing("Hash mismatches", hash_mismatch, 20, lambda p: p)

    contents_match = not size_mismatch and not hash_mismatch and errors == 0
    if contents_match and not missing_in_recovery and not extra_in_recovery:
        print("\n✓ PERFECT MATCH — recovery is identical to original.")
    elif contents_match:
        print(f"\n✓ All {hash_ok} common files match. "
              f"({len(missing_in_recovery)} missing, {len(extra_in_recovery)} extra)")
    else:
        # Previously no verdict at all was printed in the failure case.
        print(f"\n✗ DIFFERENCES FOUND — {len(size_mismatch)} size mismatches, "
              f"{len(hash_mismatch)} hash mismatches, {errors} errors.")

    print(f"{'='*70}")
|
|
|
|
|
|
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|