z230
This commit is contained in:
161
compare_recovery.py
Normal file
161
compare_recovery.py
Normal file
@@ -0,0 +1,161 @@
|
||||
"""
|
||||
Compare original Dropbox folder with recovery folder — file by file.

Checks:
1. Files in original but missing in recovery
2. Files in recovery but missing in original
3. Size mismatches
4. Content mismatches (BLAKE3 hash, computed only for files whose sizes match)

Usage: python compare_recovery.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from indexer.config import ROOT_PATH
|
||||
from indexer.hasher import blake3_file
|
||||
|
||||
# Directory holding the recovered copy; main() compares it against ROOT_PATH.
RECOVERY_DIR = r"U:\recovery"
|
||||
|
||||
|
||||
def scan_dir(root: str) -> dict:
    """Walk ``root`` recursively and index every file entry found.

    Args:
        root: Directory to walk.

    Returns:
        A dict mapping each file's path relative to ``root`` (with ``/`` as
        the separator on every platform) to
        ``{"size": <size in bytes>, "full_path": <path as joined by os.walk>}``.
        Entries whose size cannot be read are left out of the result.
    """
    result = {}
    for dirpath, _dirs, files in os.walk(root):
        for fn in files:
            full = os.path.join(dirpath, fn)
            try:
                size = os.path.getsize(full)
            except OSError:
                # Best effort: skip entries we cannot stat (vanished, locked,
                # permission denied). PermissionError is an OSError subclass.
                continue
            rel = os.path.relpath(full, root)
            # Normalise only the platform's real separator. A backslash is a
            # legal filename character on POSIX and must not be rewritten,
            # otherwise "a\b" would collide with "a/b".
            if os.sep != "/":
                rel = rel.replace(os.sep, "/")
            result[rel] = {"size": size, "full_path": full}
    return result
|
||||
|
||||
|
||||
def _print_capped(title, items, describe, limit):
    """Print a titled listing of at most ``limit`` items plus an overflow note.

    Args:
        title: Section heading; the item count is appended to it.
        items: Sequence of entries to list. Nothing is printed when empty.
        describe: Callable turning one entry into the text shown for it.
        limit: Maximum number of entries to print in full.
    """
    if not items:
        return
    print(f"\n--- {title} ({len(items)}) ---")
    for item in items[:limit]:
        print(f" {describe(item)}")
    if len(items) > limit:
        print(f" ... and {len(items) - limit} more")


def main():
    """Compare the tree at ROOT_PATH with the one at RECOVERY_DIR and print a report.

    Steps:
      1. Scan both directories with ``scan_dir``.
      2. Report files present on only one side.
      3. For files present on both sides, compare sizes and, when the sizes
         are equal, the hashes returned by ``blake3_file``.

    Exits with status 1 if either directory does not exist. Otherwise the
    function returns ``None`` after printing the summary.
    """
    original_dir = ROOT_PATH.rstrip("\\/")
    recovery_dir = RECOVERY_DIR.rstrip("\\/")

    print("=" * 70)
    print("COMPARE: Original vs Recovery")
    print(f" Original : {original_dir}")
    print(f" Recovery : {recovery_dir}")
    print("=" * 70)

    if not os.path.isdir(original_dir):
        print(f"ERROR: Original dir not found: {original_dir}")
        sys.exit(1)
    if not os.path.isdir(recovery_dir):
        print(f"ERROR: Recovery dir not found: {recovery_dir}")
        sys.exit(1)

    # ── 1. Scan both directories ──
    print("\n[1/3] Scanning original...")
    orig = scan_dir(original_dir)
    print(f" {len(orig)} files")

    print("[2/3] Scanning recovery...")
    recov = scan_dir(recovery_dir)
    print(f" {len(recov)} files")

    orig_paths = set(orig)
    recov_paths = set(recov)

    missing_in_recovery = sorted(orig_paths - recov_paths)
    extra_in_recovery = sorted(recov_paths - orig_paths)
    common = sorted(orig_paths & recov_paths)

    # ── 2. Report missing / extra ──
    print(f"\n{'='*70}")
    print(f" Common files : {len(common)}")
    print(f" Missing in recovery : {len(missing_in_recovery)}")
    print(f" Extra in recovery : {len(extra_in_recovery)}")
    print(f"{'='*70}")

    _print_capped(
        "Missing in recovery",
        missing_in_recovery,
        lambda p: f"{p} ({orig[p]['size']} bytes)",
        50,
    )
    _print_capped(
        "Extra in recovery",
        extra_in_recovery,
        lambda p: f"{p} ({recov[p]['size']} bytes)",
        50,
    )

    # ── 3. Compare common files: size + hash ──
    print(f"\n[3/3] Comparing {len(common)} common files (size + BLAKE3)...")
    size_mismatch = []
    hash_mismatch = []
    hash_ok = 0
    errors = 0
    # Monotonic clock: the elapsed time is immune to system clock adjustments.
    start = time.monotonic()

    for i, p in enumerate(common, 1):
        o = orig[p]
        r = recov[p]

        if o["size"] != r["size"]:
            # Different sizes already prove a mismatch; skip the costly hash.
            size_mismatch.append((p, o["size"], r["size"]))
        else:
            try:
                identical = blake3_file(o["full_path"]) == blake3_file(r["full_path"])
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole
                # run. It is counted and reported instead.
                errors += 1
                print(f" ERROR hashing {p}: {e}")
            else:
                if identical:
                    hash_ok += 1
                else:
                    hash_mismatch.append(p)

        # Reported for every 2000th file, whatever its outcome was.
        if i % 2000 == 0:
            elapsed = time.monotonic() - start
            print(f" [{i}/{len(common)}] ok={hash_ok} size_diff={len(size_mismatch)} "
                  f"hash_diff={len(hash_mismatch)} errors={errors} elapsed={elapsed:.0f}s")

    elapsed = time.monotonic() - start

    # ── Summary ──
    print(f"\n{'='*70}")
    print(f"COMPARISON COMPLETE ({elapsed:.0f}s)")
    print(f"{'='*70}")
    print(f" Original files : {len(orig)}")
    print(f" Recovery files : {len(recov)}")
    print(f" Missing in recovery : {len(missing_in_recovery)}")
    print(f" Extra in recovery : {len(extra_in_recovery)}")
    print(f" Size matches + hash OK: {hash_ok}")
    print(f" Size mismatches : {len(size_mismatch)}")
    print(f" Hash mismatches : {len(hash_mismatch)}")
    print(f" Errors : {errors}")

    _print_capped(
        "Size mismatches",
        size_mismatch,
        lambda row: f"{row[0]} orig={row[1]} recov={row[2]}",
        20,
    )
    _print_capped("Hash mismatches", hash_mismatch, str, 20)

    common_clean = not size_mismatch and not hash_mismatch and errors == 0
    if common_clean and not missing_in_recovery and not extra_in_recovery:
        print("\n✓ PERFECT MATCH — recovery is identical to original.")
    elif common_clean:
        print(f"\n✓ All {hash_ok} common files match. "
              f"({len(missing_in_recovery)} missing, {len(extra_in_recovery)} extra)")
    else:
        # State the negative outcome explicitly so it cannot be overlooked.
        print(f"\n✗ DIFFERENCES FOUND — {len(size_mismatch)} size mismatches, "
              f"{len(hash_mismatch)} hash mismatches, {errors} errors.")

    print(f"{'='*70}")
||||
|
||||
|
||||
# Run the comparison only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user