# Extracted from git patch c71655cec251f519d1a6171353ce99f6f385c64a
# (subject "z230", author vladimir.buzalka, 2026-02-12 11:29:41 +0100),
# which creates compare_recovery.py.
"""
Compare original Dropbox folder with recovery folder — file by file.

Checks:
  1. Files in original but missing in recovery
  2. Files in recovery but missing in original
  3. Size mismatches
  4. Content mismatches (BLAKE3 hash)

Files or directories that cannot be read while scanning are reported as
scan errors instead of being skipped silently, so an unreadable file can
never hide behind a "match" verdict.

Usage: python compare_recovery.py
"""

import os
import sys
import time
from typing import List, Optional

from indexer.config import ROOT_PATH
from indexer.hasher import blake3_file

RECOVERY_DIR = r"U:\recovery"

_RULE = "=" * 70
_PATH_LIST_LIMIT = 50        # max entries shown for missing / extra files
_MISMATCH_LIST_LIMIT = 20    # max entries shown for mismatches / scan errors
_PROGRESS_EVERY = 2000       # emit a progress line every N common files


def scan_dir(root: str, errors: Optional[List[str]] = None) -> dict:
    """Walk directory, return {relative_path: {size, full_path}}.

    Relative paths always use "/" as separator so that the two trees can be
    compared key by key.

    Args:
        root: Directory to walk recursively.
        errors: Optional list that receives one message per file or
            directory that could not be read. When omitted, the message is
            written to stderr instead, so a skipped entry is never silent.

    Returns:
        Mapping of relative path to ``{"size": int, "full_path": str}`` for
        every file whose size could be read.
    """
    result = {}

    def note(exc: OSError) -> None:
        # str(OSError) already carries errno, reason and the offending path.
        if errors is not None:
            errors.append(str(exc))
        else:
            print(f"WARNING: {exc}", file=sys.stderr)

    # Without onerror, os.walk silently ignores directories it cannot list.
    for dirpath, _dirs, files in os.walk(root, onerror=note):
        for fn in files:
            full = os.path.join(dirpath, fn)
            try:
                size = os.path.getsize(full)
            except OSError as exc:  # PermissionError is an OSError subclass
                note(exc)
                continue
            rel = os.path.relpath(full, root).replace("\\", "/")
            result[rel] = {"size": size, "full_path": full}
    return result


def _print_listing(title, items, limit, describe=str) -> None:
    """Print a titled, truncated listing of *items*; print nothing if empty."""
    if not items:
        return
    print(f"\n--- {title} ({len(items)}) ---")
    for item in items[:limit]:
        print(f" {describe(item)}")
    hidden = len(items) - limit
    if hidden > 0:
        print(f" ... and {hidden} more")


def _compare_common(common, orig, recov) -> dict:
    """Compare files present in both trees by size, then by BLAKE3 hash.

    Returns a dict with keys ``hash_ok`` (int), ``size_mismatch`` (list of
    ``(path, orig_size, recov_size)``), ``hash_mismatch`` (list of paths),
    ``errors`` (int, files that could not be hashed) and ``elapsed``
    (seconds, float).
    """
    size_mismatch = []
    hash_mismatch = []
    hash_ok = 0
    errors = 0
    total = len(common)
    start = time.monotonic()

    for i, p in enumerate(common, 1):
        o = orig[p]
        r = recov[p]

        if o["size"] != r["size"]:
            # Different sizes already prove a difference; skip hashing.
            size_mismatch.append((p, o["size"], r["size"]))
        else:
            try:
                same = blake3_file(o["full_path"]) == blake3_file(r["full_path"])
            except Exception as e:
                # Broad on purpose: the hasher's failure modes are not
                # visible here; the failure is logged and counted, not hidden.
                errors += 1
                print(f" ERROR hashing {p}: {e}")
            else:
                if same:
                    hash_ok += 1
                else:
                    hash_mismatch.append(p)

        # Checked for every file, including mismatches and errors, so a
        # progress line is never skipped.
        if i % _PROGRESS_EVERY == 0:
            elapsed = time.monotonic() - start
            print(f" [{i}/{total}] ok={hash_ok} size_diff={len(size_mismatch)} "
                  f"hash_diff={len(hash_mismatch)} errors={errors} elapsed={elapsed:.0f}s")

    return {
        "hash_ok": hash_ok,
        "size_mismatch": size_mismatch,
        "hash_mismatch": hash_mismatch,
        "errors": errors,
        "elapsed": time.monotonic() - start,
    }


def main():
    """Compare ROOT_PATH against RECOVERY_DIR and print a report to stdout.

    Exits with status 1 (via ``sys.exit``) when either directory does not
    exist. Otherwise returns normally whatever the comparison result is;
    the outcome is conveyed by the printed report only.
    """
    # File names and the result markers may not be encodable when stdout is
    # redirected under a legacy code page; degrade to escapes rather than
    # abort the report with UnicodeEncodeError after a long run.
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(errors="backslashreplace")

    original_dir = ROOT_PATH.rstrip("\\/")
    recovery_dir = RECOVERY_DIR.rstrip("\\/")

    print(_RULE)
    print("COMPARE: Original vs Recovery")
    print(f" Original : {original_dir}")
    print(f" Recovery : {recovery_dir}")
    print(_RULE)

    if not os.path.isdir(original_dir):
        print(f"ERROR: Original dir not found: {original_dir}")
        sys.exit(1)
    if not os.path.isdir(recovery_dir):
        print(f"ERROR: Recovery dir not found: {recovery_dir}")
        sys.exit(1)

    # ── 1. Scan both directories ──
    scan_errors = []

    print("\n[1/3] Scanning original...")
    orig = scan_dir(original_dir, scan_errors)
    print(f" {len(orig)} files")

    print("[2/3] Scanning recovery...")
    recov = scan_dir(recovery_dir, scan_errors)
    print(f" {len(recov)} files")

    orig_paths = set(orig)
    recov_paths = set(recov)

    missing_in_recovery = sorted(orig_paths - recov_paths)
    extra_in_recovery = sorted(recov_paths - orig_paths)
    common = sorted(orig_paths & recov_paths)

    # ── 2. Report missing / extra ──
    print(f"\n{_RULE}")
    print(f" Common files : {len(common)}")
    print(f" Missing in recovery : {len(missing_in_recovery)}")
    print(f" Extra in recovery : {len(extra_in_recovery)}")
    print(_RULE)

    _print_listing(
        "Missing in recovery",
        missing_in_recovery,
        _PATH_LIST_LIMIT,
        lambda p: f"{p} ({orig[p]['size']} bytes)",
    )
    _print_listing(
        "Extra in recovery",
        extra_in_recovery,
        _PATH_LIST_LIMIT,
        lambda p: f"{p} ({recov[p]['size']} bytes)",
    )

    # ── 3. Compare common files: size + hash ──
    print(f"\n[3/3] Comparing {len(common)} common files (size + BLAKE3)...")
    outcome = _compare_common(common, orig, recov)
    hash_ok = outcome["hash_ok"]
    size_mismatch = outcome["size_mismatch"]
    hash_mismatch = outcome["hash_mismatch"]
    errors = outcome["errors"]

    # ── Summary ──
    print(f"\n{_RULE}")
    print(f"COMPARISON COMPLETE ({outcome['elapsed']:.0f}s)")
    print(_RULE)
    print(f" Original files : {len(orig)}")
    print(f" Recovery files : {len(recov)}")
    print(f" Missing in recovery : {len(missing_in_recovery)}")
    print(f" Extra in recovery : {len(extra_in_recovery)}")
    print(f" Size matches + hash OK: {hash_ok}")
    print(f" Size mismatches : {len(size_mismatch)}")
    print(f" Hash mismatches : {len(hash_mismatch)}")
    print(f" Errors : {errors}")
    print(f" Scan errors : {len(scan_errors)}")

    _print_listing(
        "Size mismatches",
        size_mismatch,
        _MISMATCH_LIST_LIMIT,
        lambda entry: f"{entry[0]} orig={entry[1]} recov={entry[2]}",
    )
    _print_listing("Hash mismatches", hash_mismatch, _MISMATCH_LIST_LIMIT)
    _print_listing("Scan errors", scan_errors, _MISMATCH_LIST_LIMIT)

    # A file that could not be scanned or hashed was not verified, so any
    # error rules out a positive verdict.
    content_ok = (
        not size_mismatch
        and not hash_mismatch
        and errors == 0
        and not scan_errors
    )
    if content_ok and not missing_in_recovery and not extra_in_recovery:
        print("\n✓ PERFECT MATCH — recovery is identical to original.")
    elif content_ok:
        print(f"\n✓ All {hash_ok} common files match. "
              f"({len(missing_in_recovery)} missing, {len(extra_in_recovery)} extra)")
    else:
        print("\n✗ DIFFERENCES OR ERRORS FOUND — see details above.")

    print(_RULE)


if __name__ == "__main__":
    main()