"""
Recovery script: reconstruct directory tree from a specific run.

Usage:
    python recovery.py
    (interactive — shows last 10 runs, asks which one to recover)

For a given run_id, finds all files that existed at that point
(first_seen_run <= run_id AND last_seen_run >= run_id) and extracts them
from the encrypted zip blobs in backup storage to output_dir, preserving
the original directory structure.
"""
import os
import sys

import pyzipper

from indexer.config import BACKUP_PATH, BACKUP_PASSWORD
from indexer.db import get_connection
from indexer.backup import blob_path

DEFAULT_OUTPUT_DIR = r"U:\recovery"


def show_last_runs(n: int = 10):
    """Print a table of the N most recent runs and return their ids.

    The query does not filter on status, so runs in any state are listed.

    Args:
        n: Maximum number of runs to list.

    Returns:
        The run ids in the order the query returned them (newest first);
        the table itself is printed oldest first. Returns an empty list
        when the ``runs`` table has no rows.
    """
    conn = get_connection()
    try:
        cur = conn.cursor()
        cur.execute(
            """SELECT id, started_at, finished_at, status,
                      files_total, files_new, files_modified, files_deleted
               FROM runs
               ORDER BY id DESC
               LIMIT %s""",
            (n,)
        )
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()

    if not rows:
        print("No runs found in DB.")
        return []

    print(f"\n{'='*80}")
    print(f"{'Run':>5} {'Started':>19} {'Status':>10} {'Total':>7} {'New':>5} {'Mod':>5} {'Del':>5}")
    print(f"{'-'*80}")
    for row in reversed(rows):
        # finished_at is selected but not displayed.
        run_id, started, _finished, status, total, new, mod, deleted = row
        started_str = started.strftime("%Y-%m-%d %H:%M:%S") if started else "?"
        # A NULL status would raise TypeError under the ':>10' format spec,
        # so fall back to a placeholder like the other nullable columns do.
        status_str = status or "?"
        print(f"{run_id:>5} {started_str:>19} {status_str:>10} {total or 0:>7} {new or 0:>5} {mod or 0:>5} {deleted or 0:>5}")
    print(f"{'='*80}")

    return [r[0] for r in rows]


def _restore_file(source, target, relative_path, content_hash, password):
    """Extract one backed-up file to *target*; return True on success.

    Every failure is reported on stdout and turned into a False return so
    that a single bad blob or target path never aborts the whole recovery.
    """
    if not os.path.exists(source):
        print(f" MISSING zip: {content_hash.hex()} for {relative_path}")
        return False

    try:
        parent = os.path.dirname(target)
        if parent:
            # os.makedirs("") raises, so skip it when target has no directory part.
            os.makedirs(parent, exist_ok=True)

        with pyzipper.AESZipFile(source, "r") as zf:
            zf.setpassword(password)
            names = zf.namelist()
            if not names:
                print(f" WARN: empty zip: {source}")
                return False
            # Only the first member of the archive is restored.
            data = zf.read(names[0])

        with open(target, "wb") as f:
            f.write(data)
    except Exception as e:
        # Deliberately broad: recovery is best-effort per file.
        print(f" ERROR extracting {source} for {relative_path}: {e}")
        return False

    return True


def recover(run_id: int, output_dir: str):
    """Restore every file that existed at run ``run_id`` into ``output_dir``.

    A file is selected when first_seen_run <= run_id <= last_seen_run. Each
    file is extracted from the encrypted zip located via ``blob_path`` and
    written under ``output_dir`` at its stored relative path. Failures are
    counted and reported; they do not stop the run.

    Args:
        run_id: Id of the run whose directory tree should be reconstructed.
        output_dir: Directory under which the tree is recreated.
    """
    conn = get_connection()
    try:
        cur = conn.cursor()
        cur.execute(
            """SELECT relative_path, content_hash
               FROM files
               WHERE first_seen_run <= %s AND last_seen_run >= %s""",
            (run_id, run_id)
        )
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()

    if not rows:
        print(f"No files found for run #{run_id}.")
        return

    print(f"\nRecovering {len(rows)} files from run #{run_id} to {output_dir}")

    recovered = 0
    missing = 0
    password = BACKUP_PASSWORD.encode("utf-8")

    for i, (relative_path, content_hash) in enumerate(rows, 1):
        source = blob_path(BACKUP_PATH, content_hash)
        # Stored paths use "/" separators; convert to the local separator.
        target = os.path.join(output_dir, relative_path.replace("/", os.sep))

        if _restore_file(source, target, relative_path, content_hash, password):
            recovered += 1
        else:
            missing += 1

        # Reported for every 1000th row, whether or not that row succeeded.
        if i % 1000 == 0:
            print(f" [{i}/{len(rows)}] recovered={recovered} missing={missing}")

    print(f"\n{'='*60}")
    print("Recovery complete.")
    print(f" Run : #{run_id}")
    print(f" Output dir : {output_dir}")
    print(f" Recovered : {recovered}")
    print(f" Missing/err : {missing}")
    print(f"{'='*60}")


def main():
    """Interactive entry point: list recent runs, ask for one, recover it.

    Exits with status 1 when no runs exist or the input is not an integer,
    and with status 0 when the user aborts.
    """
    run_ids = show_last_runs(10)
    if not run_ids:
        sys.exit(1)

    print()
    choice = input("Enter run ID to recover (or 'q' to quit): ").strip()
    if choice.lower() == "q":
        print("Aborted.")
        sys.exit(0)

    try:
        run_id = int(choice)
    except ValueError:
        print(f"Invalid run ID: {choice}")
        sys.exit(1)

    output_dir = DEFAULT_OUTPUT_DIR
    print(f"\nOutput directory: {output_dir}")

    # Existing files with the same relative path would be overwritten,
    # so ask before writing into a non-empty directory.
    if os.path.exists(output_dir) and os.listdir(output_dir):
        confirm = input("Directory is not empty. Continue? (y/n): ").strip().lower()
        if confirm != "y":
            print("Aborted.")
            sys.exit(0)

    os.makedirs(output_dir, exist_ok=True)
    recover(run_id, output_dir)


if __name__ == "__main__":
    main()