"""
Recovery script: reconstruct directory tree from a specific run.

Usage:
    python recovery.py <run_id> <output_dir>

For a given run_id, finds all files that existed at that point
(first_seen_run <= run_id AND last_seen_run >= run_id) and copies them
from backup storage to output_dir preserving the original directory
structure.
"""

import os
import sys
import shutil

from indexer.config import DB_CONFIG, BACKUP_PATH
from indexer.db import get_connection
from indexer.backup import blob_path


def recover(run_id: int, output_dir: str) -> None:
    """Copy every file that existed at run ``run_id`` into ``output_dir``.

    Queries the ``files`` table for rows whose
    ``[first_seen_run, last_seen_run]`` interval contains ``run_id``,
    locates each row's blob via ``blob_path(BACKUP_PATH, content_hash)``
    and copies it to ``output_dir/<relative_path>``, creating parent
    directories as needed. Blobs that are absent from backup storage are
    reported and counted, not treated as fatal.

    Args:
        run_id: Run number whose file set should be reconstructed.
        output_dir: Directory under which the tree is recreated.
    """
    conn = get_connection()
    try:
        cur = conn.cursor()
        cur.execute(
            """SELECT relative_path, content_hash
               FROM files
               WHERE first_seen_run <= %s AND last_seen_run >= %s""",
            (run_id, run_id),
        )
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()

    if not rows:
        print(f"No files found for run #{run_id}.")
        return

    print(f"Recovering {len(rows)} files from run #{run_id} to {output_dir}")

    recovered = 0
    missing = 0
    for relative_path, content_hash in rows:
        source = blob_path(BACKUP_PATH, content_hash)
        # Stored paths use "/" separators; translate to the local one.
        # NOTE(review): an absolute or ".."-laden relative_path would land
        # outside output_dir -- confirm the indexer never stores such paths.
        target = os.path.join(output_dir, relative_path.replace("/", os.sep))

        # Checked before creating directories so that a missing blob does
        # not leave an empty directory behind.
        if not os.path.exists(source):
            print(f" MISSING blob: {content_hash.hex()} for {relative_path}")
            missing += 1
            continue

        parent = os.path.dirname(target)
        if parent:
            # os.makedirs("") raises, which would happen for a top-level
            # file when output_dir is an empty string.
            os.makedirs(parent, exist_ok=True)
        shutil.copy2(source, target)
        recovered += 1

    print(f"\nRecovered: {recovered} Missing blobs: {missing}")


if __name__ == "__main__":
    _USAGE = "Usage: python recovery.py <run_id> <output_dir>"

    if len(sys.argv) != 3:
        print(_USAGE)
        sys.exit(1)

    try:
        run_id = int(sys.argv[1])
    except ValueError:
        # A non-numeric run id is a usage error, not a crash.
        print(_USAGE)
        sys.exit(1)

    output_dir = sys.argv[2]
    recover(run_id, output_dir)