""" Recovery script: reconstruct directory tree from a specific run. Usage: python recovery.py For a given run_id, finds all files that existed at that point (first_seen_run <= run_id AND last_seen_run >= run_id) and copies them from backup storage to output_dir preserving the original directory structure. """ import os import sys import pyzipper from indexer.config import BACKUP_PATH, BACKUP_PASSWORD from indexer.db import get_connection from indexer.backup import blob_path def recover(run_id: int, output_dir: str): conn = get_connection() cur = conn.cursor() cur.execute( """SELECT relative_path, content_hash FROM files WHERE first_seen_run <= %s AND last_seen_run >= %s""", (run_id, run_id) ) rows = cur.fetchall() conn.close() if not rows: print(f"No files found for run #{run_id}.") return print(f"Recovering {len(rows)} files from run #{run_id} to {output_dir}") recovered = 0 missing = 0 password = BACKUP_PASSWORD.encode("utf-8") for relative_path, content_hash in rows: source = blob_path(BACKUP_PATH, content_hash) target = os.path.join(output_dir, relative_path.replace("/", os.sep)) if not os.path.exists(source): print(f" MISSING zip: {content_hash.hex()} for {relative_path}") missing += 1 continue os.makedirs(os.path.dirname(target), exist_ok=True) try: with pyzipper.AESZipFile(source, "r") as zf: zf.setpassword(password) names = zf.namelist() if not names: print(f" WARN: empty zip: {source}") missing += 1 continue data = zf.read(names[0]) with open(target, "wb") as f: f.write(data) recovered += 1 except Exception as e: print(f" ERROR extracting {source} for {relative_path}: {e}") missing += 1 continue print(f"\nRecovered: {recovered} Missing/errors: {missing}") if __name__ == "__main__": if len(sys.argv) != 3: print("Usage: python recovery.py ") sys.exit(1) run_id = int(sys.argv[1]) output_dir = sys.argv[2] recover(run_id, output_dir)