z230
This commit is contained in:
64
recovery.py
Normal file
64
recovery.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""
|
||||
Recovery script: reconstruct directory tree from a specific run.
|
||||
|
||||
Usage: python recovery.py <run_id> <output_dir>
|
||||
|
||||
For a given run_id, finds all files that existed at that point
|
||||
(first_seen_run <= run_id AND last_seen_run >= run_id)
|
||||
and copies them from backup storage to output_dir preserving
|
||||
the original directory structure.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
from indexer.config import DB_CONFIG, BACKUP_PATH
|
||||
from indexer.db import get_connection
|
||||
from indexer.backup import blob_path
|
||||
|
||||
|
||||
def recover(run_id: int, output_dir: str) -> None:
    """Copy every file that existed at run ``run_id`` into ``output_dir``.

    Selects rows from the ``files`` table where
    ``first_seen_run <= run_id <= last_seen_run``, then copies each row's
    blob (located with ``blob_path(BACKUP_PATH, content_hash)``) to
    ``output_dir/relative_path``, creating parent directories as needed.
    Blobs that are absent from backup storage are reported and skipped.
    Progress and a final summary are printed to stdout.

    Args:
        run_id: Run number whose directory tree should be reconstructed.
        output_dir: Directory under which the tree is recreated.
    """
    conn = get_connection()
    try:
        cur = conn.cursor()
        cur.execute(
            """SELECT relative_path, content_hash
            FROM files
            WHERE first_seen_run <= %s AND last_seen_run >= %s""",
            (run_id, run_id),
        )
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    if not rows:
        print(f"No files found for run #{run_id}.")
        return

    print(f"Recovering {len(rows)} files from run #{run_id} to {output_dir}")
    recovered = 0
    missing = 0

    for relative_path, content_hash in rows:
        source = blob_path(BACKUP_PATH, content_hash)
        # Stored paths use "/" separators; convert them to the local separator.
        # NOTE(review): relative_path is trusted to be relative and free of
        # ".." components; os.path.join would let an absolute path escape
        # output_dir. Confirm the indexer guarantees this.
        target = os.path.join(output_dir, relative_path.replace("/", os.sep))

        if not os.path.exists(source):
            print(f" MISSING blob: {content_hash.hex()} for {relative_path}")
            missing += 1
            continue

        # dirname is "" when target has no directory component;
        # os.makedirs("") would raise FileNotFoundError.
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)
        shutil.copy2(source, target)
        recovered += 1

    print(f"\nRecovered: {recovered} Missing blobs: {missing}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: recovery.py <run_id> <output_dir>
    if len(sys.argv) != 3:
        print("Usage: python recovery.py <run_id> <output_dir>")
        sys.exit(1)

    try:
        run_id = int(sys.argv[1])
    except ValueError:
        # A non-numeric run id is a usage error: report it instead of
        # surfacing a raw traceback, and exit with the same status.
        print(f"Invalid run_id: {sys.argv[1]!r} (expected an integer)")
        print("Usage: python recovery.py <run_id> <output_dir>")
        sys.exit(1)

    output_dir = sys.argv[2]
    recover(run_id, output_dir)
|
||||
Reference in New Issue
Block a user