145 lines
4.2 KiB
Python
145 lines
4.2 KiB
Python
"""
Recovery script: reconstruct the directory tree as it existed at a specific run.

Usage: python recovery.py
(interactive — shows the last 10 runs, then asks which one to recover)

For the given run_id, finds all files that existed at that point
(first_seen_run <= run_id AND last_seen_run >= run_id)
and copies them from backup storage to output_dir, preserving
the original directory structure.
"""
|
|
|
|
import os
|
|
import sys
|
|
import pyzipper
|
|
from indexer.config import BACKUP_PATH, BACKUP_PASSWORD
|
|
from indexer.db import get_connection
|
|
from indexer.backup import blob_path
|
|
|
|
DEFAULT_OUTPUT_DIR = r"U:\recovery"
|
|
|
|
|
|
def show_last_runs(n: int = 10):
    """Print a table of the most recent runs and return their IDs.

    Selects the ``n`` rows of the ``runs`` table with the highest ``id``
    (no filtering on status) and prints them oldest-first.

    Args:
        n: Maximum number of runs to list.

    Returns:
        The run IDs in query order (highest ID first), or an empty list
        when the table has no rows.
    """
    conn = get_connection()
    try:
        cur = conn.cursor()
        cur.execute(
            """SELECT id, started_at, finished_at, status,
                      files_total, files_new, files_modified, files_deleted
               FROM runs
               ORDER BY id DESC
               LIMIT %s""",
            (n,)
        )
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()

    if not rows:
        print("No runs found in DB.")
        return []

    print(f"\n{'='*80}")
    print(f"{'Run':>5} {'Started':>19} {'Status':>10} {'Total':>7} {'New':>5} {'Mod':>5} {'Del':>5}")
    print(f"{'-'*80}")
    # Rows arrive newest-first; display them oldest-first.
    for row in reversed(rows):
        run_id, started, finished, status, total, new, mod, deleted = row
        started_str = started.strftime("%Y-%m-%d %H:%M:%S") if started else "?"
        # A NULL status would make the ">10" format spec raise TypeError,
        # so it gets the same "?" placeholder as a missing start time.
        status_str = "?" if status is None else str(status)
        print(f"{run_id:>5} {started_str:>19} {status_str:>10} {total or 0:>7} {new or 0:>5} {mod or 0:>5} {deleted or 0:>5}")
    print(f"{'='*80}")

    return [r[0] for r in rows]
|
|
|
|
|
|
def recover(run_id: int, output_dir: str):
    """Restore every file that existed at ``run_id`` into ``output_dir``.

    Selects the rows of ``files`` whose ``first_seen_run``/``last_seen_run``
    range contains ``run_id``, locates each blob via ``blob_path`` and writes
    the first member of that encrypted zip to ``output_dir`` under the file's
    relative path. A failure on one file is reported and counted; it never
    stops the remaining files from being processed.

    Args:
        run_id: Run whose file set should be reconstructed.
        output_dir: Directory under which the tree is written.

    Returns:
        None. Progress and a final summary are printed to stdout.
    """
    conn = get_connection()
    try:
        cur = conn.cursor()
        cur.execute(
            """SELECT relative_path, content_hash
               FROM files
               WHERE first_seen_run <= %s AND last_seen_run >= %s""",
            (run_id, run_id)
        )
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()

    if not rows:
        print(f"No files found for run #{run_id}.")
        return

    total = len(rows)
    print(f"\nRecovering {total} files from run #{run_id} to {output_dir}")
    recovered = 0
    missing = 0
    password = BACKUP_PASSWORD.encode("utf-8")

    for i, (relative_path, content_hash) in enumerate(rows, 1):
        source = blob_path(BACKUP_PATH, content_hash)
        # Stored paths use "/" separators; convert to the local separator.
        target = os.path.join(output_dir, relative_path.replace("/", os.sep))

        if not os.path.exists(source):
            # content_hash is presumably bytes (it is rendered via .hex()).
            print(f" MISSING zip: {content_hash.hex()} for {relative_path}")
            missing += 1
        else:
            try:
                extracted = _extract_blob(source, target, password)
            except Exception as e:
                # Deliberately broad: any per-file failure (bad archive,
                # wrong password, unwritable path) is counted, not fatal.
                print(f" ERROR extracting {source} for {relative_path}: {e}")
                missing += 1
            else:
                if extracted:
                    recovered += 1
                else:
                    print(f" WARN: empty zip: {source}")
                    missing += 1

        # Reported for every 1000th row, including rows that failed.
        if i % 1000 == 0:
            print(f" [{i}/{total}] recovered={recovered} missing={missing}")

    print(f"\n{'='*60}")
    print("Recovery complete.")
    print(f" Run : #{run_id}")
    print(f" Output dir : {output_dir}")
    print(f" Recovered : {recovered}")
    print(f" Missing/err : {missing}")
    print(f"{'='*60}")


def _extract_blob(source, target, password):
    """Write the first member of the encrypted zip ``source`` to ``target``.

    Returns True when a file was written and False when the archive has no
    members. Any filesystem or archive error propagates to the caller.
    """
    parent = os.path.dirname(target)
    if parent:
        # os.makedirs("") raises, so skip it when target has no parent part.
        os.makedirs(parent, exist_ok=True)

    with pyzipper.AESZipFile(source, "r") as zf:
        zf.setpassword(password)
        names = zf.namelist()
        if not names:
            return False
        data = zf.read(names[0])

    with open(target, "wb") as f:
        f.write(data)
    return True
|
|
|
|
|
|
if __name__ == "__main__":
    # Show the recent runs first so the operator can pick an ID from the table.
    known_runs = show_last_runs(10)
    if not known_runs:
        sys.exit(1)

    print()
    answer = input("Enter run ID to recover (or 'q' to quit): ").strip()
    if answer.lower() == "q":
        print("Aborted.")
        sys.exit(0)

    try:
        selected_run = int(answer)
    except ValueError:
        print(f"Invalid run ID: {answer}")
        sys.exit(1)

    destination = DEFAULT_OUTPUT_DIR
    print(f"\nOutput directory: {destination}")

    # Ask for confirmation only when the destination already holds entries.
    needs_confirmation = os.path.exists(destination) and bool(os.listdir(destination))
    if needs_confirmation:
        reply = input("Directory is not empty. Continue? (y/n): ").strip().lower()
        if reply != "y":
            print("Aborted.")
            sys.exit(0)

    os.makedirs(destination, exist_ok=True)
    recover(selected_run, destination)
|