# drobboxordinacebackup/recovery.py

"""
Recovery script: reconstruct directory tree from a specific run.

Usage: python recovery.py
       (interactive — shows last 10 runs, asks which one to recover)

For a given run_id, finds all files that existed at that point
(first_seen_run <= run_id AND last_seen_run >= run_id), decrypts each
file's zip blob from backup storage, and writes it under the output
directory (DEFAULT_OUTPUT_DIR when run as a script), preserving
the original directory structure.
"""

import os
import sys
import pyzipper
from indexer.config import BACKUP_PATH, BACKUP_PASSWORD
from indexer.db import get_connection
from indexer.backup import blob_path

# Root directory that recovered files are written under when this module is
# run as a script; the interactive entry point always uses this path.
DEFAULT_OUTPUT_DIR = r"U:\recovery"


def show_last_runs(n: int = 10):
    """Print a table of the most recent runs and return their IDs.

    Fetches up to ``n`` rows from the ``runs`` table ordered by ``id``
    descending (no filter on status) and prints them oldest-first, so the
    newest run is the last line of the table.

    Args:
        n: Maximum number of runs to list.

    Returns:
        The listed run IDs, newest first; an empty list when the table
        has no rows.
    """
    conn = get_connection()
    try:
        cur = conn.cursor()
        cur.execute(
            """SELECT id, started_at, finished_at, status,
                  files_total, files_new, files_modified, files_deleted
           FROM runs
           ORDER BY id DESC
           LIMIT %s""",
            (n,)
        )
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()

    if not rows:
        print("No runs found in DB.")
        return []

    print(f"\n{'='*80}")
    print(f"{'Run':>5}  {'Started':>19}  {'Status':>10}  {'Total':>7}  {'New':>5}  {'Mod':>5}  {'Del':>5}")
    print(f"{'-'*80}")
    for run_id, started, _finished, status, total, new, mod, deleted in reversed(rows):
        started_str = started.strftime("%Y-%m-%d %H:%M:%S") if started else "?"
        # A NULL status would make the ':>10' format spec raise TypeError,
        # so fall back to a placeholder like the other nullable columns do.
        status_str = status or "?"
        print(f"{run_id:>5}  {started_str:>19}  {status_str:>10}  {total or 0:>7}  {new or 0:>5}  {mod or 0:>5}  {deleted or 0:>5}")
    print(f"{'='*80}")

    return [row[0] for row in rows]


def _resolve_target(output_dir, relative_path):
    """Return the absolute destination path for *relative_path*.

    Returns None when the joined path would land outside *output_dir*
    (absolute path, drive-qualified path, or ``..`` components).
    """
    root = os.path.abspath(output_dir)
    candidate = os.path.abspath(
        os.path.join(root, relative_path.replace("/", os.sep))
    )
    try:
        inside = os.path.commonpath([root, candidate]) == root
    except ValueError:
        # commonpath refuses paths on different drives: certainly outside.
        inside = False
    return candidate if inside else None


def _restore_one(relative_path, content_hash, output_dir, password):
    """Restore a single file; print a diagnostic and return False on failure."""
    source = blob_path(BACKUP_PATH, content_hash)
    target = _resolve_target(output_dir, relative_path)

    if target is None:
        print(f"  ERROR unsafe path (outside output dir), skipped: {relative_path}")
        return False

    if not os.path.exists(source):
        print(f"  MISSING zip: {content_hash.hex()} for {relative_path}")
        return False

    # Per-file boundary of a best-effort restore: any failure (bad password,
    # corrupt archive, unwritable directory, ...) is reported and counted,
    # and the remaining files are still processed.
    try:
        with pyzipper.AESZipFile(source, "r") as zf:
            zf.setpassword(password)
            names = zf.namelist()
            if not names:
                print(f"  WARN: empty zip: {source}")
                return False
            # Only the first archive member is restored; blobs are presumably
            # single-member archives -- verify against the backup writer.
            data = zf.read(names[0])
        # Create the directory only once the payload was read successfully,
        # and inside the try so a directory error does not abort the run.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, "wb") as f:
            f.write(data)
    except Exception as e:
        print(f"  ERROR extracting {source} for {relative_path}: {e}")
        return False

    return True


def recover(run_id: int, output_dir: str):
    """Rebuild under *output_dir* the directory tree as it existed at *run_id*.

    Selects every row of ``files`` with
    ``first_seen_run <= run_id <= last_seen_run``, decrypts the matching
    zip blob from backup storage and writes its first member to
    ``output_dir/relative_path``. Failures on individual files are printed
    and counted; they never abort the remaining files.

    Args:
        run_id: ID of the run whose file set should be restored.
        output_dir: Directory the tree is recreated under.

    Returns:
        None. Progress and a final summary are printed to stdout.
    """
    conn = get_connection()
    try:
        cur = conn.cursor()
        cur.execute(
            """SELECT relative_path, content_hash
           FROM files
           WHERE first_seen_run <= %s AND last_seen_run >= %s""",
            (run_id, run_id)
        )
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()

    if not rows:
        print(f"No files found for run #{run_id}.")
        return

    total = len(rows)
    print(f"\nRecovering {total} files from run #{run_id} to {output_dir}")
    recovered = 0
    missing = 0
    password = BACKUP_PASSWORD.encode("utf-8")

    for i, (relative_path, content_hash) in enumerate(rows, 1):
        if _restore_one(relative_path, content_hash, output_dir, password):
            recovered += 1
        else:
            missing += 1

        # Report on every 1000th row regardless of that row's outcome.
        if i % 1000 == 0:
            print(f"  [{i}/{total}] recovered={recovered} missing={missing}")

    print(f"\n{'='*60}")
    print("Recovery complete.")
    print(f"  Run          : #{run_id}")
    print(f"  Output dir   : {output_dir}")
    print(f"  Recovered    : {recovered}")
    print(f"  Missing/err  : {missing}")
    print(f"{'='*60}")


if __name__ == "__main__":
    # Interactive entry point: list recent runs, ask which one to restore,
    # confirm before writing into a non-empty directory, then recover.
    if not show_last_runs(10):
        # Nothing to choose from.
        sys.exit(1)

    print()
    answer = input("Enter run ID to recover (or 'q' to quit): ").strip()
    if answer.lower() == "q":
        print("Aborted.")
        sys.exit(0)

    try:
        selected_run = int(answer)
    except ValueError:
        print(f"Invalid run ID: {answer}")
        sys.exit(1)

    destination = DEFAULT_OUTPUT_DIR
    print(f"\nOutput directory: {destination}")

    # Ask for confirmation only when the destination already holds entries.
    if os.path.exists(destination):
        if os.listdir(destination):
            reply = input("Directory is not empty. Continue? (y/n): ").strip().lower()
            if reply != "y":
                print("Aborted.")
                sys.exit(0)

    os.makedirs(destination, exist_ok=True)
    recover(selected_run, destination)