z230
This commit is contained in:
@@ -63,23 +63,33 @@ def load_all_files(cur) -> dict:
|
||||
|
||||
def batch_insert_files(cur, files_list: list, run_id: int) -> dict:
|
||||
"""
|
||||
Batch INSERT nových souborů.
|
||||
Batch INSERT (or re-activate) souborů.
|
||||
Handles re-appearing files that were previously deleted (exists_now=0)
|
||||
via ON DUPLICATE KEY UPDATE.
|
||||
files_list: [{relative_path, file_name, directory, size, mtime, content_hash}]
|
||||
Returns: {relative_path: file_id}
|
||||
"""
|
||||
path_to_id = {}
|
||||
for i in range(0, len(files_list), BATCH_SIZE):
|
||||
chunk = files_list[i:i + BATCH_SIZE]
|
||||
cur.executemany(
|
||||
"""INSERT INTO files
|
||||
(relative_path, file_name, directory, file_size, mtime,
|
||||
content_hash, first_seen_run, last_seen_run, exists_now)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 1)""",
|
||||
[(f["relative_path"], f["file_name"], f["directory"],
|
||||
f["size"], f["mtime"], f["content_hash"], run_id, run_id)
|
||||
for f in chunk]
|
||||
)
|
||||
# Fetch real IDs — lastrowid+j is unreliable with executemany
|
||||
for f in chunk:
|
||||
cur.execute(
|
||||
"""INSERT INTO files
|
||||
(relative_path, file_name, directory, file_size, mtime,
|
||||
content_hash, first_seen_run, last_seen_run, exists_now)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 1)
|
||||
ON DUPLICATE KEY UPDATE
|
||||
file_name = VALUES(file_name),
|
||||
directory = VALUES(directory),
|
||||
file_size = VALUES(file_size),
|
||||
mtime = VALUES(mtime),
|
||||
content_hash = VALUES(content_hash),
|
||||
last_seen_run = VALUES(last_seen_run),
|
||||
exists_now = 1""",
|
||||
(f["relative_path"], f["file_name"], f["directory"],
|
||||
f["size"], f["mtime"], f["content_hash"], run_id, run_id)
|
||||
)
|
||||
# Fetch real IDs
|
||||
paths = [f["relative_path"] for f in chunk]
|
||||
placeholders = ",".join(["%s"] * len(paths))
|
||||
cur.execute(
|
||||
|
||||
@@ -54,10 +54,10 @@ def main():
|
||||
print(f" Found {len(db_hashes)} distinct hashes in DB.")
|
||||
|
||||
# --- Options ---
|
||||
# PURGE_ORPHANS = True # uncomment to delete orphan blobs
|
||||
PURGE_ORPHANS = False
|
||||
# PURGE_ORPHANS = True # uncomment to delete orphan blobs
|
||||
PURGE_ORPHANS = False
|
||||
|
||||
# --- Reconcile ---
|
||||
# --- Reconcile ---
|
||||
missing_on_disk = db_hashes - disk_hashes
|
||||
orphans_on_disk = disk_hashes - db_hashes
|
||||
matched = db_hashes & disk_hashes
|
||||
|
||||
80
recovery.py
80
recovery.py
@@ -1,7 +1,8 @@
|
||||
"""
|
||||
Recovery script: reconstruct directory tree from a specific run.
|
||||
|
||||
Usage: python recovery.py <run_id> <output_dir>
|
||||
Usage: python recovery.py
|
||||
(interactive — shows last 10 runs, asks which one to recover)
|
||||
|
||||
For a given run_id, finds all files that existed at that point
|
||||
(first_seen_run <= run_id AND last_seen_run >= run_id)
|
||||
@@ -16,6 +17,39 @@ from indexer.config import BACKUP_PATH, BACKUP_PASSWORD
|
||||
from indexer.db import get_connection
|
||||
from indexer.backup import blob_path
|
||||
|
||||
DEFAULT_OUTPUT_DIR = r"U:\recovery"
|
||||
|
||||
|
||||
def show_last_runs(n: int = 10) -> list:
    """Print a table of the most recent runs and return their IDs.

    The query takes the ``n`` highest run IDs from the ``runs`` table without
    filtering on status, so unfinished or failed runs are listed as well.

    Args:
        n: Maximum number of runs to list.

    Returns:
        Run IDs in the order returned by the query (highest ID first), or an
        empty list when the table has no rows. The printed table uses the
        reverse order, so the run with the highest ID is the last line.
    """
    conn = get_connection()
    try:
        cur = conn.cursor()
        cur.execute(
            """SELECT id, started_at, finished_at, status,
                      files_total, files_new, files_modified, files_deleted
               FROM runs
               ORDER BY id DESC
               LIMIT %s""",
            (n,)
        )
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()

    if not rows:
        print("No runs found in DB.")
        return []

    print(f"\n{'='*80}")
    print(f"{'Run':>5} {'Started':>19} {'Status':>10} {'Total':>7} {'New':>5} {'Mod':>5} {'Del':>5}")
    print(f"{'-'*80}")
    # Rows arrive newest-first; print oldest-first so the latest run ends up
    # directly above the prompt that follows.
    for row in reversed(rows):
        run_id, started, _finished, status, total, new, mod, deleted = row
        started_str = started.strftime("%Y-%m-%d %H:%M:%S") if started else "?"
        # None does not accept a width/alignment format spec, so substitute
        # a placeholder the same way a missing start time is handled.
        status_str = "?" if status is None else status
        print(f"{run_id:>5} {started_str:>19} {status_str:>10} {total or 0:>7} {new or 0:>5} {mod or 0:>5} {deleted or 0:>5}")
    print(f"{'='*80}")

    return [r[0] for r in rows]
|
||||
|
||||
|
||||
def recover(run_id: int, output_dir: str):
|
||||
conn = get_connection()
|
||||
@@ -34,12 +68,12 @@ def recover(run_id: int, output_dir: str):
|
||||
print(f"No files found for run #{run_id}.")
|
||||
return
|
||||
|
||||
print(f"Recovering {len(rows)} files from run #{run_id} to {output_dir}")
|
||||
print(f"\nRecovering {len(rows)} files from run #{run_id} to {output_dir}")
|
||||
recovered = 0
|
||||
missing = 0
|
||||
password = BACKUP_PASSWORD.encode("utf-8")
|
||||
|
||||
for relative_path, content_hash in rows:
|
||||
for i, (relative_path, content_hash) in enumerate(rows, 1):
|
||||
source = blob_path(BACKUP_PATH, content_hash)
|
||||
target = os.path.join(output_dir, relative_path.replace("/", os.sep))
|
||||
|
||||
@@ -67,14 +101,44 @@ def recover(run_id: int, output_dir: str):
|
||||
missing += 1
|
||||
continue
|
||||
|
||||
print(f"\nRecovered: {recovered} Missing/errors: {missing}")
|
||||
if i % 1000 == 0:
|
||||
print(f" [{i}/{len(rows)}] recovered={recovered} missing={missing}")
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Recovery complete.")
|
||||
print(f" Run : #{run_id}")
|
||||
print(f" Output dir : {output_dir}")
|
||||
print(f" Recovered : {recovered}")
|
||||
print(f" Missing/err : {missing}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 3:
|
||||
print("Usage: python recovery.py <run_id> <output_dir>")
|
||||
run_ids = show_last_runs(10)
|
||||
|
||||
if not run_ids:
|
||||
sys.exit(1)
|
||||
|
||||
run_id = int(sys.argv[1])
|
||||
output_dir = sys.argv[2]
|
||||
print()
|
||||
choice = input("Enter run ID to recover (or 'q' to quit): ").strip()
|
||||
if choice.lower() == "q":
|
||||
print("Aborted.")
|
||||
sys.exit(0)
|
||||
|
||||
try:
|
||||
run_id = int(choice)
|
||||
except ValueError:
|
||||
print(f"Invalid run ID: {choice}")
|
||||
sys.exit(1)
|
||||
|
||||
output_dir = DEFAULT_OUTPUT_DIR
|
||||
print(f"\nOutput directory: {output_dir}")
|
||||
|
||||
if os.path.exists(output_dir) and os.listdir(output_dir):
|
||||
confirm = input("Directory is not empty. Continue? (y/n): ").strip().lower()
|
||||
if confirm != "y":
|
||||
print("Aborted.")
|
||||
sys.exit(0)
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
recover(run_id, output_dir)
|
||||
|
||||
Reference in New Issue
Block a user