# Indexer entry point: scan the filesystem, diff against the DB, persist
# changes, back up new/modified content, and generate an Excel report.
import os
from datetime import datetime

from indexer.backup import ensure_backed_up
from indexer.config import ROOT_PATH, ROOT_NAME, DRY_RUN, BACKUP_PATH
from indexer.db import (
    get_connection, create_run, finalize_run, fail_run,
    load_all_files, batch_insert_files, batch_update_modified,
    batch_mark_deleted, batch_update_unchanged,
)
from indexer.events import batch_log_events
from indexer.hasher import blake3_file
from indexer.scanner import scan_files
def main():
    """Run one full indexer pass.

    Steps: scan the filesystem under ROOT_PATH, diff against the DB state,
    batch-apply inserts/updates/deletes, log change events, back up
    new/modified content blobs, finalize the run row, and generate a report.

    In DRY_RUN mode only the filesystem scan is performed (no DB connection).
    Raises: re-raises any exception from the DB phase after marking the run
    as failed and rolling back the partial transaction.
    """
    print("=" * 60)
    print("ORDINACE DROPBOX BACKUP – INDEXER")
    print(f"Root : {ROOT_PATH}")
    print(f"Backup : {BACKUP_PATH}")
    print(f"DRY RUN : {DRY_RUN}")
    print("=" * 60)

    # ── 1. Scan filesystem (fast, no hashing) ──
    print("\n[1/7] Scanning filesystem...")
    fs = scan_files(ROOT_PATH)
    print(f" Found {len(fs)} files on disk.")

    if DRY_RUN:
        # In DRY_RUN mode we only show what would happen.
        print("\n[DRY RUN] No DB connection, showing scan results only.")
        print(f" Files on disk: {len(fs)}")
        return

    # ── 2. Connect & create run ──
    conn = get_connection()
    cur = conn.cursor()
    run_id = create_run(cur)
    print(f"\n[2/7] Run #{run_id} created.")

    try:
        # ── 3. Load DB state ──
        print("[3/7] Loading DB state...")
        db = load_all_files(cur)
        print(f" {len(db)} files in DB (exists_now=1).")

        # ── 4. Diff ──
        print("[4/7] Diffing...")
        fs_paths = set(fs.keys())
        db_paths = set(db.keys())

        new_paths = fs_paths - db_paths
        deleted_paths = db_paths - fs_paths
        existing_paths = fs_paths & db_paths

        # A file counts as modified when size OR mtime differ; hashing is
        # deferred to step 5 so unchanged files are never read.
        modified_paths = set()
        unchanged_paths = set()
        for p in existing_paths:
            fs_file = fs[p]
            db_file = db[p]
            if fs_file["size"] != db_file["size"] or fs_file["mtime"] != db_file["mtime"]:
                modified_paths.add(p)
            else:
                unchanged_paths.add(p)

        print(f" NEW: {len(new_paths)} MOD: {len(modified_paths)} "
              f"DEL: {len(deleted_paths)} SAME: {len(unchanged_paths)}")

        # ── 5. Process changes ──
        print("[5/7] Processing changes...")
        events = []
        files_to_backup = []

        # 5a) NEW files — compute BLAKE3, batch INSERT
        if new_paths:
            print(f" Hashing {len(new_paths)} new files...")
            new_files = []
            for p in new_paths:
                f = fs[p]
                try:
                    content_hash = blake3_file(f["full_path"])
                except OSError as e:
                    # Covers FileNotFoundError / PermissionError too; the
                    # file may vanish or be locked between scan and hash.
                    print(f" WARN: skip {p}: {e}")
                    continue
                new_files.append({
                    "relative_path": p,
                    "file_name": f["file_name"],
                    "directory": f["directory"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))

            if new_files:
                path_to_id = batch_insert_files(cur, new_files, run_id)
                for nf in new_files:
                    events.append({
                        "run_id": run_id,
                        "file_id": path_to_id[nf["relative_path"]],
                        "event_type": "CREATED",
                        "new_size": nf["size"],
                        "new_hash": nf["content_hash"],
                    })

        # 5b) MODIFIED files — compute BLAKE3, batch UPDATE
        if modified_paths:
            print(f" Hashing {len(modified_paths)} modified files...")
            mod_files = []
            for p in modified_paths:
                f = fs[p]
                db_file = db[p]
                try:
                    content_hash = blake3_file(f["full_path"])
                except OSError as e:
                    print(f" WARN: skip {p}: {e}")
                    continue
                mod_files.append({
                    "id": db_file["id"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                events.append({
                    "run_id": run_id,
                    "file_id": db_file["id"],
                    "event_type": "MODIFIED",
                    "old_size": db_file["size"],
                    "new_size": f["size"],
                    "old_hash": db_file["content_hash"],
                    "new_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))

            if mod_files:
                batch_update_modified(cur, mod_files, run_id)

        # 5c) DELETED files — batch UPDATE exists_now=0
        if deleted_paths:
            del_ids = [db[p]["id"] for p in deleted_paths]
            batch_mark_deleted(cur, del_ids, run_id)
            for p in deleted_paths:
                events.append({
                    "run_id": run_id,
                    "file_id": db[p]["id"],
                    "event_type": "DELETED",
                    "old_size": db[p]["size"],
                    "old_hash": db[p]["content_hash"],
                })

        # 5d) UNCHANGED files — batch UPDATE last_seen_run
        if unchanged_paths:
            unch_ids = [db[p]["id"] for p in unchanged_paths]
            batch_update_unchanged(cur, unch_ids, run_id)

        # 5e) Log all events
        if events:
            batch_log_events(cur, events)

        # ── 6. Backup ──
        if files_to_backup and BACKUP_PATH:
            print(f"[6/7] Backing up {len(files_to_backup)} files...")
            backed = ensure_backed_up(files_to_backup, BACKUP_PATH)
            print(f" {backed} new blobs written.")
        else:
            print("[6/7] Nothing to backup.")

        # ── 7. Finalize ──
        stats = {
            "total": len(fs),
            "new": len(new_paths),
            "modified": len(modified_paths),
            "deleted": len(deleted_paths),
            "unchanged": len(unchanged_paths),
        }
        finalize_run(cur, run_id, stats)
        conn.commit()
        print(f"[7/7] Run #{run_id} COMPLETED.")

    except Exception as e:
        print(f"\nERROR: {e}")
        # BUGFIX: roll back the failed run's partial changes FIRST. The
        # previous code committed (persisting the partial run) and only
        # then called rollback, which was a no-op at that point.
        try:
            conn.rollback()
        except Exception:
            pass
        # Best-effort: record the run as FAILED in its own tiny transaction.
        try:
            fail_run(cur, run_id)
            conn.commit()
        except Exception:
            pass
        raise
    finally:
        conn.close()

    # ── Summary ── (only reached on success; on failure we re-raised above)
    print("\n" + "=" * 60)
    print(f"Total : {stats['total']}")
    print(f"New : {stats['new']}")
    print(f"Modified : {stats['modified']}")
    print(f"Deleted : {stats['deleted']}")
    print(f"Unchanged: {stats['unchanged']}")
    print("=" * 60)

    # ── 8. Generate Excel report ──
    # Report failure is non-fatal: the run is already committed.
    try:
        from report import generate_report

        # NOTE(review): hard-coded output directory — consider moving to
        # indexer.config alongside ROOT_PATH/BACKUP_PATH.
        report_dir = r"u:\Dropbox\!!!Days\Downloads Z230"
        timestamp = datetime.now().strftime("%Y-%m-%d %H_%M")
        report_path = os.path.join(report_dir, f"{timestamp} DropboxBackupReport.xlsx")
        print("\n[8] Generating report...")
        generate_report(report_path)
    except Exception as e:
        print(f" WARN: Report generation failed: {e}")
# Script entry point — run one indexer pass when executed directly.
if __name__ == "__main__":
    main()