import os
from datetime import datetime

from indexer.config import ROOT_PATH, ROOT_NAME, DRY_RUN, BACKUP_PATH
from indexer.scanner import scan_files
from indexer.hasher import blake3_file
from indexer.db import (
    get_connection,
    create_run,
    finalize_run,
    fail_run,
    load_all_files,
    batch_insert_files,
    batch_update_modified,
    batch_mark_deleted,
    batch_update_unchanged,
)
from indexer.events import batch_log_events
from indexer.backup import ensure_backed_up


def _safe_hash(full_path, relative_path):
    """Return the BLAKE3 hash of *full_path*, or None if unreadable.

    Vanished files, permission problems and generic I/O errors are
    logged as warnings and skipped; the file will be retried on the
    next run.
    """
    try:
        return blake3_file(full_path)
    except (FileNotFoundError, PermissionError, OSError) as e:
        print(f" WARN: skip {relative_path}: {e}")
        return None


def _diff(fs, db):
    """Partition relative paths into (new, modified, deleted, unchanged).

    *fs* maps path -> filesystem record, *db* maps path -> DB record.
    A file counts as modified when size or mtime differs from the DB
    snapshot; content hashing happens later and only for changed files.
    """
    fs_paths = set(fs)
    db_paths = set(db)
    new_paths = fs_paths - db_paths
    deleted_paths = db_paths - fs_paths
    modified_paths = set()
    unchanged_paths = set()
    for p in fs_paths & db_paths:
        if fs[p]["size"] != db[p]["size"] or fs[p]["mtime"] != db[p]["mtime"]:
            modified_paths.add(p)
        else:
            unchanged_paths.add(p)
    return new_paths, modified_paths, deleted_paths, unchanged_paths


def main():
    """Run one incremental indexing pass.

    Scans ROOT_PATH, diffs the result against the DB snapshot, records
    CREATED/MODIFIED/DELETED events, backs up new content blobs, and
    finalizes the run.  On failure the partial transaction is rolled
    back BEFORE the run row is marked FAILED, so half-applied file
    changes are never committed.  In DRY_RUN mode only the scan is
    performed and the DB is never touched.
    """
    print("=" * 60)
    print("ORDINACE DROPBOX BACKUP – INDEXER")
    print(f"Root : {ROOT_PATH}")
    print(f"Backup : {BACKUP_PATH}")
    print(f"DRY RUN : {DRY_RUN}")
    print("=" * 60)

    # ── 1. Scan filesystem (fast, no hashing) ──
    print("\n[1/7] Scanning filesystem...")
    fs = scan_files(ROOT_PATH)
    print(f" Found {len(fs)} files on disk.")

    if DRY_RUN:
        # Dry run: report scan results only, never open a DB connection.
        print("\n[DRY RUN] No DB connection, showing scan results only.")
        print(f" Files on disk: {len(fs)}")
        return

    # ── 2. Connect & create run ──
    conn = get_connection()
    cur = conn.cursor()
    run_id = create_run(cur)
    print(f"\n[2/7] Run #{run_id} created.")

    try:
        # ── 3. Load DB state ──
        print("[3/7] Loading DB state...")
        db = load_all_files(cur)
        print(f" {len(db)} files in DB (exists_now=1).")

        # ── 4. Diff ──
        print("[4/7] Diffing...")
        new_paths, modified_paths, deleted_paths, unchanged_paths = _diff(fs, db)
        print(f" NEW: {len(new_paths)} MOD: {len(modified_paths)} "
              f"DEL: {len(deleted_paths)} SAME: {len(unchanged_paths)}")

        # ── 5. Process changes ──
        print("[5/7] Processing changes...")
        events = []
        files_to_backup = []

        # 5a) NEW files — compute BLAKE3, batch INSERT, log CREATED events.
        if new_paths:
            print(f" Hashing {len(new_paths)} new files...")
            new_files = []
            for p in new_paths:
                f = fs[p]
                content_hash = _safe_hash(f["full_path"], p)
                if content_hash is None:
                    continue
                new_files.append({
                    "relative_path": p,
                    "file_name": f["file_name"],
                    "directory": f["directory"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))
            if new_files:
                # INSERT returns the generated ids keyed by relative path;
                # events reference those ids.
                path_to_id = batch_insert_files(cur, new_files, run_id)
                for nf in new_files:
                    events.append({
                        "run_id": run_id,
                        "file_id": path_to_id[nf["relative_path"]],
                        "event_type": "CREATED",
                        "new_size": nf["size"],
                        "new_hash": nf["content_hash"],
                    })

        # 5b) MODIFIED files — compute BLAKE3, batch UPDATE, log events.
        if modified_paths:
            print(f" Hashing {len(modified_paths)} modified files...")
            mod_files = []
            for p in modified_paths:
                f = fs[p]
                db_file = db[p]
                content_hash = _safe_hash(f["full_path"], p)
                if content_hash is None:
                    continue
                mod_files.append({
                    "id": db_file["id"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                events.append({
                    "run_id": run_id,
                    "file_id": db_file["id"],
                    "event_type": "MODIFIED",
                    "old_size": db_file["size"],
                    "new_size": f["size"],
                    "old_hash": db_file["content_hash"],
                    "new_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))
            if mod_files:
                batch_update_modified(cur, mod_files, run_id)

        # 5c) DELETED files — batch UPDATE exists_now=0, log events.
        if deleted_paths:
            del_ids = [db[p]["id"] for p in deleted_paths]
            batch_mark_deleted(cur, del_ids, run_id)
            for p in deleted_paths:
                events.append({
                    "run_id": run_id,
                    "file_id": db[p]["id"],
                    "event_type": "DELETED",
                    "old_size": db[p]["size"],
                    "old_hash": db[p]["content_hash"],
                })

        # 5d) UNCHANGED files — just bump last_seen_run.
        if unchanged_paths:
            unch_ids = [db[p]["id"] for p in unchanged_paths]
            batch_update_unchanged(cur, unch_ids, run_id)

        # 5e) Log all events in one batch.
        if events:
            batch_log_events(cur, events)

        # ── 6. Backup ──
        if files_to_backup and BACKUP_PATH:
            print(f"[6/7] Backing up {len(files_to_backup)} files...")
            backed = ensure_backed_up(files_to_backup, BACKUP_PATH)
            print(f" {backed} new blobs written.")
        else:
            print("[6/7] Nothing to backup.")

        # ── 7. Finalize ──
        stats = {
            "total": len(fs),
            "new": len(new_paths),
            "modified": len(modified_paths),
            "deleted": len(deleted_paths),
            "unchanged": len(unchanged_paths),
        }
        finalize_run(cur, run_id, stats)
        conn.commit()
        print(f"[7/7] Run #{run_id} COMPLETED.")

    except Exception as e:
        print(f"\nERROR: {e}")
        # BUGFIX: roll back the partial transaction FIRST.  The previous
        # code committed (to persist the FAILED status) before rolling
        # back, which also committed every half-applied batch change and
        # made the subsequent rollback a no-op.  Now: discard partial
        # work, then record only the run failure.
        try:
            conn.rollback()
        except Exception:
            pass  # best-effort: connection may already be unusable
        try:
            fail_run(cur, run_id)
            conn.commit()
        except Exception:
            pass  # best-effort: never mask the original exception
        raise
    finally:
        conn.close()

    # ── Summary ──
    print("\n" + "=" * 60)
    print(f"Total : {stats['total']}")
    print(f"New : {stats['new']}")
    print(f"Modified : {stats['modified']}")
    print(f"Deleted : {stats['deleted']}")
    print(f"Unchanged: {stats['unchanged']}")
    print("=" * 60)

    # ── 8. Generate Excel report (best-effort; failure never aborts) ──
    try:
        from report import generate_report
        # NOTE(review): hard-coded Windows report directory — consider
        # moving into indexer.config alongside ROOT_PATH/BACKUP_PATH.
        report_dir = r"u:\Dropbox\!!!Days\Downloads Z230"
        timestamp = datetime.now().strftime("%Y-%m-%d %H_%M")
        report_path = os.path.join(report_dir, f"{timestamp} DropboxBackupReport.xlsx")
        print("\n[8] Generating report...")
        generate_report(report_path)
    except Exception as e:
        print(f" WARN: Report generation failed: {e}")


if __name__ == "__main__":
    main()