Files
2026-02-10 10:29:20 +01:00

216 lines
7.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
from datetime import datetime
from indexer.config import ROOT_PATH, ROOT_NAME, DRY_RUN, BACKUP_PATH
from indexer.scanner import scan_files
from indexer.hasher import blake3_file
from indexer.db import (
get_connection, create_run, finalize_run, fail_run,
load_all_files, batch_insert_files, batch_update_modified,
batch_mark_deleted, batch_update_unchanged,
)
from indexer.events import batch_log_events
from indexer.backup import ensure_backed_up
def main():
    """Run one full indexing pass over the backup root.

    Pipeline: scan the filesystem, diff against the DB snapshot, hash and
    record NEW/MODIFIED files, mark DELETED ones, refresh UNCHANGED ones,
    back up changed content, finalize the run, and emit an Excel report.

    In DRY_RUN mode only the filesystem scan is performed — no DB
    connection is opened and nothing is written.

    Raises:
        Any exception from the DB/hash/backup steps is re-raised after the
        partial transaction is rolled back and the run is marked FAILED.
    """
    print("=" * 60)
    print("ORDINACE DROPBOX BACKUP INDEXER")
    print(f"Root : {ROOT_PATH}")
    print(f"Backup : {BACKUP_PATH}")
    print(f"DRY RUN : {DRY_RUN}")
    print("=" * 60)

    # ── 1. Scan filesystem (fast, no hashing) ──
    print("\n[1/7] Scanning filesystem...")
    fs = scan_files(ROOT_PATH)
    print(f" Found {len(fs)} files on disk.")

    if DRY_RUN:
        # Dry-run mode: report what was found and stop before touching the DB.
        print("\n[DRY RUN] No DB connection, showing scan results only.")
        print(f" Files on disk: {len(fs)}")
        return

    # ── 2. Connect & create run ──
    conn = get_connection()
    cur = conn.cursor()
    run_id = create_run(cur)
    print(f"\n[2/7] Run #{run_id} created.")

    try:
        # ── 3. Load DB state ──
        print("[3/7] Loading DB state...")
        db = load_all_files(cur)
        print(f" {len(db)} files in DB (exists_now=1).")

        # ── 4. Diff filesystem vs. DB by relative path ──
        print("[4/7] Diffing...")
        fs_paths = set(fs.keys())
        db_paths = set(db.keys())
        new_paths = fs_paths - db_paths
        deleted_paths = db_paths - fs_paths
        existing_paths = fs_paths & db_paths

        # A path present on both sides is "modified" when size or mtime
        # differ; content hashing is deferred to step 5 for changed files only.
        modified_paths = set()
        unchanged_paths = set()
        for p in existing_paths:
            fs_file = fs[p]
            db_file = db[p]
            if fs_file["size"] != db_file["size"] or fs_file["mtime"] != db_file["mtime"]:
                modified_paths.add(p)
            else:
                unchanged_paths.add(p)
        print(f" NEW: {len(new_paths)} MOD: {len(modified_paths)} "
              f"DEL: {len(deleted_paths)} SAME: {len(unchanged_paths)}")

        # ── 5. Process changes ──
        print("[5/7] Processing changes...")
        events = []
        files_to_backup = []  # (full_path, content_hash) pairs for step 6

        # 5a) NEW files — compute BLAKE3, batch INSERT
        if new_paths:
            print(f" Hashing {len(new_paths)} new files...")
            new_files = []
            for p in new_paths:
                f = fs[p]
                try:
                    content_hash = blake3_file(f["full_path"])
                except (FileNotFoundError, PermissionError, OSError) as e:
                    # File vanished or is unreadable between scan and hash:
                    # skip it this run; it will be picked up next time.
                    print(f" WARN: skip {p}: {e}")
                    continue
                new_files.append({
                    "relative_path": p,
                    "file_name": f["file_name"],
                    "directory": f["directory"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))
            if new_files:
                # Insert returns the new row ids keyed by relative path,
                # needed to link CREATED events to file rows.
                path_to_id = batch_insert_files(cur, new_files, run_id)
                for nf in new_files:
                    events.append({
                        "run_id": run_id,
                        "file_id": path_to_id[nf["relative_path"]],
                        "event_type": "CREATED",
                        "new_size": nf["size"],
                        "new_hash": nf["content_hash"],
                    })

        # 5b) MODIFIED files — compute BLAKE3, batch UPDATE
        if modified_paths:
            print(f" Hashing {len(modified_paths)} modified files...")
            mod_files = []
            for p in modified_paths:
                f = fs[p]
                db_file = db[p]
                try:
                    content_hash = blake3_file(f["full_path"])
                except (FileNotFoundError, PermissionError, OSError) as e:
                    print(f" WARN: skip {p}: {e}")
                    continue
                mod_files.append({
                    "id": db_file["id"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                events.append({
                    "run_id": run_id,
                    "file_id": db_file["id"],
                    "event_type": "MODIFIED",
                    "old_size": db_file["size"],
                    "new_size": f["size"],
                    "old_hash": db_file["content_hash"],
                    "new_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))
            if mod_files:
                batch_update_modified(cur, mod_files, run_id)

        # 5c) DELETED files — batch UPDATE exists_now=0 (soft delete)
        if deleted_paths:
            del_ids = [db[p]["id"] for p in deleted_paths]
            batch_mark_deleted(cur, del_ids, run_id)
            for p in deleted_paths:
                events.append({
                    "run_id": run_id,
                    "file_id": db[p]["id"],
                    "event_type": "DELETED",
                    "old_size": db[p]["size"],
                    "old_hash": db[p]["content_hash"],
                })

        # 5d) UNCHANGED files — batch UPDATE last_seen_run
        if unchanged_paths:
            unch_ids = [db[p]["id"] for p in unchanged_paths]
            batch_update_unchanged(cur, unch_ids, run_id)

        # 5e) Log all events in one batch
        if events:
            batch_log_events(cur, events)

        # ── 6. Backup changed content (content-addressed blobs) ──
        if files_to_backup and BACKUP_PATH:
            print(f"[6/7] Backing up {len(files_to_backup)} files...")
            backed = ensure_backed_up(files_to_backup, BACKUP_PATH)
            print(f" {backed} new blobs written.")
        else:
            print("[6/7] Nothing to backup.")

        # ── 7. Finalize run & commit everything atomically ──
        stats = {
            "total": len(fs),
            "new": len(new_paths),
            "modified": len(modified_paths),
            "deleted": len(deleted_paths),
            "unchanged": len(unchanged_paths),
        }
        finalize_run(cur, run_id, stats)
        conn.commit()
        print(f"[7/7] Run #{run_id} COMPLETED.")
    except Exception as e:
        print(f"\nERROR: {e}")
        # BUGFIX: roll back the partial run BEFORE recording the failure.
        # The original committed fail_run first — which also committed all
        # partial batch writes of the failed run — and then issued a no-op
        # rollback after the commit.
        conn.rollback()
        try:
            fail_run(cur, run_id)
            conn.commit()
        except Exception:
            pass  # best-effort: even the failure marker could not be written
        raise
    finally:
        conn.close()

    # ── Summary (reached only on success; `stats` exists here) ──
    print("\n" + "=" * 60)
    print(f"Total : {stats['total']}")
    print(f"New : {stats['new']}")
    print(f"Modified : {stats['modified']}")
    print(f"Deleted : {stats['deleted']}")
    print(f"Unchanged: {stats['unchanged']}")
    print("=" * 60)

    # ── 8. Generate Excel report (best-effort; never fails the run) ──
    try:
        from report import generate_report
        # NOTE(review): hard-coded Windows report directory — consider
        # moving into indexer.config alongside ROOT_PATH/BACKUP_PATH.
        report_dir = r"u:\Dropbox\!!!Days\Downloads Z230"
        timestamp = datetime.now().strftime("%Y-%m-%d %H_%M")
        report_path = os.path.join(report_dir, f"{timestamp} DropboxBackupReport.xlsx")
        print(f"\n[8] Generating report...")
        generate_report(report_path)
    except Exception as e:
        print(f" WARN: Report generation failed: {e}")
# Script entry point: run a single indexing pass when executed directly.
if __name__ == "__main__":
    main()