Files
drobboxordinacebackup/main.py
T
2026-04-28 06:11:30 +02:00

300 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
from datetime import datetime
from indexer.config import ROOT_PATH, ROOT_NAME, DRY_RUN, BACKUP_PATH
from indexer.scanner import scan_files
from indexer.hasher import blake3_file
from indexer.db import (
get_connection, create_run, finalize_run, fail_run,
load_all_files, batch_insert_files, batch_update_modified,
batch_mark_deleted, batch_update_unchanged,
)
from indexer.events import batch_log_events
from indexer.backup import ensure_backed_up
from indexer.hasher import is_cloud_placeholder
def main():
    """Index the Dropbox tree, diff it against the DB, back up changed
    content, and send an HTML summary email.

    Pipeline:
      1. scan filesystem (no hashing)    2. create a run row in the DB
      3. load current DB state           4. diff filesystem vs DB
      5. hash & apply changes (new/modified/deleted/unchanged)
      6. content-addressed backup        7. finalize run and commit
      8. Excel report (best effort)      9. email notification (best effort)

    In DRY_RUN mode only the filesystem scan runs; no DB connection is made.
    Steps 8 and 9 run outside the DB transaction and never fail the run.
    """
    print("=" * 60)
    print("ORDINACE DROPBOX BACKUP INDEXER")
    print(f"Root : {ROOT_PATH}")
    print(f"Backup : {BACKUP_PATH}")
    print(f"DRY RUN : {DRY_RUN}")
    print("=" * 60)

    # ── 1. Scan filesystem (fast, no hashing) ──
    print("\n[1/7] Scanning filesystem...")
    fs = scan_files(ROOT_PATH)
    print(f" Found {len(fs)} files on disk.")

    if DRY_RUN:
        # Dry-run mode: only report what the scan found, touch nothing.
        print("\n[DRY RUN] No DB connection, showing scan results only.")
        print(f" Files on disk: {len(fs)}")
        return

    # ── 2. Connect & create run ──
    conn = get_connection()
    cur = conn.cursor()
    run_id = create_run(cur)
    print(f"\n[2/7] Run #{run_id} created.")

    try:
        # ── 3. Load DB state ──
        print("[3/7] Loading DB state...")
        db = load_all_files(cur)
        print(f" {len(db)} files in DB (exists_now=1).")

        # ── 4. Diff ──
        print("[4/7] Diffing...")
        fs_paths = set(fs.keys())
        db_paths = set(db.keys())
        new_paths = fs_paths - db_paths
        deleted_paths = db_paths - fs_paths
        existing_paths = fs_paths & db_paths
        modified_paths = set()
        unchanged_paths = set()
        for p in existing_paths:
            fs_file = fs[p]
            db_file = db[p]
            # Cheap size/mtime check decides "modified"; the content hash
            # is only computed for those candidates in step 5b.
            if fs_file["size"] != db_file["size"] or fs_file["mtime"] != db_file["mtime"]:
                modified_paths.add(p)
            else:
                unchanged_paths.add(p)
        print(f" NEW: {len(new_paths)} MOD: {len(modified_paths)} "
              f"DEL: {len(deleted_paths)} SAME: {len(unchanged_paths)}")

        # ── 5. Process changes ──
        print("[5/7] Processing changes...")
        events = []           # audit rows, flushed via batch_log_events in 5e
        files_to_backup = []  # (full_path, content_hash) pairs for step 6
        skipped_files = []    # (relative_path, reason) for unhashable files
        new_files = []

        # 5a) NEW files — compute BLAKE3, batch INSERT
        if new_paths:
            print(f" Hashing {len(new_paths)} new files...")
            for p in new_paths:
                f = fs[p]
                # Cloud placeholders have no local content to hash.
                if is_cloud_placeholder(f["full_path"]):
                    reason = "not synced (cloud placeholder)"
                    print(f" WARN: skip {p}: {reason}")
                    skipped_files.append((p, reason))
                    continue
                try:
                    content_hash = blake3_file(f["full_path"])
                except (FileNotFoundError, PermissionError, OSError) as e:
                    print(f" WARN: skip {p}: {e}")
                    skipped_files.append((p, str(e)))
                    continue
                new_files.append({
                    "relative_path": p,
                    "file_name": f["file_name"],
                    "directory": f["directory"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))
            if new_files:
                path_to_id = batch_insert_files(cur, new_files, run_id)
                for nf in new_files:
                    events.append({
                        "run_id": run_id,
                        "file_id": path_to_id[nf["relative_path"]],
                        "event_type": "CREATED",
                        "new_size": nf["size"],
                        "new_hash": nf["content_hash"],
                    })

        # 5b) MODIFIED files — compute BLAKE3, batch UPDATE
        if modified_paths:
            print(f" Hashing {len(modified_paths)} modified files...")
            mod_files = []
            for p in modified_paths:
                f = fs[p]
                db_file = db[p]
                try:
                    content_hash = blake3_file(f["full_path"])
                except (FileNotFoundError, PermissionError, OSError) as e:
                    print(f" WARN: skip {p}: {e}")
                    continue
                mod_files.append({
                    "id": db_file["id"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                events.append({
                    "run_id": run_id,
                    "file_id": db_file["id"],
                    "event_type": "MODIFIED",
                    "old_size": db_file["size"],
                    "new_size": f["size"],
                    "old_hash": db_file["content_hash"],
                    "new_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))
            if mod_files:
                batch_update_modified(cur, mod_files, run_id)

        # 5c) DELETED files — batch UPDATE exists_now=0
        if deleted_paths:
            del_ids = [db[p]["id"] for p in deleted_paths]
            batch_mark_deleted(cur, del_ids, run_id)
            for p in deleted_paths:
                events.append({
                    "run_id": run_id,
                    "file_id": db[p]["id"],
                    "event_type": "DELETED",
                    "old_size": db[p]["size"],
                    "old_hash": db[p]["content_hash"],
                })

        # 5d) UNCHANGED files — batch UPDATE last_seen_run
        if unchanged_paths:
            unch_ids = [db[p]["id"] for p in unchanged_paths]
            batch_update_unchanged(cur, unch_ids, run_id)

        # 5e) Log all events
        if events:
            batch_log_events(cur, events)

        # ── 6. Backup ──
        if files_to_backup and BACKUP_PATH:
            print(f"[6/7] Backing up {len(files_to_backup)} files...")
            backed = ensure_backed_up(files_to_backup, BACKUP_PATH)
            print(f" {backed} new blobs written.")
        else:
            print("[6/7] Nothing to backup.")

        # ── 7. Finalize ──
        stats = {
            "total": len(fs),
            "new": len(new_files),
            "modified": len(modified_paths),
            "deleted": len(deleted_paths),
            "unchanged": len(unchanged_paths),
            "skipped": len(skipped_files),
        }
        finalize_run(cur, run_id, stats)
        conn.commit()
        print(f"[7/7] Run #{run_id} COMPLETED.")
    except Exception as e:
        print(f"\nERROR: {e}")
        # Roll back the partial transaction FIRST, then record the failure
        # separately. Committing before the rollback (as the original code
        # did) would persist the failed run's partial inserts/updates.
        conn.rollback()
        try:
            fail_run(cur, run_id)
            conn.commit()
        except Exception:
            # Best effort: marking the run failed must not mask the
            # original error, which is re-raised below.
            pass
        raise
    finally:
        conn.close()

    # ── Summary ──
    print("\n" + "=" * 60)
    print(f"Total : {stats['total']}")
    print(f"New : {stats['new']}")
    print(f"Modified : {stats['modified']}")
    print(f"Deleted : {stats['deleted']}")
    print(f"Unchanged: {stats['unchanged']}")
    if skipped_files:
        print(f"Skipped : {len(skipped_files)} (hash failed)")
        print("-" * 60)
        for path, reason in skipped_files:
            print(f" SKIP: {path}")
            print(f" {reason}")
    print("=" * 60)

    # ── 8. Generate Excel report ──
    # Best effort: any failure is reported but never fails the run.
    report_path = None
    try:
        from report import generate_report
        report_dir = r"z:\Dropbox\!!!Days\Downloads Z230"
        # Remove previous reports so only the newest one remains.
        for f in os.listdir(report_dir):
            if f.endswith("DropboxBackupReport.xlsx"):
                os.remove(os.path.join(report_dir, f))
        timestamp = datetime.now().strftime("%Y-%m-%d %H_%M")
        report_path = os.path.join(report_dir, f"{timestamp} DropboxBackupReport.xlsx")
        print("\n[8] Generating report...")
        generate_report(report_path)
    except Exception as e:
        print(f" WARN: Report generation failed: {e}")

    # ── 9. Send email notification ──
    # Best effort as well; uses an external mailing library outside the repo.
    try:
        import sys
        sys.path.insert(0, r"C:\Reporting\knihovny")
        from EmailMessagingGraph import send_mail
        ts = datetime.now().strftime("%d.%m.%Y %H:%M")
        changes = stats['new'] + stats['modified'] + stats['deleted']
        report_line = f"<tr><td>Report</td><td>{report_path}</td></tr>" if report_path else ""
        skipped_row = ""
        skipped_detail = ""
        if skipped_files:
            skipped_row = f"<tr style='background:#fff3cd;color:#856404;'><td><b>Preskocene</b></td><td>{len(skipped_files):,}</td></tr>"
            rows = "".join(f"<tr><td>{p}</td><td>{r}</td></tr>" for p, r in skipped_files)
            skipped_detail = f"""
<h3 style="color:#856404;">&#9888; Preskocene soubory ({len(skipped_files)})</h3>
<table border="0" cellpadding="4" cellspacing="0" style="border-collapse:collapse;font-size:12px;">
<tr style="background:#f0f4fa;"><td><b>Soubor</b></td><td><b>Duvod</b></td></tr>
{rows}
</table>"""

        def _file_section(title, color, paths):
            # Render one colored HTML section listing the given paths.
            if not paths:
                return ""
            rows = "".join(f"<tr><td style='padding:2px 8px;font-size:12px;'>{p}</td></tr>" for p in sorted(paths))
            return f"""
<h3 style="color:{color};margin-top:18px;">{title} ({len(paths)})</h3>
<table border="0" cellpadding="2" cellspacing="0" style="border-collapse:collapse;width:100%;font-family:monospace;">
{rows}
</table>"""

        new_paths_ok = [nf["relative_path"] for nf in new_files]
        files_detail = (
            _file_section("&#10003; Nove soubory", "#2a7a2a", new_paths_ok)
            + _file_section("&#9998; Zmenene soubory", "#a07000", list(modified_paths))
            + _file_section("&#10007; Smazane soubory", "#a00000", list(deleted_paths))
        )
        body = f"""
<html><body style="font-family:Segoe UI,Arial,sans-serif;font-size:14px;color:#222;">
<h2 style="color:#2e6da4;">&#10003; Dropbox Ordinace Backup &ndash; {ts}</h2>
<table border="0" cellpadding="6" cellspacing="0" style="border-collapse:collapse;min-width:350px;">
<tr style="background:#f0f4fa;"><td><b>Run #</b></td><td>{run_id}</td></tr>
<tr><td><b>Celkem souboru</b></td><td>{stats['total']:,}</td></tr>
<tr style="background:#f0f4fa;color:#2a7a2a;"><td><b>Nove</b></td><td>{stats['new']:,}</td></tr>
<tr style="color:#a07000;"><td><b>Zmenene</b></td><td>{stats['modified']:,}</td></tr>
<tr style="background:#f0f4fa;color:#a00000;"><td><b>Smazane</b></td><td>{stats['deleted']:,}</td></tr>
<tr><td><b>Nezmenene</b></td><td>{stats['unchanged']:,}</td></tr>
<tr style="background:#f0f4fa;"><td><b>Zmen celkem</b></td><td>{changes:,}</td></tr>
{skipped_row}
{report_line}
</table>
{files_detail}
{skipped_detail}
<p style="color:#888;font-size:12px;margin-top:20px;">REPORTER &bull; {ts}</p>
</body></html>
"""
        subject = f"Dropbox Backup #{run_id} \u2013 {ts} ({changes} zmen)"
        send_mail("vladimir.buzalka@buzalka.cz", subject, body, html=True)
        print("\n[9] Email odeslan na vladimir.buzalka@buzalka.cz")
    except Exception as e:
        print(f" WARN: Email failed: {e}")
# Script entry point: run the indexer only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()