This commit is contained in:
2026-05-18 07:04:08 +02:00
parent 52f04c2839
commit 07e6a9c374
8 changed files with 254 additions and 99 deletions
+59 -69
View File
@@ -2,6 +2,7 @@ import os
from datetime import datetime
from indexer.config import ROOT_PATH, ROOT_NAME, DRY_RUN, BACKUP_PATH
from indexer.logger import setup_logging
from indexer.scanner import scan_files
from indexer.hasher import blake3_file
from indexer.db import (
@@ -11,42 +12,43 @@ from indexer.db import (
)
from indexer.events import batch_log_events
from indexer.backup import ensure_backed_up
from indexer.hasher import is_cloud_placeholder
from indexer.hasher import is_cloud_placeholder, hydrate_file
def main():
print("=" * 60)
print("ORDINACE DROPBOX BACKUP INDEXER")
print(f"Root : {ROOT_PATH}")
print(f"Backup : {BACKUP_PATH}")
print(f"DRY RUN : {DRY_RUN}")
print("=" * 60)
log = setup_logging()
log.info("=" * 60)
log.info("ORDINACE DROPBOX BACKUP INDEXER")
log.info(f"Root : {ROOT_PATH}")
log.info(f"Backup : {BACKUP_PATH}")
log.info(f"DRY RUN : {DRY_RUN}")
log.info("=" * 60)
# ── 1. Scan filesystem (fast, no hashing) ──
print("\n[1/7] Scanning filesystem...")
log.info("[1/7] Scanning filesystem...")
fs = scan_files(ROOT_PATH)
print(f" Found {len(fs)} files on disk.")
log.info(f" Found {len(fs)} files on disk.")
if DRY_RUN:
# In DRY_RUN mode we only show what would happen
print("\n[DRY RUN] No DB connection, showing scan results only.")
print(f" Files on disk: {len(fs)}")
log.info("[DRY RUN] No DB connection, showing scan results only.")
log.info(f" Files on disk: {len(fs)}")
return
# ── 2. Connect & create run ──
conn = get_connection()
cur = conn.cursor()
run_id = create_run(cur)
print(f"\n[2/7] Run #{run_id} created.")
log.info(f"[2/7] Run #{run_id} created.")
try:
# ── 3. Load DB state ──
print("[3/7] Loading DB state...")
log.info("[3/7] Loading DB state...")
db = load_all_files(cur)
print(f" {len(db)} files in DB (exists_now=1).")
log.info(f" {len(db)} files in DB (exists_now=1).")
# ── 4. Diff ──
print("[4/7] Diffing...")
log.info("[4/7] Diffing...")
fs_paths = set(fs.keys())
db_paths = set(db.keys())
@@ -64,11 +66,11 @@ def main():
else:
unchanged_paths.add(p)
print(f" NEW: {len(new_paths)} MOD: {len(modified_paths)} "
f"DEL: {len(deleted_paths)} SAME: {len(unchanged_paths)}")
log.info(f" NEW: {len(new_paths)} MOD: {len(modified_paths)} "
f"DEL: {len(deleted_paths)} SAME: {len(unchanged_paths)}")
# ── 5. Process changes ──
print("[5/7] Processing changes...")
log.info("[5/7] Processing changes...")
events = []
files_to_backup = []
@@ -76,19 +78,21 @@ def main():
skipped_files = []
new_files = []
if new_paths:
print(f" Hashing {len(new_paths)} new files...")
new_files = []
log.info(f" Hashing {len(new_paths)} new files...")
for p in new_paths:
f = fs[p]
if is_cloud_placeholder(f["full_path"]):
reason = "not synced (cloud placeholder)"
print(f" WARN: skip {p}: {reason}")
skipped_files.append((p, reason))
continue
log.warning(f" PLACEHOLDER {p} — čekám na stažení...")
if not hydrate_file(f["full_path"]):
reason = "not synced (cloud placeholder, hydration timeout)"
log.warning(f" SKIP {p}: {reason}")
skipped_files.append((p, reason))
continue
log.info(f" OK hydrated: {p}")
try:
content_hash = blake3_file(f["full_path"])
except (FileNotFoundError, PermissionError, OSError) as e:
print(f" WARN: skip {p}: {e}")
log.warning(f" SKIP {p}: {e}")
skipped_files.append((p, str(e)))
continue
new_files.append({
@@ -114,7 +118,7 @@ def main():
# 5b) MODIFIED files — compute BLAKE3, batch UPDATE
if modified_paths:
print(f" Hashing {len(modified_paths)} modified files...")
log.info(f" Hashing {len(modified_paths)} modified files...")
mod_files = []
for p in modified_paths:
f = fs[p]
@@ -122,7 +126,7 @@ def main():
try:
content_hash = blake3_file(f["full_path"])
except (FileNotFoundError, PermissionError, OSError) as e:
print(f" WARN: skip {p}: {e}")
log.warning(f" SKIP {p}: {e}")
continue
mod_files.append({
"id": db_file["id"],
@@ -168,16 +172,16 @@ def main():
# ── 6. Backup ──
if files_to_backup and BACKUP_PATH:
print(f"[6/7] Backing up {len(files_to_backup)} files...")
log.info(f"[6/7] Backing up {len(files_to_backup)} files...")
backed = ensure_backed_up(files_to_backup, BACKUP_PATH)
print(f" {backed} new blobs written.")
log.info(f" {backed} new blobs written.")
else:
print("[6/7] Nothing to backup.")
log.info("[6/7] Nothing to backup.")
# ── 7. Finalize ──
stats = {
"total": len(fs),
"new": len(new_files) if new_paths else 0,
"new": len(new_files),
"modified": len(modified_paths),
"deleted": len(deleted_paths),
"unchanged": len(unchanged_paths),
@@ -185,10 +189,10 @@ def main():
}
finalize_run(cur, run_id, stats)
conn.commit()
print(f"[7/7] Run #{run_id} COMPLETED.")
log.info(f"[7/7] Run #{run_id} COMPLETED.")
except Exception as e:
print(f"\nERROR: {e}")
log.exception(f"FATAL ERROR: {e}")
try:
fail_run(cur, run_id)
conn.commit()
@@ -200,19 +204,19 @@ def main():
conn.close()
# ── Summary ──
print("\n" + "=" * 60)
print(f"Total : {stats['total']}")
print(f"New : {stats['new']}")
print(f"Modified : {stats['modified']}")
print(f"Deleted : {stats['deleted']}")
print(f"Unchanged: {stats['unchanged']}")
log.info("=" * 60)
log.info(f"Total : {stats['total']}")
log.info(f"New : {stats['new']}")
log.info(f"Modified : {stats['modified']}")
log.info(f"Deleted : {stats['deleted']}")
log.info(f"Unchanged: {stats['unchanged']}")
if skipped_files:
print(f"Skipped : {len(skipped_files)} (hash failed)")
print("-" * 60)
log.warning(f"Skipped : {len(skipped_files)} (hash failed)")
log.info("-" * 60)
for path, reason in skipped_files:
print(f" SKIP: {path}")
print(f" {reason}")
print("=" * 60)
log.warning(f" SKIP: {path}")
log.warning(f" {reason}")
log.info("=" * 60)
# ── 8. Generate Excel report ──
report_path = None
@@ -225,10 +229,10 @@ def main():
os.remove(os.path.join(report_dir, f))
timestamp = datetime.now().strftime("%Y-%m-%d %H_%M")
report_path = os.path.join(report_dir, f"{timestamp} DropboxBackupReport.xlsx")
print(f"\n[8] Generating report...")
log.info("[8] Generating report...")
generate_report(report_path)
except Exception as e:
print(f" WARN: Report generation failed: {e}")
log.warning(f"Report generation failed: {e}")
# ── 9. Send email notification ──
try:
@@ -252,23 +256,6 @@ def main():
{rows}
</table>"""
def _file_section(title, color, paths):
if not paths:
return ""
rows = "".join(f"<tr><td style='padding:2px 8px;font-size:12px;'>{p}</td></tr>" for p in sorted(paths))
return f"""
<h3 style="color:{color};margin-top:18px;">{title} ({len(paths)})</h3>
<table border="0" cellpadding="2" cellspacing="0" style="border-collapse:collapse;width:100%;font-family:monospace;">
{rows}
</table>"""
new_paths_ok = [nf["relative_path"] for nf in new_files]
files_detail = (
_file_section("&#10003; Nove soubory", "#2a7a2a", new_paths_ok)
+ _file_section("&#9998; Zmenene soubory", "#a07000", list(modified_paths))
+ _file_section("&#10007; Smazane soubory", "#a00000", list(deleted_paths))
)
body = f"""
<html><body style="font-family:Segoe UI,Arial,sans-serif;font-size:14px;color:#222;">
<h2 style="color:#2e6da4;">&#10003; Dropbox Ordinace Backup &ndash; {ts}</h2>
@@ -283,16 +270,19 @@ def main():
{skipped_row}
{report_line}
</table>
{files_detail}
{skipped_detail}
<p style="color:#888;font-size:12px;margin-top:20px;">REPORTER &bull; {ts}</p>
</body></html>
"""
subject = f"Dropbox Backup #{run_id} \u2013 {ts} ({changes} zmen)"
send_mail("vladimir.buzalka@buzalka.cz", subject, body, html=True)
print(f"\n[9] Email odeslan na vladimir.buzalka@buzalka.cz")
subject = f"Dropbox Backup #{run_id} {ts} ({changes} zmen)"
from indexer.config import LOG_DIR
log_file = os.path.join(LOG_DIR, "backup.log")
attachments = [log_file] if os.path.exists(log_file) else []
send_mail("vladimir.buzalka@buzalka.cz", subject, body, html=True, attachments=attachments)
log.info("[9] Email odeslan na vladimir.buzalka@buzalka.cz")
except Exception as e:
print(f" WARN: Email failed: {e}")
log.warning(f"Email failed: {e}")
if __name__ == "__main__":