# Commit 9838164b88 (parent e7dd89962e), 2026-02-09 20:16:37 +01:00
# 9 changed files with 444 additions and 150 deletions
#
# File: main.py (221 lines) — hunk @@ -1,73 +1,200 @@
from indexer.backup import ensure_backed_up
from indexer.config import BACKUP_PATH, DRY_RUN, ROOT_NAME, ROOT_PATH
from indexer.db import (
    get_connection, create_run, finalize_run, fail_run,
    load_all_files, batch_insert_files, batch_update_modified,
    batch_mark_deleted, batch_update_unchanged,
    # NOTE(review): pre-refactor helpers kept below for compatibility —
    # remove once confirmed unused by other callers.
    preload_mark_all_missing, find_file_by_path, insert_file,
    update_file, path_hash,
)
from indexer.events import batch_log_events, log_event
from indexer.hasher import blake3_file
from indexer.scanner import scan_files
def main():
    """Run one full indexing pass over ROOT_PATH.

    Pipeline (7 steps):
      1. scan the filesystem (metadata only, no hashing),
      2. open a DB connection and create a run row,
      3. load the previously indexed state,
      4. diff disk vs. DB by relative path (size/mtime heuristic),
      5. apply changes in batches (insert / update / delete / touch)
         and queue audit events,
      6. back up new content blobs,
      7. finalize the run and commit.

    In DRY_RUN mode only the scan is performed; no DB connection is
    opened and nothing is written.

    Raises:
        Any exception from the DB or filesystem layers is re-raised after
        rolling back the transaction and best-effort marking the run failed.
    """
    print("=" * 60)
    print("ORDINACE DROPBOX BACKUP INDEXER")
    print(f"Root : {ROOT_PATH}")
    print(f"Name : {ROOT_NAME}")
    print(f"Backup : {BACKUP_PATH}")
    print(f"DRY RUN : {DRY_RUN}")
    print("=" * 60)

    # ── 1. Scan filesystem (fast, no hashing) ──
    print("\n[1/7] Scanning filesystem...")
    fs = scan_files(ROOT_PATH)  # mapping: relative_path -> file metadata dict
    print(f" Found {len(fs)} files on disk.")

    if DRY_RUN:
        # Dry run: report what the scan found and stop before touching the DB.
        print("\n[DRY RUN] No DB connection, showing scan results only.")
        print(f" Files on disk: {len(fs)}")
        return

    # ── 2. Connect & create run ──
    conn = get_connection()
    cur = conn.cursor()
    run_id = create_run(cur)
    print(f"\n[2/7] Run #{run_id} created.")

    try:
        # ── 3. Load DB state ──
        print("[3/7] Loading DB state...")
        db = load_all_files(cur)  # mapping: relative_path -> DB row dict
        print(f" {len(db)} files in DB (exists_now=1).")

        # ── 4. Diff — pure set arithmetic on relative paths ──
        print("[4/7] Diffing...")
        fs_paths = set(fs.keys())
        db_paths = set(db.keys())
        new_paths = fs_paths - db_paths
        deleted_paths = db_paths - fs_paths
        existing_paths = fs_paths & db_paths

        # A size or mtime change marks a file as modified; content hashing
        # is deferred to step 5 so unchanged files are never re-hashed.
        modified_paths = set()
        unchanged_paths = set()
        for p in existing_paths:
            fs_file = fs[p]
            db_file = db[p]
            if fs_file["size"] != db_file["size"] or fs_file["mtime"] != db_file["mtime"]:
                modified_paths.add(p)
            else:
                unchanged_paths.add(p)

        print(f" NEW: {len(new_paths)} MOD: {len(modified_paths)} "
              f"DEL: {len(deleted_paths)} SAME: {len(unchanged_paths)}")

        # ── 5. Process changes ──
        print("[5/7] Processing changes...")
        events = []           # audit-log rows, batch-inserted in step 5e
        files_to_backup = []  # (full_path, content_hash) pairs for step 6

        # 5a) NEW files — compute BLAKE3, batch INSERT
        if new_paths:
            print(f" Hashing {len(new_paths)} new files...")
            new_files = []
            for p in new_paths:
                f = fs[p]
                try:
                    content_hash = blake3_file(f["full_path"])
                except (FileNotFoundError, PermissionError, OSError) as e:
                    # File vanished or is unreadable mid-run: skip it;
                    # it will be picked up again on the next run.
                    print(f" WARN: skip {p}: {e}")
                    continue
                new_files.append({
                    "relative_path": p,
                    "file_name": f["file_name"],
                    "directory": f["directory"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))
            if new_files:
                # batch_insert_files returns relative_path -> new file id,
                # needed to link the CREATED events to their rows.
                path_to_id = batch_insert_files(cur, new_files, run_id)
                for nf in new_files:
                    events.append({
                        "run_id": run_id,
                        "file_id": path_to_id[nf["relative_path"]],
                        "event_type": "CREATED",
                        "new_size": nf["size"],
                        "new_hash": nf["content_hash"],
                    })

        # 5b) MODIFIED files — compute BLAKE3, batch UPDATE
        if modified_paths:
            print(f" Hashing {len(modified_paths)} modified files...")
            mod_files = []
            for p in modified_paths:
                f = fs[p]
                db_file = db[p]
                try:
                    content_hash = blake3_file(f["full_path"])
                except (FileNotFoundError, PermissionError, OSError) as e:
                    print(f" WARN: skip {p}: {e}")
                    continue
                mod_files.append({
                    "id": db_file["id"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                events.append({
                    "run_id": run_id,
                    "file_id": db_file["id"],
                    "event_type": "MODIFIED",
                    "old_size": db_file["size"],
                    "new_size": f["size"],
                    "old_hash": db_file["content_hash"],
                    "new_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))
            if mod_files:
                batch_update_modified(cur, mod_files, run_id)

        # 5c) DELETED files — batch UPDATE exists_now=0
        if deleted_paths:
            del_ids = [db[p]["id"] for p in deleted_paths]
            batch_mark_deleted(cur, del_ids, run_id)
            for p in deleted_paths:
                events.append({
                    "run_id": run_id,
                    "file_id": db[p]["id"],
                    "event_type": "DELETED",
                    "old_size": db[p]["size"],
                    "old_hash": db[p]["content_hash"],
                })

        # 5d) UNCHANGED files — batch UPDATE last_seen_run
        if unchanged_paths:
            unch_ids = [db[p]["id"] for p in unchanged_paths]
            batch_update_unchanged(cur, unch_ids, run_id)

        # 5e) Log all events in one round-trip
        if events:
            batch_log_events(cur, events)

        # ── 6. Backup ──
        if files_to_backup and BACKUP_PATH:
            print(f"[6/7] Backing up {len(files_to_backup)} files...")
            backed = ensure_backed_up(files_to_backup, BACKUP_PATH)
            print(f" {backed} new blobs written.")
        else:
            print("[6/7] Nothing to backup.")

        # ── 7. Finalize ──
        stats = {
            "total": len(fs),
            "new": len(new_paths),
            "modified": len(modified_paths),
            "deleted": len(deleted_paths),
            "unchanged": len(unchanged_paths),
        }
        finalize_run(cur, run_id, stats)
        conn.commit()
        print(f"[7/7] Run #{run_id} COMPLETED.")

        # ── Summary ──
        print("\n" + "=" * 60)
        print(f"Total : {stats['total']}")
        print(f"New : {stats['new']}")
        print(f"Modified : {stats['modified']}")
        print(f"Deleted : {stats['deleted']}")
        print(f"Unchanged: {stats['unchanged']}")
        print("=" * 60)
    except Exception as e:
        print(f"\nERROR: {e}")
        # Roll back the partial run FIRST, then best-effort record the
        # failure in its own small transaction (rolling back afterwards
        # would discard the FAILED mark).
        conn.rollback()
        try:
            fail_run(cur, run_id)
            conn.commit()
        except Exception:
            pass  # run stays un-finalized; the error below still propagates
        raise
    finally:
        conn.close()


if __name__ == "__main__":
    main()