# main.py — Ordinace Dropbox backup indexer (entry-point script)
from indexer.config import ROOT_PATH, ROOT_NAME, DRY_RUN, BACKUP_PATH
from indexer.scanner import scan_files
from indexer.hasher import blake3_file
from indexer.db import (
    get_connection,
    create_run, finalize_run, fail_run,
    load_all_files, batch_insert_files, batch_update_modified,
    batch_mark_deleted, batch_update_unchanged,
    # kept for backward compatibility with older call sites:
    preload_mark_all_missing,
    find_file_by_path,
    insert_file,
    update_file,
    path_hash,
)
from indexer.events import log_event, batch_log_events
from indexer.backup import ensure_backed_up
def main():
    """Run one indexing pass over ROOT_PATH.

    Pipeline: scan the filesystem (no hashing), diff the scan against the
    DB snapshot into NEW / MODIFIED / DELETED / UNCHANGED sets, hash only
    the NEW and MODIFIED files (BLAKE3), apply all DB changes in batches,
    back up new content blobs, then finalize the run.  In DRY_RUN mode
    only the scan runs and the function returns before any DB connection.

    Raises:
        Exception: re-raised after a best-effort rollback and fail_run
        record if any step after run creation fails.
    """
    print("=" * 60)
    print("ORDINACE DROPBOX BACKUP – INDEXER")
    print(f"Root : {ROOT_PATH}")
    print(f"Name : {ROOT_NAME}")
    print(f"Backup : {BACKUP_PATH}")
    print(f"DRY RUN : {DRY_RUN}")
    print("=" * 60)

    # ── 1. Scan filesystem (fast, no hashing) ──
    print("\n[1/7] Scanning filesystem...")
    # fs maps relative_path -> file metadata dict (full_path, file_name,
    # directory, size, mtime) — presumed from usage below; confirm in scanner.
    fs = scan_files(ROOT_PATH)
    print(f" Found {len(fs)} files on disk.")

    if DRY_RUN:
        # Dry-run mode: only show what was scanned, touch nothing.
        print("\n[DRY RUN] No DB connection, showing scan results only.")
        print(f" Files on disk: {len(fs)}")
        return

    # ── 2. Connect & create run ──
    conn = get_connection()
    cur = conn.cursor()
    run_id = create_run(cur)
    print(f"\n[2/7] Run #{run_id} created.")

    try:
        # ── 3. Load DB state ──
        print("[3/7] Loading DB state...")
        # db maps relative_path -> stored row dict (id, size, mtime,
        # content_hash) for rows with exists_now=1.
        db = load_all_files(cur)
        print(f" {len(db)} files in DB (exists_now=1).")

        # ── 4. Diff (cheap: set algebra + size/mtime compare, no hashing) ──
        print("[4/7] Diffing...")
        fs_paths = set(fs)
        db_paths = set(db)
        new_paths = fs_paths - db_paths
        deleted_paths = db_paths - fs_paths
        existing_paths = fs_paths & db_paths

        modified_paths = set()
        unchanged_paths = set()
        for p in existing_paths:
            fs_file = fs[p]
            db_file = db[p]
            # size or mtime change marks the file for re-hashing in step 5b.
            if fs_file["size"] != db_file["size"] or fs_file["mtime"] != db_file["mtime"]:
                modified_paths.add(p)
            else:
                unchanged_paths.add(p)

        print(f" NEW: {len(new_paths)} MOD: {len(modified_paths)} "
              f"DEL: {len(deleted_paths)} SAME: {len(unchanged_paths)}")

        # ── 5. Process changes ──
        print("[5/7] Processing changes...")
        events = []           # rows for batch_log_events
        files_to_backup = []  # (full_path, content_hash) pairs for step 6

        # 5a) NEW files — compute BLAKE3, batch INSERT
        if new_paths:
            print(f" Hashing {len(new_paths)} new files...")
            new_files = []
            for p in new_paths:
                f = fs[p]
                try:
                    content_hash = blake3_file(f["full_path"])
                except (FileNotFoundError, PermissionError, OSError) as e:
                    # File vanished or became unreadable between scan and
                    # hash — skip it; next run will pick it up again.
                    print(f" WARN: skip {p}: {e}")
                    continue
                new_files.append({
                    "relative_path": p,
                    "file_name": f["file_name"],
                    "directory": f["directory"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))

            if new_files:
                # batch_insert_files returns relative_path -> new file id.
                path_to_id = batch_insert_files(cur, new_files, run_id)
                for nf in new_files:
                    events.append({
                        "run_id": run_id,
                        "file_id": path_to_id[nf["relative_path"]],
                        "event_type": "CREATED",
                        "new_size": nf["size"],
                        "new_hash": nf["content_hash"],
                    })

        # 5b) MODIFIED files — compute BLAKE3, batch UPDATE
        if modified_paths:
            print(f" Hashing {len(modified_paths)} modified files...")
            mod_files = []
            for p in modified_paths:
                f = fs[p]
                db_file = db[p]
                try:
                    content_hash = blake3_file(f["full_path"])
                except (FileNotFoundError, PermissionError, OSError) as e:
                    print(f" WARN: skip {p}: {e}")
                    continue
                mod_files.append({
                    "id": db_file["id"],
                    "size": f["size"],
                    "mtime": f["mtime"],
                    "content_hash": content_hash,
                })
                events.append({
                    "run_id": run_id,
                    "file_id": db_file["id"],
                    "event_type": "MODIFIED",
                    "old_size": db_file["size"],
                    "new_size": f["size"],
                    "old_hash": db_file["content_hash"],
                    "new_hash": content_hash,
                })
                files_to_backup.append((f["full_path"], content_hash))

            if mod_files:
                batch_update_modified(cur, mod_files, run_id)

        # 5c) DELETED files — batch UPDATE exists_now=0
        if deleted_paths:
            del_ids = [db[p]["id"] for p in deleted_paths]
            batch_mark_deleted(cur, del_ids, run_id)
            for p in deleted_paths:
                events.append({
                    "run_id": run_id,
                    "file_id": db[p]["id"],
                    "event_type": "DELETED",
                    "old_size": db[p]["size"],
                    "old_hash": db[p]["content_hash"],
                })

        # 5d) UNCHANGED files — batch UPDATE last_seen_run
        if unchanged_paths:
            unch_ids = [db[p]["id"] for p in unchanged_paths]
            batch_update_unchanged(cur, unch_ids, run_id)

        # 5e) Log all events in one round trip
        if events:
            batch_log_events(cur, events)

        # ── 6. Backup ──
        if files_to_backup and BACKUP_PATH:
            print(f"[6/7] Backing up {len(files_to_backup)} files...")
            backed = ensure_backed_up(files_to_backup, BACKUP_PATH)
            print(f" {backed} new blobs written.")
        else:
            print("[6/7] Nothing to backup.")

        # ── 7. Finalize ──
        stats = {
            "total": len(fs),
            "new": len(new_paths),
            "modified": len(modified_paths),
            "deleted": len(deleted_paths),
            "unchanged": len(unchanged_paths),
        }
        finalize_run(cur, run_id, stats)
        conn.commit()
        print(f"[7/7] Run #{run_id} COMPLETED.")

        # ── Summary ── (inside the try: stats only exists on success, and
        # printing must happen before the connection is closed in finally)
        print("\n" + "=" * 60)
        print(f"Total : {stats['total']}")
        print(f"New : {stats['new']}")
        print(f"Modified : {stats['modified']}")
        print(f"Deleted : {stats['deleted']}")
        print(f"Unchanged: {stats['unchanged']}")
        print("=" * 60)
    except Exception as e:
        print(f"\nERROR: {e}")
        try:
            # Discard this run's uncommitted partial changes first, then
            # persist the failure marker.
            # NOTE(review): assumes the run row from create_run survives the
            # rollback (i.e. is committed separately) — confirm in indexer.db.
            conn.rollback()
            fail_run(cur, run_id)
            conn.commit()
        except Exception:
            pass  # best effort — never mask the original error
        raise
    finally:
        conn.close()
|
||||
if __name__ == "__main__":
    # Script entry point: run a single indexing pass.
    main()