diff --git a/indexer/hasher.py b/indexer/hasher.py
index 29235d2..3770397 100644
--- a/indexer/hasher.py
+++ b/indexer/hasher.py
@@ -1,10 +1,37 @@
+import ctypes
+
 from blake3 import blake3
 
+# Windows file attributes that mark cloud/placeholder (not locally stored) files
+_FILE_ATTRIBUTE_OFFLINE = 0x00001000
+_FILE_ATTRIBUTE_RECALL_ON_OPEN = 0x00040000
+_FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS = 0x00400000
+_CLOUD_MASK = _FILE_ATTRIBUTE_OFFLINE | _FILE_ATTRIBUTE_RECALL_ON_OPEN | _FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS
+# Failure sentinel of GetFileAttributesW, i.e. (DWORD)-1
+_INVALID_FILE_ATTRIBUTES = 0xFFFFFFFF
+
+
+def is_cloud_placeholder(path: str) -> bool:
+    """Return True if the file is a cloud placeholder that is not stored locally.
+
+    Checks the Windows file attributes of ``path`` for the offline / recall
+    flags that sync clients such as Dropbox or OneDrive set on placeholders.
+    Returns False when the attributes cannot be read and on platforms where
+    ``ctypes.windll`` does not exist.
+    """
+    windll = getattr(ctypes, "windll", None)
+    if windll is None:  # not Windows: no placeholder attributes to inspect
+        return False
+    # ctypes treats the result as a signed C int by default, so the failure
+    # value 0xFFFFFFFF arrives as -1; masking to 32 bits restores the DWORD.
+    attrs = windll.kernel32.GetFileAttributesW(path) & 0xFFFFFFFF
+    if attrs == _INVALID_FILE_ATTRIBUTES:
+        return False
+    return bool(attrs & _CLOUD_MASK)
+
 
 def blake3_file(path, chunk_size=1024 * 1024):
-    """
-    Spočítá BLAKE3 hash souboru po blocích (bez načtení do paměti)
-    """
+    """Spočítá BLAKE3 hash souboru po blocích (bez načtení do paměti)."""
     h = blake3()
     with open(path, "rb") as f:
         for chunk in iter(lambda: f.read(chunk_size), b""):
diff --git a/main.py b/main.py
index 5c974bb..06fa706 100644
--- a/main.py
+++ b/main.py
@@ -11,6 +11,7 @@ from indexer.db import (
 )
 from indexer.events import batch_log_events
 from indexer.backup import ensure_backed_up
+from indexer.hasher import is_cloud_placeholder
 
 
 def main():
@@ -72,15 +73,22 @@ def main():
     files_to_backup = []
 
     # 5a) NEW files — compute BLAKE3, batch INSERT
+    skipped_files = []
     if new_paths:
         print(f" Hashing {len(new_paths)} new files...")
         new_files = []
         for p in new_paths:
             f = fs[p]
+            if is_cloud_placeholder(f["full_path"]):
+                reason = "not synced (cloud placeholder)"
+                print(f" WARN: skip {p}: {reason}")
+                skipped_files.append((p, reason))
+                continue
             try:
                 content_hash = blake3_file(f["full_path"])
             except (FileNotFoundError, PermissionError, OSError) as e:
                 print(f" WARN: skip {p}: {e}")
+                skipped_files.append((p, str(e)))
                 continue
             new_files.append({
                 "relative_path": p,
@@ -168,7 +176,7 @@ def main():
     # ── 7. Finalize ──
     stats = {
         "total": len(fs),
-        "new": len(new_paths),
+        "new": len(new_files) if new_paths else 0,
         "modified": len(modified_paths),
         "deleted": len(deleted_paths),
         "unchanged": len(unchanged_paths),
@@ -196,6 +204,12 @@ def main():
     print(f"Modified : {stats['modified']}")
     print(f"Deleted : {stats['deleted']}")
     print(f"Unchanged: {stats['unchanged']}")
+    if skipped_files:
+        print(f"Skipped : {len(skipped_files)} (hash failed)")
+        print("-" * 60)
+        for path, reason in skipped_files:
+            print(f" SKIP: {path}")
+            print(f" {reason}")
     print("=" * 60)
 
     # ── 8. Generate Excel report ──