This commit is contained in:
2026-05-18 07:04:08 +02:00
parent 52f04c2839
commit 07e6a9c374
8 changed files with 254 additions and 99 deletions
+7
View File
@@ -32,3 +32,10 @@ BACKUP_PASSWORD = os.getenv("BACKUP_PASSWORD")
# Dry-run is enabled by default; it stays on only for the values 1/true/yes
# (case-insensitive) and is switched off by any other DRY_RUN value.
DRY_RUN = os.environ.get("DRY_RUN", "true").lower() in {"1", "true", "yes"}
# Batch size read from the environment, falling back to 1000 when unset.
BATCH_SIZE = int(os.environ.get("BATCH_SIZE", 1000))
# =========================
# Logging
# =========================
# Name of the logging level (e.g. "INFO") and the directory for log files.
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
LOG_DIR = os.environ.get("LOG_DIR", r"C:\Reporting\DropboxBackup\logs")
+22 -27
View File
@@ -1,8 +1,13 @@
import unicodedata
import pymysql
from datetime import datetime
from indexer.config import DB_CONFIG, BATCH_SIZE
def _nfc(s: str) -> str:
    """Return *s* normalized to Unicode NFC; falsy values pass through unchanged."""
    if not s:
        # Empty string / None: nothing to normalize, hand it back as-is.
        return s
    return unicodedata.normalize("NFC", s)
def get_connection():
    """Open and return a new pymysql connection built from ``DB_CONFIG``."""
    connection = pymysql.connect(**DB_CONFIG)
    return connection
@@ -50,7 +55,7 @@ def load_all_files(cur) -> dict:
result = {}
for row in cur.fetchall():
file_id, rel_path, size, mtime, content_hash = row
result[rel_path] = {
result[_nfc(rel_path)] = {
"id": file_id,
"size": size,
"mtime": mtime,
@@ -70,34 +75,24 @@ def batch_insert_files(cur, files_list: list, run_id: int) -> dict:
Returns: {relative_path: file_id}
"""
path_to_id = {}
for i in range(0, len(files_list), BATCH_SIZE):
chunk = files_list[i:i + BATCH_SIZE]
for f in chunk:
cur.execute(
"""INSERT INTO files
(relative_path, file_name, directory, file_size, mtime,
content_hash, first_seen_run, last_seen_run, exists_now)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 1)
ON DUPLICATE KEY UPDATE
file_name = VALUES(file_name),
directory = VALUES(directory),
file_size = VALUES(file_size),
mtime = VALUES(mtime),
content_hash = VALUES(content_hash),
last_seen_run = VALUES(last_seen_run),
exists_now = 1""",
(f["relative_path"], f["file_name"], f["directory"],
f["size"], f["mtime"], f["content_hash"], run_id, run_id)
)
# Fetch real IDs
paths = [f["relative_path"] for f in chunk]
placeholders = ",".join(["%s"] * len(paths))
for f in files_list:
cur.execute(
f"SELECT id, relative_path FROM files WHERE relative_path IN ({placeholders})",
paths,
"""INSERT INTO files
(relative_path, file_name, directory, file_size, mtime,
content_hash, first_seen_run, last_seen_run, exists_now)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 1)
ON DUPLICATE KEY UPDATE
file_name = VALUES(file_name),
directory = VALUES(directory),
file_size = VALUES(file_size),
mtime = VALUES(mtime),
content_hash = VALUES(content_hash),
last_seen_run = VALUES(last_seen_run),
exists_now = 1""",
(f["relative_path"], f["file_name"], f["directory"],
f["size"], f["mtime"], f["content_hash"], run_id, run_id)
)
for row in cur.fetchall():
path_to_id[row[1]] = row[0]
path_to_id[f["relative_path"]] = cur.lastrowid
return path_to_id
+20
View File
@@ -1,4 +1,5 @@
import ctypes
import time
from blake3 import blake3
@@ -17,6 +18,25 @@ def is_cloud_placeholder(path: str) -> bool:
return bool(attrs & _CLOUD_MASK)
def hydrate_file(path: str, timeout: int = 120, poll: int = 3) -> bool:
    """
    Force Dropbox to download a cloud placeholder by opening the file.

    One byte is read from *path* to trigger the download, then
    ``is_cloud_placeholder`` is polled until it reports that the file is
    no longer a placeholder or the timeout expires.

    Args:
        path: Path of the file to hydrate.
        timeout: Maximum number of seconds to wait.
        poll: Number of seconds to sleep between checks.

    Returns:
        True if the file was downloaded within the timeout, False otherwise.
    """
    try:
        with open(path, "rb") as f:
            f.read(1)
    except OSError:
        # Best effort: the read is only a trigger. Whether the file actually
        # got hydrated is decided by the polling loop below.
        pass

    # Monotonic clock so that wall-clock adjustments (NTP, DST, manual
    # changes) can neither cut the wait short nor extend it.
    deadline = time.monotonic() + timeout
    while True:
        # Always check at least once, even when timeout <= 0.
        if not is_cloud_placeholder(path):
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline; the final check happens right at it.
        time.sleep(min(poll, remaining))
def blake3_file(path, chunk_size=1024 * 1024):
"""Spočítá BLAKE3 hash souboru po blocích (bez načtení do paměti)."""
h = blake3()
+34
View File
@@ -0,0 +1,34 @@
import logging
import os
import sys
from logging.handlers import TimedRotatingFileHandler
from indexer.config import LOG_LEVEL, LOG_DIR
def setup_logging() -> logging.Logger:
    """
    Configure the root logger and return the ``"backup"`` logger.

    Creates ``LOG_DIR`` if needed and installs two handlers on the root
    logger, both using the same timestamped format:

    * a file handler writing ``backup.log`` inside ``LOG_DIR``, rotated at
      midnight with 90 backups kept, UTF-8 encoded;
    * a stream handler writing to ``sys.stdout``.

    Handlers already attached to the root logger are removed and closed, so
    the function can be called repeatedly without leaking open log files.

    Returns:
        The logger named ``"backup"``.
    """
    os.makedirs(LOG_DIR, exist_ok=True)

    # Only accept real numeric levels: getattr() could otherwise return an
    # unrelated module attribute (e.g. BASIC_FORMAT) and make setLevel() raise.
    level = getattr(logging, LOG_LEVEL.upper(), None)
    if not isinstance(level, int):
        level = logging.INFO

    fmt = logging.Formatter(
        "%(asctime)s [%(levelname)-8s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    file_handler = TimedRotatingFileHandler(
        os.path.join(LOG_DIR, "backup.log"),
        when="midnight",
        backupCount=90,
        encoding="utf-8",
    )
    file_handler.setFormatter(fmt)

    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(fmt)

    root = logging.root
    root.setLevel(level)
    # Detach AND close previous handlers; merely clearing the list would keep
    # their file handles open, which can block log rotation.
    for old_handler in list(root.handlers):
        root.removeHandler(old_handler)
        old_handler.close()
    root.addHandler(file_handler)
    root.addHandler(console_handler)

    return logging.getLogger("backup")
+3 -2
View File
@@ -1,4 +1,5 @@
import os
import unicodedata
from datetime import datetime
@@ -18,8 +19,8 @@ def scan_files(root_path: str) -> dict:
stat = os.stat(full_path)
except (FileNotFoundError, PermissionError):
continue
rel_path = os.path.relpath(full_path, root_path).replace("\\", "/")
rel_dir = os.path.relpath(root, root_path).replace("\\", "/")
rel_path = unicodedata.normalize("NFC", os.path.relpath(full_path, root_path).replace("\\", "/"))
rel_dir = unicodedata.normalize("NFC", os.path.relpath(root, root_path).replace("\\", "/"))
# Truncate microseconds — MySQL DATETIME rounds to whole seconds,
# which causes false "modified" detections on every run.
mtime = datetime.fromtimestamp(stat.st_mtime).replace(microsecond=0)