reporter
This commit is contained in:
@@ -32,3 +32,10 @@ BACKUP_PASSWORD = os.getenv("BACKUP_PASSWORD")
|
||||
|
||||
# Dry-run switch: enabled when DRY_RUN is unset. Surrounding whitespace is
# stripped before matching so a value such as "true " (easy to produce from a
# batch file or .env line) is not silently read as "off".
DRY_RUN = os.getenv("DRY_RUN", "true").strip().lower() in ("1", "true", "yes")
# The default is given as a string because os.getenv returns environment
# values as strings; int() does the conversion in both cases.
BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1000"))

# =========================
# Logging
# =========================

# Name of the logging level and the directory for log files; both can be
# overridden through the environment.
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
LOG_DIR = os.getenv("LOG_DIR", r"C:\Reporting\DropboxBackup\logs")
|
||||
|
||||
+22
-27
@@ -1,8 +1,13 @@
|
||||
import unicodedata
|
||||
import pymysql
|
||||
from datetime import datetime
|
||||
from indexer.config import DB_CONFIG, BATCH_SIZE
|
||||
|
||||
|
||||
def _nfc(s: str) -> str:
|
||||
return unicodedata.normalize("NFC", s) if s else s
|
||||
|
||||
|
||||
def get_connection():
    """Open and return a new pymysql connection built from DB_CONFIG."""
    connection = pymysql.connect(**DB_CONFIG)
    return connection
|
||||
|
||||
@@ -50,7 +55,7 @@ def load_all_files(cur) -> dict:
|
||||
result = {}
|
||||
for row in cur.fetchall():
|
||||
file_id, rel_path, size, mtime, content_hash = row
|
||||
result[rel_path] = {
|
||||
result[_nfc(rel_path)] = {
|
||||
"id": file_id,
|
||||
"size": size,
|
||||
"mtime": mtime,
|
||||
@@ -70,34 +75,24 @@ def batch_insert_files(cur, files_list: list, run_id: int) -> dict:
|
||||
Returns: {relative_path: file_id}
|
||||
"""
|
||||
path_to_id = {}
|
||||
for i in range(0, len(files_list), BATCH_SIZE):
|
||||
chunk = files_list[i:i + BATCH_SIZE]
|
||||
for f in chunk:
|
||||
cur.execute(
|
||||
"""INSERT INTO files
|
||||
(relative_path, file_name, directory, file_size, mtime,
|
||||
content_hash, first_seen_run, last_seen_run, exists_now)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 1)
|
||||
ON DUPLICATE KEY UPDATE
|
||||
file_name = VALUES(file_name),
|
||||
directory = VALUES(directory),
|
||||
file_size = VALUES(file_size),
|
||||
mtime = VALUES(mtime),
|
||||
content_hash = VALUES(content_hash),
|
||||
last_seen_run = VALUES(last_seen_run),
|
||||
exists_now = 1""",
|
||||
(f["relative_path"], f["file_name"], f["directory"],
|
||||
f["size"], f["mtime"], f["content_hash"], run_id, run_id)
|
||||
)
|
||||
# Fetch real IDs
|
||||
paths = [f["relative_path"] for f in chunk]
|
||||
placeholders = ",".join(["%s"] * len(paths))
|
||||
for f in files_list:
|
||||
cur.execute(
|
||||
f"SELECT id, relative_path FROM files WHERE relative_path IN ({placeholders})",
|
||||
paths,
|
||||
"""INSERT INTO files
|
||||
(relative_path, file_name, directory, file_size, mtime,
|
||||
content_hash, first_seen_run, last_seen_run, exists_now)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 1)
|
||||
ON DUPLICATE KEY UPDATE
|
||||
file_name = VALUES(file_name),
|
||||
directory = VALUES(directory),
|
||||
file_size = VALUES(file_size),
|
||||
mtime = VALUES(mtime),
|
||||
content_hash = VALUES(content_hash),
|
||||
last_seen_run = VALUES(last_seen_run),
|
||||
exists_now = 1""",
|
||||
(f["relative_path"], f["file_name"], f["directory"],
|
||||
f["size"], f["mtime"], f["content_hash"], run_id, run_id)
|
||||
)
|
||||
for row in cur.fetchall():
|
||||
path_to_id[row[1]] = row[0]
|
||||
path_to_id[f["relative_path"]] = cur.lastrowid
|
||||
return path_to_id
|
||||
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import ctypes
|
||||
import time
|
||||
|
||||
from blake3 import blake3
|
||||
|
||||
@@ -17,6 +18,25 @@ def is_cloud_placeholder(path: str) -> bool:
|
||||
return bool(attrs & _CLOUD_MASK)
|
||||
|
||||
|
||||
def hydrate_file(path: str, timeout: int = 120, poll: int = 3) -> bool:
    """
    Force Dropbox to download a cloud placeholder by opening the file.

    Reads one byte from *path* to trigger the download, then polls
    ``is_cloud_placeholder`` until it reports False or *timeout* seconds
    have elapsed.

    Args:
        path: File to hydrate.
        timeout: Maximum number of seconds to wait.
        poll: Seconds to wait between checks.

    Returns:
        True if the file is no longer a cloud placeholder, False on timeout.
    """
    try:
        with open(path, "rb") as f:
            f.read(1)
    except OSError:
        # Best effort: the read only serves as a trigger, so a failure here
        # is not fatal; the polling below decides the outcome.
        pass

    # Monotonic clock: the wait must not be affected by system clock changes.
    deadline = time.monotonic() + timeout
    while True:
        # Check before waiting so an already-hydrated file succeeds even
        # with timeout=0, and so the state is re-checked after the last sleep.
        if not is_cloud_placeholder(path):
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline.
        time.sleep(min(poll, remaining))
|
||||
|
||||
|
||||
def blake3_file(path, chunk_size=1024 * 1024):
|
||||
"""Spočítá BLAKE3 hash souboru po blocích (bez načtení do paměti)."""
|
||||
h = blake3()
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from logging.handlers import TimedRotatingFileHandler
|
||||
|
||||
from indexer.config import LOG_LEVEL, LOG_DIR
|
||||
|
||||
|
||||
def setup_logging() -> logging.Logger:
    """
    Configure the root logger for file and console output.

    Creates LOG_DIR if needed, then replaces any handlers on the root logger
    with a file handler writing ``backup.log`` in LOG_DIR (rotated at
    midnight, 90 backups kept, UTF-8) and a handler writing to stdout.
    Both handlers share the same timestamped format.

    Returns:
        The logger named "backup".
    """
    os.makedirs(LOG_DIR, exist_ok=True)

    # Unknown level names fall back to INFO. The isinstance check also
    # rejects names that exist on the logging module but are not levels
    # (e.g. BASIC_FORMAT), which would otherwise make setLevel() raise.
    level = getattr(logging, LOG_LEVEL.upper(), logging.INFO)
    if not isinstance(level, int):
        level = logging.INFO

    fmt = logging.Formatter(
        "%(asctime)s [%(levelname)-8s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    file_handler = TimedRotatingFileHandler(
        os.path.join(LOG_DIR, "backup.log"),
        when="midnight",
        backupCount=90,
        encoding="utf-8",
    )
    file_handler.setFormatter(fmt)

    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(fmt)

    root = logging.root
    root.setLevel(level)
    # Close the handlers being replaced so their files are released; merely
    # clearing the list would leave a previous log file open.
    for old_handler in list(root.handlers):
        root.removeHandler(old_handler)
        old_handler.close()
    root.addHandler(file_handler)
    root.addHandler(console_handler)

    return logging.getLogger("backup")
|
||||
+3
-2
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import unicodedata
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
@@ -18,8 +19,8 @@ def scan_files(root_path: str) -> dict:
|
||||
stat = os.stat(full_path)
|
||||
except (FileNotFoundError, PermissionError):
|
||||
continue
|
||||
rel_path = os.path.relpath(full_path, root_path).replace("\\", "/")
|
||||
rel_dir = os.path.relpath(root, root_path).replace("\\", "/")
|
||||
rel_path = unicodedata.normalize("NFC", os.path.relpath(full_path, root_path).replace("\\", "/"))
|
||||
rel_dir = unicodedata.normalize("NFC", os.path.relpath(root, root_path).replace("\\", "/"))
|
||||
# Truncate microseconds — MySQL DATETIME rounds to whole seconds,
|
||||
# which causes false "modified" detections on every run.
|
||||
mtime = datetime.fromtimestamp(stat.st_mtime).replace(microsecond=0)
|
||||
|
||||
Reference in New Issue
Block a user