Files
drobboxordinacebackup/indexer/db.py
2026-02-12 07:54:22 +01:00

139 lines
4.9 KiB
Python

import pymysql
from datetime import datetime
from indexer.config import DB_CONFIG, BATCH_SIZE
def get_connection():
    """Open and return a new MySQL connection built from DB_CONFIG."""
    connection = pymysql.connect(**DB_CONFIG)
    return connection
# ── Run management ──────────────────────────────────────────
def create_run(cur) -> int:
    """Insert a new row into ``runs`` with status RUNNING.

    Returns the auto-generated id of the freshly created run.
    """
    started = datetime.now()
    cur.execute(
        "INSERT INTO runs (started_at, status) VALUES (%s, 'RUNNING')",
        (started,),
    )
    return cur.lastrowid
def finalize_run(cur, run_id: int, stats: dict):
    """Mark run *run_id* as COMPLETED and store its per-category counters.

    ``stats`` must provide the keys: total, new, modified, deleted, unchanged.
    """
    params = (
        datetime.now(),
        stats["total"],
        stats["new"],
        stats["modified"],
        stats["deleted"],
        stats["unchanged"],
        run_id,
    )
    cur.execute(
        """UPDATE runs
SET finished_at = %s, status = 'COMPLETED',
files_total = %s, files_new = %s, files_modified = %s,
files_deleted = %s, files_unchanged = %s
WHERE id = %s""",
        params,
    )
def fail_run(cur, run_id: int):
    """Mark run *run_id* as FAILED and stamp its finish time."""
    finished = datetime.now()
    cur.execute(
        "UPDATE runs SET finished_at = %s, status = 'FAILED' WHERE id = %s",
        (finished, run_id),
    )
# ── Load DB state ──────────────────────────────────────────
def load_all_files(cur) -> dict:
    """Load every still-existing file row from the DB into memory.

    Returns: {relative_path: {id, size, mtime, content_hash}}
    """
    cur.execute(
        """SELECT id, relative_path, file_size, mtime, content_hash
FROM files WHERE exists_now = 1"""
    )
    return {
        rel_path: {
            "id": file_id,
            "size": size,
            "mtime": mtime,
            "content_hash": content_hash,
        }
        for file_id, rel_path, size, mtime, content_hash in cur.fetchall()
    }
# ── Batch operations ────────────────────────────────────────
def batch_insert_files(cur, files_list: list, run_id: int) -> dict:
    """Batch INSERT (or re-activate) files.

    Handles re-appearing files that were previously deleted (exists_now=0)
    via ON DUPLICATE KEY UPDATE.

    Fix: the original issued one cur.execute() per row, which made the
    BATCH_SIZE chunking pointless (one round trip per file). Each chunk is
    now written with a single executemany(); pymysql rewrites the INSERT
    into one multi-row statement, and its rewrite supports the
    ON DUPLICATE KEY UPDATE suffix.

    files_list: [{relative_path, file_name, directory, size, mtime, content_hash}]
    Returns: {relative_path: file_id}
    """
    path_to_id = {}
    for i in range(0, len(files_list), BATCH_SIZE):
        chunk = files_list[i:i + BATCH_SIZE]
        cur.executemany(
            """INSERT INTO files
(relative_path, file_name, directory, file_size, mtime,
content_hash, first_seen_run, last_seen_run, exists_now)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 1)
ON DUPLICATE KEY UPDATE
file_name = VALUES(file_name),
directory = VALUES(directory),
file_size = VALUES(file_size),
mtime = VALUES(mtime),
content_hash = VALUES(content_hash),
last_seen_run = VALUES(last_seen_run),
exists_now = 1""",
            [(f["relative_path"], f["file_name"], f["directory"],
              f["size"], f["mtime"], f["content_hash"], run_id, run_id)
             for f in chunk],
        )
        # lastrowid is not reliable under ON DUPLICATE KEY UPDATE, so fetch
        # the real ids back for this chunk explicitly.
        paths = [f["relative_path"] for f in chunk]
        placeholders = ",".join(["%s"] * len(paths))
        cur.execute(
            f"SELECT id, relative_path FROM files WHERE relative_path IN ({placeholders})",
            paths,
        )
        for file_id, rel_path in cur.fetchall():
            path_to_id[rel_path] = file_id
    return path_to_id
def batch_update_modified(cur, files_list: list, run_id: int):
    """Batch-UPDATE files whose content changed since the previous run.

    files_list: [{id, size, mtime, content_hash}]
    """
    sql = """UPDATE files
SET file_size = %s, mtime = %s, content_hash = %s,
last_seen_run = %s, exists_now = 1
WHERE id = %s"""
    for start in range(0, len(files_list), BATCH_SIZE):
        rows = files_list[start:start + BATCH_SIZE]
        params = [
            (entry["size"], entry["mtime"], entry["content_hash"], run_id, entry["id"])
            for entry in rows
        ]
        cur.executemany(sql, params)
def batch_mark_deleted(cur, file_ids: list, run_id: int):
    """Batch-UPDATE files that vanished from disk — flips exists_now to 0."""
    sql = "UPDATE files SET exists_now = 0, last_seen_run = %s WHERE id = %s"
    for start in range(0, len(file_ids), BATCH_SIZE):
        batch = file_ids[start:start + BATCH_SIZE]
        cur.executemany(sql, [(run_id, file_id) for file_id in batch])
def batch_update_unchanged(cur, file_ids: list, run_id: int):
    """Batch-UPDATE untouched files — only refreshes last_seen_run."""
    sql = "UPDATE files SET last_seen_run = %s, exists_now = 1 WHERE id = %s"
    for start in range(0, len(file_ids), BATCH_SIZE):
        batch = file_ids[start:start + BATCH_SIZE]
        cur.executemany(sql, [(run_id, file_id) for file_id in batch])