z230
This commit is contained in:
184
indexer/db.py
184
indexer/db.py
@@ -1,91 +1,123 @@
|
||||
import pymysql
|
||||
import hashlib
|
||||
from indexer.config import DB_CONFIG, ROOT_NAME
|
||||
from datetime import datetime
|
||||
from indexer.config import DB_CONFIG, BATCH_SIZE
|
||||
|
||||
|
||||
def get_connection():
    """Open and return a new PyMySQL connection built from ``DB_CONFIG``.

    The connection is returned open; this function never closes it.
    """
    connection = pymysql.connect(**DB_CONFIG)
    return connection
|
||||
|
||||
|
||||
def preload_mark_all_missing():
    """Flag every row in ``files`` as not existing at the start of a run.

    Sets ``exists_now = 0`` on all rows and commits. Files that the scanner
    finds again are meant to be switched back to ``exists_now = 1`` later
    in the run.
    """
    reset_sql = "UPDATE files SET exists_now = 0"
    db = get_connection()
    try:
        with db.cursor() as cursor:
            cursor.execute(reset_sql)
        db.commit()
    finally:
        # Release the connection even when the UPDATE or the commit fails.
        db.close()
|
||||
# ── Run management ──────────────────────────────────────────
|
||||
|
||||
|
||||
def path_hash(path: str) -> bytes:
    """Return the raw 16-byte MD5 digest of *path* encoded as UTF-8.

    The digest serves only as a compact identifier for the path; it is not
    used as a security hash.
    """
    hasher = hashlib.md5()
    hasher.update(path.encode("utf-8"))
    return hasher.digest()
|
||||
|
||||
|
||||
def find_file_by_path(cur, path_hash_bytes):
|
||||
def create_run(cur) -> int:
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT id, file_size, mtime, content_hash
|
||||
FROM files
|
||||
WHERE path_hash = %s
|
||||
""",
|
||||
(path_hash_bytes,)
|
||||
)
|
||||
return cur.fetchone()
|
||||
|
||||
|
||||
def insert_file(cur, file):
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO files (
|
||||
root_name, full_path, path_hash,
|
||||
file_name, directory,
|
||||
file_size, mtime, content_hash,
|
||||
first_seen, last_seen, exists_now
|
||||
)
|
||||
VALUES (
|
||||
%s, %s, %s,
|
||||
%s, %s,
|
||||
%s, %s, %s,
|
||||
NOW(), NOW(), 1
|
||||
)
|
||||
""",
|
||||
(
|
||||
ROOT_NAME,
|
||||
file["full_path"],
|
||||
path_hash(file["full_path"]),
|
||||
file["file_name"],
|
||||
file["directory"],
|
||||
file["size"],
|
||||
file["mtime"],
|
||||
file["content_hash"],
|
||||
)
|
||||
"INSERT INTO runs (started_at, status) VALUES (%s, 'RUNNING')",
|
||||
(datetime.now(),)
|
||||
)
|
||||
return cur.lastrowid
|
||||
|
||||
|
||||
def update_file(cur, file_id, file):
|
||||
def finalize_run(cur, run_id: int, stats: dict):
|
||||
cur.execute(
|
||||
"""
|
||||
UPDATE files
|
||||
SET file_size = %s,
|
||||
mtime = %s,
|
||||
content_hash = %s,
|
||||
last_seen = NOW(),
|
||||
exists_now = 1
|
||||
WHERE id = %s
|
||||
""",
|
||||
(
|
||||
file["size"],
|
||||
file["mtime"],
|
||||
file["content_hash"],
|
||||
file_id,
|
||||
)
|
||||
"""UPDATE runs
|
||||
SET finished_at = %s, status = 'COMPLETED',
|
||||
files_total = %s, files_new = %s, files_modified = %s,
|
||||
files_deleted = %s, files_unchanged = %s
|
||||
WHERE id = %s""",
|
||||
(datetime.now(), stats["total"], stats["new"], stats["modified"],
|
||||
stats["deleted"], stats["unchanged"], run_id)
|
||||
)
|
||||
|
||||
|
||||
def fail_run(cur, run_id: int):
    """Mark the run *run_id* as FAILED and stamp its finish time.

    Issues a single UPDATE on ``runs`` through *cur*, using the current
    local time for ``finished_at``. Nothing is committed here.
    """
    finished_at = datetime.now()
    failure_sql = "UPDATE runs SET finished_at = %s, status = 'FAILED' WHERE id = %s"
    cur.execute(failure_sql, (finished_at, run_id))
|
||||
|
||||
|
||||
# ── Load DB state ──────────────────────────────────────────
|
||||
|
||||
def load_all_files(cur) -> dict:
    """Load every file row currently flagged as existing into memory.

    Runs one SELECT over ``files`` filtered by ``exists_now = 1`` and reads
    the whole result set with ``fetchall()``.

    Returns:
        ``{relative_path: {"id", "size", "mtime", "content_hash"}}``.

    Rows are unpacked positionally, so *cur* must yield 5-item sequences.
    """
    cur.execute(
        """SELECT id, relative_path, file_size, mtime, content_hash
           FROM files WHERE exists_now = 1"""
    )
    return {
        rel_path: {
            "id": file_id,
            "size": size,
            "mtime": mtime,
            "content_hash": content_hash,
        }
        for file_id, rel_path, size, mtime, content_hash in cur.fetchall()
    }
|
||||
|
||||
|
||||
# ── Batch operations ────────────────────────────────────────
|
||||
|
||||
def batch_insert_files(cur, files_list: list, run_id: int) -> dict:
    """Insert new file rows and return the id assigned to each of them.

    Args:
        cur: DB-API cursor used for the INSERTs; nothing is committed here.
        files_list: dicts with the keys ``relative_path``, ``file_name``,
            ``directory``, ``size``, ``mtime`` and ``content_hash``.
        run_id: id of the current run, stored as both ``first_seen_run``
            and ``last_seen_run``.

    Returns:
        ``{relative_path: file_id}`` for every inserted row.

    Each row is inserted with its own ``execute`` call and its id is read
    from ``cur.lastrowid`` straight afterwards. Deriving ids as
    ``lastrowid + offset`` after ``executemany`` is not reliable: PyMySQL
    only merges ``executemany`` into one multi-row INSERT when the VALUES
    tuple holds nothing but placeholders. The literal ``1`` in this
    statement makes it fall back to one ``execute`` per row, after which
    ``lastrowid`` is the id of the *last* row rather than the first.
    Reading the id per row costs the same as that fallback and does not
    depend on statement splitting or auto-increment settings.
    """
    insert_sql = """INSERT INTO files
           (relative_path, file_name, directory, file_size, mtime,
            content_hash, first_seen_run, last_seen_run, exists_now)
           VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 1)"""
    path_to_id = {}
    for f in files_list:
        cur.execute(
            insert_sql,
            (
                f["relative_path"],
                f["file_name"],
                f["directory"],
                f["size"],
                f["mtime"],
                f["content_hash"],
                run_id,
                run_id,
            ),
        )
        # lastrowid now refers to exactly the row that was just inserted.
        path_to_id[f["relative_path"]] = cur.lastrowid
    return path_to_id
|
||||
|
||||
|
||||
def batch_update_modified(cur, files_list: list, run_id: int):
    """Write new size, mtime and content hash for files that changed.

    Args:
        cur: DB-API cursor; nothing is committed here.
        files_list: dicts with the keys ``id``, ``size``, ``mtime`` and
            ``content_hash``.
        run_id: stored as ``last_seen_run``; rows also get
            ``exists_now = 1``.

    The list is sent to ``executemany`` in slices of ``BATCH_SIZE`` rows.
    """
    update_sql = """UPDATE files
           SET file_size = %s, mtime = %s, content_hash = %s,
               last_seen_run = %s, exists_now = 1
           WHERE id = %s"""
    for start in range(0, len(files_list), BATCH_SIZE):
        params = [
            (entry["size"], entry["mtime"], entry["content_hash"], run_id, entry["id"])
            for entry in files_list[start:start + BATCH_SIZE]
        ]
        cur.executemany(update_sql, params)
|
||||
|
||||
|
||||
def batch_mark_deleted(cur, file_ids: list, run_id: int):
    """Flag the given file ids as deleted (``exists_now = 0``).

    ``last_seen_run`` is set to *run_id* on the same rows. The ids are sent
    to ``executemany`` in slices of ``BATCH_SIZE``; nothing is committed
    here.
    """
    delete_sql = "UPDATE files SET exists_now = 0, last_seen_run = %s WHERE id = %s"
    for start in range(0, len(file_ids), BATCH_SIZE):
        params = [(run_id, file_id) for file_id in file_ids[start:start + BATCH_SIZE]]
        cur.executemany(delete_sql, params)
|
||||
|
||||
|
||||
def batch_update_unchanged(cur, file_ids: list, run_id: int):
    """Refresh ``last_seen_run`` for files whose content did not change.

    The rows also get ``exists_now = 1``. The ids are sent to
    ``executemany`` in slices of ``BATCH_SIZE``; nothing is committed here.
    """
    touch_sql = "UPDATE files SET last_seen_run = %s, exists_now = 1 WHERE id = %s"
    for start in range(0, len(file_ids), BATCH_SIZE):
        params = [(run_id, file_id) for file_id in file_ids[start:start + BATCH_SIZE]]
        cur.executemany(touch_sql, params)
|
||||
|
||||
Reference in New Issue
Block a user