This commit is contained in:
2026-02-12 07:54:22 +01:00
parent 42cd021b9c
commit bd4aede24a
3 changed files with 96 additions and 22 deletions

View File

@@ -63,23 +63,33 @@ def load_all_files(cur) -> dict:
def batch_insert_files(cur, files_list: list, run_id: int) -> dict:
"""
Batch INSERT nových souborů.
Batch INSERT (or re-activate) souborů.
Handles re-appearing files that were previously deleted (exists_now=0)
via ON DUPLICATE KEY UPDATE.
files_list: [{relative_path, file_name, directory, size, mtime, content_hash}]
Returns: {relative_path: file_id}
"""
path_to_id = {}
for i in range(0, len(files_list), BATCH_SIZE):
chunk = files_list[i:i + BATCH_SIZE]
cur.executemany(
"""INSERT INTO files
(relative_path, file_name, directory, file_size, mtime,
content_hash, first_seen_run, last_seen_run, exists_now)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 1)""",
[(f["relative_path"], f["file_name"], f["directory"],
f["size"], f["mtime"], f["content_hash"], run_id, run_id)
for f in chunk]
)
# Fetch real IDs — lastrowid+j is unreliable with executemany
for f in chunk:
cur.execute(
"""INSERT INTO files
(relative_path, file_name, directory, file_size, mtime,
content_hash, first_seen_run, last_seen_run, exists_now)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 1)
ON DUPLICATE KEY UPDATE
file_name = VALUES(file_name),
directory = VALUES(directory),
file_size = VALUES(file_size),
mtime = VALUES(mtime),
content_hash = VALUES(content_hash),
last_seen_run = VALUES(last_seen_run),
exists_now = 1""",
(f["relative_path"], f["file_name"], f["directory"],
f["size"], f["mtime"], f["content_hash"], run_id, run_id)
)
# Fetch real IDs
paths = [f["relative_path"] for f in chunk]
placeholders = ",".join(["%s"] * len(paths))
cur.execute(