notebook
This commit is contained in:
0
indexer/__init__.py
Normal file
0
indexer/__init__.py
Normal file
32
indexer/config.py
Normal file
32
indexer/config.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load .env from the project root
|
||||
load_dotenv()
|
||||
|
||||
# =========================
|
||||
# Database
|
||||
# =========================
|
||||
|
||||
# Connection settings for the database, read from environment variables.
# The dict is unpacked as keyword arguments into pymysql.connect()
# (see indexer/db.py:get_connection).
DB_CONFIG = {
    "host": os.getenv("DB_HOST"),
    # Fall back to 3306 when DB_PORT is unset *or* defined but empty
    # (an empty "DB_PORT=" entry would otherwise make int("") raise ValueError).
    "port": int(os.getenv("DB_PORT") or 3306),
    "user": os.getenv("DB_USER"),
    "password": os.getenv("DB_PASSWORD"),
    "database": os.getenv("DB_NAME"),
    "charset": "utf8mb4",
    # Transactions are committed explicitly by the code using the connection.
    "autocommit": False,
}
|
||||
|
||||
# =========================
|
||||
# Filesystem
|
||||
# =========================
|
||||
|
||||
# ROOT_PATH has no default and is None when the variable is unset
# (presumably the directory handed to the scanner — verify against caller).
ROOT_PATH = os.environ.get("ROOT_PATH")
# ROOT_NAME is the label written to files.root_name by indexer/db.py.
ROOT_NAME = os.environ.get("ROOT_NAME", "ORDINACE")

# =========================
# Behaviour
# =========================

# True when DRY_RUN is unset or exactly "1"; any other value disables it.
DRY_RUN = os.environ.get("DRY_RUN", "1") == "1"
|
||||
91
indexer/db.py
Normal file
91
indexer/db.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import pymysql
|
||||
import hashlib
|
||||
from indexer.config import DB_CONFIG, ROOT_NAME
|
||||
|
||||
|
||||
def get_connection():
    """Open a new PyMySQL connection configured from ``DB_CONFIG``.

    Returns:
        The connection object produced by ``pymysql.connect``. This function
        does not close it; that is left to the caller.
    """
    connection = pymysql.connect(**DB_CONFIG)
    return connection
|
||||
|
||||
|
||||
def preload_mark_all_missing():
    """Mark every row in ``files`` as not existing, at the start of a run.

    Files that the scanner finds again are switched back to
    ``exists_now = 1`` (``insert_file`` and ``update_file`` set that flag).

    Opens its own connection, commits the change, and always closes the
    connection afterwards.

    NOTE(review): the UPDATE has no WHERE clause, so rows of every
    ``root_name`` are reset — confirm this is intended if several roots
    share the table.
    """
    reset_sql = "UPDATE files SET exists_now = 0"
    connection = get_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute(reset_sql)
        connection.commit()
    finally:
        connection.close()
|
||||
|
||||
|
||||
def path_hash(path: str) -> bytes:
    """Return the raw 16-byte MD5 digest of ``path`` encoded as UTF-8.

    The digest serves purely as an identifier for the path; it is not a
    security hash.
    """
    hasher = hashlib.md5()
    hasher.update(path.encode("utf-8"))
    return hasher.digest()
|
||||
|
||||
|
||||
def find_file_by_path(cur, path_hash_bytes):
    """Fetch the stored record whose ``path_hash`` equals ``path_hash_bytes``.

    Args:
        cur: Open database cursor used to run the query.
        path_hash_bytes: Hash of the file path, as produced by ``path_hash``.

    Returns:
        Whatever ``cur.fetchone()`` yields for the query selecting
        ``id, file_size, mtime, content_hash`` (no row -> the cursor's
        "no result" value, ``None`` for DB-API cursors).
    """
    lookup_sql = """
        SELECT id, file_size, mtime, content_hash
        FROM files
        WHERE path_hash = %s
        """
    params = (path_hash_bytes,)
    cur.execute(lookup_sql, params)
    row = cur.fetchone()
    return row
|
||||
|
||||
|
||||
def insert_file(cur, file):
    """Insert a newly discovered file into ``files`` and return its row id.

    ``first_seen`` and ``last_seen`` are set to the database's ``NOW()`` and
    ``exists_now`` to 1. ``root_name`` comes from the module-level
    ``ROOT_NAME`` and ``path_hash`` is derived from ``file["full_path"]``.

    Args:
        cur: Open database cursor used to run the statement.
        file: Mapping with the keys ``full_path``, ``file_name``,
            ``directory``, ``size``, ``mtime`` and ``content_hash``.

    Returns:
        ``cur.lastrowid`` after the insert.
    """
    insert_sql = """
        INSERT INTO files (
            root_name, full_path, path_hash,
            file_name, directory,
            file_size, mtime, content_hash,
            first_seen, last_seen, exists_now
        )
        VALUES (
            %s, %s, %s,
            %s, %s,
            %s, %s, %s,
            NOW(), NOW(), 1
        )
        """
    full_path = file["full_path"]
    row_values = (
        ROOT_NAME,
        full_path,
        path_hash(full_path),
        file["file_name"],
        file["directory"],
        file["size"],
        file["mtime"],
        file["content_hash"],
    )
    cur.execute(insert_sql, row_values)
    return cur.lastrowid
|
||||
|
||||
|
||||
def update_file(cur, file_id, file):
    """Refresh the stored metadata of an already known file.

    Overwrites ``file_size``, ``mtime`` and ``content_hash``, sets
    ``last_seen`` to the database's ``NOW()`` and flags the row as existing
    (``exists_now = 1``).

    Args:
        cur: Open database cursor used to run the statement.
        file_id: Primary key (``id``) of the row to update.
        file: Mapping with the keys ``size``, ``mtime`` and ``content_hash``.
    """
    update_sql = """
        UPDATE files
        SET file_size = %s,
            mtime = %s,
            content_hash = %s,
            last_seen = NOW(),
            exists_now = 1
        WHERE id = %s
        """
    new_values = (file["size"], file["mtime"], file["content_hash"], file_id)
    cur.execute(update_sql, new_values)
|
||||
19
indexer/events.py
Normal file
19
indexer/events.py
Normal file
@@ -0,0 +1,19 @@
|
||||
def log_event(cur, file_id, event_type, old=None, new=None):
    """Append a row to ``file_events`` describing a change to a file.

    ``event_time`` is set to the database's ``NOW()``.

    Args:
        cur: Open database cursor used to run the statement.
        file_id: Id of the file the event belongs to.
        event_type: Value stored in the ``event_type`` column.
        old: Optional mapping with ``size`` and ``content_hash`` describing
            the previous state; when falsy, the old_* columns are NULL.
        new: Optional mapping with ``size`` and ``content_hash`` describing
            the new state; when falsy, the new_* columns are NULL.
    """

    def _field(snapshot, key):
        # A missing (or empty) snapshot contributes NULL for its columns.
        if snapshot:
            return snapshot[key]
        return None

    event_sql = """
        INSERT INTO file_events (
            file_id, event_type, event_time,
            old_size, new_size,
            old_hash, new_hash
        )
        VALUES (%s, %s, NOW(), %s, %s, %s, %s)
        """
    event_values = (
        file_id,
        event_type,
        _field(old, "size"),
        _field(new, "size"),
        _field(old, "content_hash"),
        _field(new, "content_hash"),
    )
    cur.execute(event_sql, event_values)
|
||||
12
indexer/hasher.py
Normal file
12
indexer/hasher.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from blake3 import blake3
|
||||
|
||||
|
||||
def blake3_file(path, chunk_size=1024 * 1024):
    """Compute the BLAKE3 digest of a file, reading it block by block.

    The file is streamed in pieces of ``chunk_size`` bytes, so it is never
    loaded into memory as a whole.

    Args:
        path: Path of the file to hash.
        chunk_size: Number of bytes requested per read (default 1 MiB).

    Returns:
        The raw digest bytes.
    """
    hasher = blake3()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if block == b"":
                # An empty read marks end of file.
                break
            hasher.update(block)
    return hasher.digest()
|
||||
21
indexer/scanner.py
Normal file
21
indexer/scanner.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import os
|
||||
from datetime import datetime
|
||||
from indexer.hasher import blake3_file
|
||||
|
||||
def scan_files(root_path):
    """Walk ``root_path`` recursively and yield one metadata dict per file.

    Each yielded dict contains:
        full_path: Path of the file, with backslashes replaced by ``/``.
        file_name: Bare file name.
        directory: Containing directory, with backslashes replaced by ``/``.
        size: ``st_size`` from ``os.stat``.
        mtime: ``st_mtime`` converted with ``datetime.fromtimestamp``
            (naive, local time).
        content_hash: Result of ``blake3_file`` for the file.

    Files that disappear while the scan is running are skipped silently,
    whether they vanish before the ``stat`` call or before/during hashing.
    """
    for directory, _subdirs, file_names in os.walk(root_path):
        for name in file_names:
            full_path = os.path.join(directory, name)
            try:
                stat = os.stat(full_path)
                # Hashing re-opens the file, so it may be gone even though
                # stat just succeeded; keep it under the same guard so one
                # vanished file cannot abort the whole scan.
                content_hash = blake3_file(full_path)
            except FileNotFoundError:
                continue

            yield {
                "full_path": full_path.replace("\\", "/"),
                "file_name": name,
                "directory": directory.replace("\\", "/"),
                "size": stat.st_size,
                "mtime": datetime.fromtimestamp(stat.st_mtime),
                "content_hash": content_hash,
            }
|
||||
Reference in New Issue
Block a user