2026-01-16 15:34:12 +01:00
parent 186c98fd0d
commit 2d2a60a845
6 changed files with 850 additions and 1 deletion


@@ -0,0 +1,293 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Delta sync of Medevio communication.
Downloads only messages changed after messagesProcessed for each request.
"""
import json
import requests
import pymysql
from pathlib import Path
from datetime import datetime
import time
import sys
# ==============================
# UTF-8 SAFE OUTPUT
# ==============================
try:
sys.stdout.reconfigure(encoding='utf-8')
sys.stderr.reconfigure(encoding='utf-8')
except AttributeError:
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
def safe_print(text: str):
enc = sys.stdout.encoding or ""
if not enc.lower().startswith("utf"):
text = ''.join(ch for ch in text if ord(ch) < 65536)
try:
print(text)
except UnicodeEncodeError:
text = ''.join(ch for ch in text if ord(ch) < 128)
print(text)
# ==============================
# CONFIG
# ==============================
TOKEN_PATH = Path("token.txt")
DB_CONFIG = {
"host": "192.168.1.76",
"port": 3307,
"user": "root",
"password": "Vlado9674+",
"database": "medevio",
"charset": "utf8mb4",
"cursorclass": pymysql.cursors.DictCursor,
}
GRAPHQL_QUERY_MESSAGES = r"""
query UseMessages_ListMessages($requestId: String!, $updatedSince: DateTime) {
messages: listMessages(
patientRequestId: $requestId,
updatedSince: $updatedSince
) {
id
createdAt
updatedAt
readAt
text
type
sender {
id
name
surname
clinicId
}
medicalRecord {
id
description
contentType
url
downloadUrl
createdAt
updatedAt
}
}
}
"""
# ==============================
# HELPERS
# ==============================
def parse_dt(s):
if not s:
return None
try:
return datetime.fromisoformat(s.replace("Z", "+00:00"))
except Exception:
return None
def read_token(path: Path) -> str:
tok = path.read_text(encoding="utf-8").strip()
return tok.replace("Bearer ", "")
# ==============================
# FETCH MESSAGES (DELTA)
# ==============================
def fetch_messages(headers, request_id, updated_since):
payload = {
"operationName": "UseMessages_ListMessages",
"query": GRAPHQL_QUERY_MESSAGES,
"variables": {
"requestId": request_id,
"updatedSince": updated_since,
},
}
r = requests.post(
"https://api.medevio.cz/graphql",
json=payload,
headers=headers,
timeout=30
)
if r.status_code != 200:
safe_print(f"❌ HTTP {r.status_code} for request {request_id}")
return []
j = r.json()
if "errors" in j:
safe_print(f"❌ GraphQL error for {request_id}: {j['errors']}")
return []
return j.get("data", {}).get("messages", []) or []
# ==============================
# INSERT MESSAGE
# ==============================
def insert_message(cur, req_id, msg):
sender = msg.get("sender") or {}
sender_name = " ".join(
x for x in [sender.get("name"), sender.get("surname")] if x
) or None
mr = msg.get("medicalRecord") or {}
sql = """
INSERT INTO medevio_conversation (
id, request_id,
sender_name, sender_id, sender_clinic_id,
text, created_at, read_at, updated_at,
attachment_url, attachment_description, attachment_content_type
) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
ON DUPLICATE KEY UPDATE
sender_name = VALUES(sender_name),
sender_id = VALUES(sender_id),
sender_clinic_id = VALUES(sender_clinic_id),
text = VALUES(text),
created_at = VALUES(created_at),
read_at = VALUES(read_at),
updated_at = VALUES(updated_at),
attachment_url = VALUES(attachment_url),
attachment_description = VALUES(attachment_description),
attachment_content_type = VALUES(attachment_content_type)
"""
cur.execute(sql, (
msg.get("id"),
req_id,
sender_name,
sender.get("id"),
sender.get("clinicId"),
msg.get("text"),
parse_dt(msg.get("createdAt")),
parse_dt(msg.get("readAt")),
parse_dt(msg.get("updatedAt")),
mr.get("downloadUrl") or mr.get("url"),
mr.get("description"),
mr.get("contentType")
))
# ==============================
# INSERT ATTACHMENT (DEDUP)
# ==============================
def insert_download(cur, req_id, msg, existing_ids):
mr = msg.get("medicalRecord") or {}
attachment_id = mr.get("id")
if not attachment_id or attachment_id in existing_ids:
return
url = mr.get("downloadUrl") or mr.get("url")
if not url:
return
try:
r = requests.get(url, timeout=30)
r.raise_for_status()
data = r.content
except Exception as e:
safe_print(f"⚠️ Attachment download failed: {e}")
return
filename = url.split("/")[-1].split("?")[0]
cur.execute("""
INSERT INTO medevio_downloads (
request_id, attachment_id, attachment_type,
filename, content_type, file_size, created_at, file_content
) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
ON DUPLICATE KEY UPDATE
file_content = VALUES(file_content),
file_size = VALUES(file_size),
downloaded_at = NOW()
""", (
req_id,
attachment_id,
"MESSAGE_ATTACHMENT",
filename,
mr.get("contentType"),
len(data),
parse_dt(msg.get("createdAt")),
data
))
existing_ids.add(attachment_id)
# ==============================
# MAIN
# ==============================
def main():
token = read_token(TOKEN_PATH)
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
"Accept": "application/json",
}
conn = pymysql.connect(**DB_CONFIG)
# existing attachments
with conn.cursor() as cur:
cur.execute("SELECT attachment_id FROM medevio_downloads")
existing_ids = {r["attachment_id"] for r in cur.fetchall()}
# select requests needing sync
with conn.cursor() as cur:
cur.execute("""
SELECT id, messagesProcessed
FROM pozadavky
WHERE messagesProcessed IS NULL
OR messagesProcessed < updatedAt
""")
rows = cur.fetchall()
safe_print(f"📋 Found {len(rows)} requests for message delta-sync\n")
for i, row in enumerate(rows, 1):
req_id = row["id"]
updated_since = row["messagesProcessed"]
if updated_since:
updated_since = updated_since.replace(microsecond=0).isoformat() + "Z"
safe_print(f"[{i}/{len(rows)}] {req_id}")
messages = fetch_messages(headers, req_id, updated_since)
if not messages:
safe_print(" ⏭ No new messages")
else:
with conn.cursor() as cur:
for msg in messages:
insert_message(cur, req_id, msg)
insert_download(cur, req_id, msg, existing_ids)
conn.commit()
safe_print(f"{len(messages)} new/updated messages")
with conn.cursor() as cur:
cur.execute(
"UPDATE pozadavky SET messagesProcessed = NOW() WHERE id = %s",
(req_id,)
)
conn.commit()
time.sleep(0.25)
conn.close()
safe_print("\n🎉 Delta message sync DONE")
# ==============================
if __name__ == "__main__":
main()
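Note: the upserts above only work if medevio_conversation keys on the message id and medevio_downloads has a unique key on attachment_id; neither table's DDL is part of this commit. A minimal sketch of the assumed schema (column names taken from the INSERT statements, every type is a guess):

# Hypothetical DDL derived from the INSERT statements above; the real schema
# is not in this commit, so all types and key names here are assumptions.
import pymysql

ASSUMED_DDL = [
    """CREATE TABLE IF NOT EXISTS medevio_conversation (
        id VARCHAR(64) PRIMARY KEY,            -- ON DUPLICATE KEY UPDATE relies on this
        request_id VARCHAR(64) NOT NULL,
        sender_name VARCHAR(255),
        sender_id VARCHAR(64),
        sender_clinic_id VARCHAR(64),
        text MEDIUMTEXT,
        created_at DATETIME NULL,
        read_at DATETIME NULL,
        updated_at DATETIME NULL,
        attachment_url TEXT,
        attachment_description TEXT,
        attachment_content_type VARCHAR(128)
    ) CHARACTER SET utf8mb4""",
    """CREATE TABLE IF NOT EXISTS medevio_downloads (
        request_id VARCHAR(64) NOT NULL,
        attachment_id VARCHAR(64) NOT NULL,
        attachment_type VARCHAR(32),
        filename VARCHAR(255),
        content_type VARCHAR(128),
        file_size BIGINT,
        created_at DATETIME NULL,
        downloaded_at DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
        file_content LONGBLOB,
        UNIQUE KEY uq_attachment (attachment_id)  -- dedup key the upsert depends on
    ) CHARACTER SET utf8mb4""",
]

conn = pymysql.connect(**DB_CONFIG)  # DB_CONFIG as defined in the script above
with conn.cursor() as cur:
    for stmt in ASSUMED_DDL:
        cur.execute(stmt)
conn.commit()
conn.close()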


@@ -0,0 +1,239 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymysql
import requests
from pathlib import Path
from datetime import datetime
from dateutil import parser
import time
import sys
# ================================
# UTF-8 SAFE OUTPUT (Windows friendly)
# ================================
try:
sys.stdout.reconfigure(encoding='utf-8')
sys.stderr.reconfigure(encoding='utf-8')
except AttributeError:
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
def safe_print(text: str):
enc = sys.stdout.encoding or ""
if not enc.lower().startswith("utf"):
text = ''.join(ch for ch in text if ord(ch) < 65536)
try:
print(text)
except UnicodeEncodeError:
text = ''.join(ch for ch in text if ord(ch) < 128)
print(text)
# ================================
# 🔧 CONFIG
# ================================
TOKEN_PATH = Path("token.txt")
CLINIC_SLUG = "mudr-buzalkova"
BATCH_SIZE = 500
STATES = ["ACTIVE", "DONE"]  # list both explicitly; otherwise the API returns only ACTIVE
DB_CONFIG = {
"host": "192.168.1.76",
"port": 3307,
"user": "root",
"password": "Vlado9674+",
"database": "medevio",
"charset": "utf8mb4",
"cursorclass": pymysql.cursors.DictCursor,
}
GRAPHQL_QUERY = r"""
query ClinicRequestList2(
$clinicSlug: String!,
$queueId: String,
$queueAssignment: QueueAssignmentFilter!,
$state: PatientRequestState,
$pageInfo: PageInfo!,
$locale: Locale!
) {
requestsResponse: listPatientRequestsForClinic2(
clinicSlug: $clinicSlug,
queueId: $queueId,
queueAssignment: $queueAssignment,
state: $state,
pageInfo: $pageInfo
) {
count
patientRequests {
id
displayTitle(locale: $locale)
createdAt
updatedAt
doneAt
removedAt
extendedPatient {
name
surname
identificationNumber
}
lastMessage {
createdAt
}
}
}
}
"""
# ================================
# TOKEN
# ================================
def read_token(path: Path) -> str:
tok = path.read_text(encoding="utf-8").strip()
if tok.startswith("Bearer "):
return tok.split(" ", 1)[1]
return tok
# ================================
# DATETIME PARSER
# ================================
def to_mysql_dt(iso_str):
if not iso_str:
return None
try:
dt = parser.isoparse(iso_str)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=datetime.now().astimezone().tzinfo)
return dt.astimezone().strftime("%Y-%m-%d %H:%M:%S")
except Exception:
return None
# ================================
# UPSERT
# ================================
def upsert(conn, r):
p = r.get("extendedPatient") or {}
api_updated = to_mysql_dt(r.get("updatedAt"))
msg_updated = to_mysql_dt((r.get("lastMessage") or {}).get("createdAt"))
final_updated = max(filter(None, [api_updated, msg_updated]), default=None)
sql = """
INSERT INTO pozadavky (
id, displayTitle, createdAt, updatedAt, doneAt, removedAt,
pacient_jmeno, pacient_prijmeni, pacient_rodnecislo
) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)
ON DUPLICATE KEY UPDATE
displayTitle=VALUES(displayTitle),
updatedAt=VALUES(updatedAt),
doneAt=VALUES(doneAt),
removedAt=VALUES(removedAt),
pacient_jmeno=VALUES(pacient_jmeno),
pacient_prijmeni=VALUES(pacient_prijmeni),
pacient_rodnecislo=VALUES(pacient_rodnecislo)
"""
vals = (
r.get("id"),
r.get("displayTitle"),
to_mysql_dt(r.get("createdAt")),
final_updated,
to_mysql_dt(r.get("doneAt")),
to_mysql_dt(r.get("removedAt")),
p.get("name"),
p.get("surname"),
p.get("identificationNumber"),
)
with conn.cursor() as cur:
cur.execute(sql, vals)
conn.commit()
# ================================
# FETCH PAGE (per state)
# ================================
def fetch_state(headers, state, offset):
variables = {
"clinicSlug": CLINIC_SLUG,
"queueId": None,
"queueAssignment": "ANY",
"state": state,
"pageInfo": {"first": BATCH_SIZE, "offset": offset},
"locale": "cs",
}
payload = {
"operationName": "ClinicRequestList2",
"query": GRAPHQL_QUERY,
"variables": variables,
}
r = requests.post("https://api.medevio.cz/graphql", json=payload, headers=headers, timeout=30)
r.raise_for_status()
data = r.json()["data"]["requestsResponse"]
return data.get("patientRequests", []), data.get("count", 0)
# ================================
# MAIN
# ================================
def main():
token = read_token(TOKEN_PATH)
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
"Accept": "application/json",
}
conn = pymysql.connect(**DB_CONFIG)
safe_print(f"\n=== FULL Medevio READ-ALL sync @ {datetime.now():%Y-%m-%d %H:%M:%S} ===")
grand_total = 0
for state in STATES:
safe_print(f"\n🔁 STATE = {state}")
offset = 0
total = None
processed = 0
while True:
batch, count = fetch_state(headers, state, offset)
if total is None:
total = count
safe_print(f"📡 {state}: celkem {total}")
if not batch:
break
for r in batch:
upsert(conn, r)
processed += len(batch)
safe_print(f"{processed}/{total}")
offset += BATCH_SIZE
if offset >= count:
break
time.sleep(0.4)
grand_total += processed
conn.close()
safe_print(f"\n✅ HOTOVO celkem zpracováno {grand_total} požadavků\n")
# ================================
if __name__ == "__main__":
main()
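One subtlety in upsert(): final_updated takes max() over formatted strings, not datetime objects. That is safe only because to_mysql_dt() always emits the fixed-width "%Y-%m-%d %H:%M:%S" form, where lexicographic order coincides with chronological order. A minimal self-contained check (the sample values are illustrative):

# Lexicographic order on fixed-width "%Y-%m-%d %H:%M:%S" strings matches
# chronological order, so max() picks the later of the API update timestamp
# and the last message's createdAt.
api_updated = "2026-01-16 09:15:00"
msg_updated = "2026-01-16 15:34:12"
assert max(filter(None, [api_updated, msg_updated])) == msg_updated

# None values are filtered out; default=None covers the all-missing case.
assert max(filter(None, [None, None]), default=None) is None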


@@ -21,7 +21,7 @@ import argparse
# ==============================
# 🔧 CONFIGURATION
# ==============================
-TOKEN_PATH = Path("token.txt")
+TOKEN_PATH = Path("../10ReadPozadavky/token.txt")
DB_CONFIG = {
"host": "192.168.1.76", "host": "192.168.1.76",


@@ -0,0 +1 @@
{"cookies": [{"name": "gateway-access-token", "value": "YwBgkf8McREDKs7vCZj0EZD2fJsuV8RyDPtYx7WiDoz0nFJ9kxId8kcNEPBLFSwM+Tiz80+SOdFwo+oj", "domain": "my.medevio.cz", "path": "/", "expires": 1763372319, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "aws-waf-token", "value": "b6a1d4eb-4350-40e5-8e52-1f5f9600fbb8:CgoAr9pC8c6zAAAA:OYwXLY5OyitSQPl5v2oIlS+hIxsrb5LxV4VjCyE2gJCFFE5PQu+0Zbxse2ZIofrNv5QKs0TYUDTmxPhZyTr9Qtjnq2gsVQxWHXzrbebv3Z7RbzB63u6Ymn3Fo8IbDev3CfCNcNuxCKltFEXLqSCjI2vqNY+7HZkgQBIqy2wMgzli3aSLq0w8lWYtZzyyot7q8RPXWMGTfaBUo2reY0SOSffm9rAivE9PszNfPid71CvNrGAAoxRbwb25eVujlyIcDVWe5vZ9Iw==", "domain": ".my.medevio.cz", "path": "/", "expires": 1761125920, "httpOnly": false, "secure": true, "sameSite": "Lax"}], "origins": [{"origin": "https://my.medevio.cz", "localStorage": [{"name": "awswaf_token_refresh_timestamp", "value": "1760780309860"}, {"name": "awswaf_session_storage", "value": "b6a1d4eb-4350-40e5-8e52-1f5f9600fbb8:CgoAr9pC8c+zAAAA:+vw//1NzmePjPpbGCJzUB+orCRivtJd098DbDX4AnABiGRw/+ql6ShqvFY4YdCY7w2tegb5mEPBdAmc4sNi22kNR9BuEoAgCUiMhkU1AZWfzM51zPfTh7SveCrREZ7xdvxcqKPMmfVLRYX5E4+UWh22z/LKQ7+d9VERp3J+wWCUW3dFFirkezy3N7b2FVjTlY/RxsZwhejQziTG/L3CkIFFP3mOReNgBvDpj7aKoM1knY4IL4TZ8E7zNv3nTsvzACLYvnUutVOUcofN1TfOzwZshSKsEXsMzrQn8PzLccX1jM5VSzce7gfEzl0zSPsT8NB3Sna+rhMIttDNYgvbW1HsfG2LIeKMR27Zf8hkslDRVVkcU/Kp2jLOEdhhrBKGjKY2o9/uX3NExdzh5MEKQSSRtmue01BpWYILPH23rMsz4YSmF+Ough5OeQoC95rkcYwVXMhwvUN9Zfp9UZ4xCNfFUex5dOrg9aJntYRnaceeocGUttNI5AdT0i3+osV6XHXzKxeqO8zLCS9BIsCzxaHfdqqem5DorMceuGKz+QqksatIQAA=="}, {"name": "Application.Intl.locale", "value": "cs"}, {"name": "Password.prefill", "value": "{\"username\":\"vladimir.buzalka@buzalka.cz\",\"type\":\"email\"}"}]}]}


@@ -0,0 +1 @@
nYvrvgflIKcDiQg8Hhpud+qG8iGZ8eH8su4nyT/Mgcm7XQp65ygY9s39+O01wIpk/7sKd6fBHkiKvsqH

dddddd.py Normal file

@@ -0,0 +1,315 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
FAST FILE HASH INDEXER FOR UNRAID (BLAKE3 ONLY, ALL SHARES)
- HARDCODED SINGLE-SHARE MODE
- SQL OPTIMIZATION
- STRICT MODE (NO TOLERANCE): updates the DB on any mismatch
"""
import os
import pymysql
import socket
import platform
from blake3 import blake3
# ==============================
# ENV / HOST
# ==============================
HOSTNAME = socket.gethostname()
OS_NAME = platform.system()
# HARDCODED HERE FOR TESTING:
# SCAN_ONLY_THIS = None #"#Fotky"
SCAN_ONLY_THIS = '#Library' # "#Fotky"
# ==============================
# CONFIG
# ==============================
EXCLUDED_SHARES = {"domains", "appdata", "system", "isos"}
# --- File size limits (bytes) ---
FILE_MIN_SIZE = 0
FILE_MAX_SIZE = 1024 * 1024 * 1024 * 1024  # 1 TB
DB_CONFIG = {
"host": "192.168.1.76",
"port": 3307,
"user": "root",
"password": "Vlado9674+",
"database": "torrents",
"charset": "utf8mb4",
"autocommit": True,
}
CHUNK_SIZE = 4 * 1024 * 1024 # 4 MB
PRINT_SKIPPED = False
# ==============================
# HASH
# ==============================
def compute_blake3(path: str) -> bytes:
h = blake3()
with open(path, "rb") as f:
for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
h.update(chunk)
return h.digest()
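# Sanity-check sketch (illustrative, not part of the original script; the /tmp
# path is an assumption): the streaming hasher must agree with a one-shot
# digest, and digest() returns the 32 raw bytes stored in the blake3 column
# (printed later via b3.hex()):
#
#   data = b"x" * 10_000_000
#   with open("/tmp/b3_selftest.bin", "wb") as f:
#       f.write(data)
#   assert compute_blake3("/tmp/b3_selftest.bin") == blake3(data).digest()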
# ==============================
# SHARE / PATH HELPERS
# ==============================
def get_user_shares():
if SCAN_ONLY_THIS:
path = f"/mnt/user/{SCAN_ONLY_THIS}"
if os.path.isdir(path):
print(f"🎯 SINGLE SHARE MODE ACTIVE: Scanning only '{SCAN_ONLY_THIS}'")
return [SCAN_ONLY_THIS]
else:
print(f"⚠️ ERROR: Requested share '{SCAN_ONLY_THIS}' not found in /mnt/user!")
return []
shares = []
if not os.path.exists("/mnt/user"):
return []
for name in os.listdir("/mnt/user"):
if name.startswith("."):
continue
if name in EXCLUDED_SHARES:
continue
path = f"/mnt/user/{name}"
if os.path.isdir(path):
shares.append(name)
return sorted(shares)
def find_physical_roots(shares):
roots = []
if not os.path.exists("/mnt"):
return []
for disk in os.listdir("/mnt"):
if not disk.startswith("disk"):
continue
for share in shares:
path = f"/mnt/{disk}/{share}"
if os.path.isdir(path):
roots.append((share, path))
return sorted(roots)
def logical_path_from_disk_path(disk_path: str) -> str:
if not disk_path.startswith("/mnt/disk"):
raise ValueError(f"Unexpected disk path: {disk_path}")
parts = disk_path.split("/", 3)
return f"/mnt/user/{parts[3]}"
def size_allowed(size: int) -> bool:
if FILE_MIN_SIZE is not None and size < FILE_MIN_SIZE:
return False
if FILE_MAX_SIZE is not None and size > FILE_MAX_SIZE:
return False
return True
# ==============================
# MAIN
# ==============================
def main():
print("🚀 BLAKE3 indexer starting", flush=True)
print(f"🖥 Host: {HOSTNAME} | OS: {OS_NAME}", flush=True)
if FILE_MIN_SIZE or FILE_MAX_SIZE:
print(f"📏 File size limits: min={FILE_MIN_SIZE} max={FILE_MAX_SIZE}", flush=True)
shares = get_user_shares()
if not shares:
print("❌ No user shares to index!", flush=True)
return
print("📦 User shares to index:", flush=True)
for s in shares:
print(f" - {s}", flush=True)
scan_roots = find_physical_roots(shares)
if not scan_roots:
print("❌ No physical disk roots found!", flush=True)
return
print("📂 Physical scan roots:", flush=True)
for _, path in scan_roots:
print(f" - {path}", flush=True)
try:
db = pymysql.connect(**DB_CONFIG)
cur = db.cursor()
# === THE "DON'T OVERTHINK IT" SWITCH ===
# Pins the session to UTC so MySQL stops shifting times back and forth by an hour.
# cur.execute("SET time_zone = '+00:00'")
# =========================================
except Exception as e:
print(f"❌ Database connection failed: {e}")
return
print("📥 Loading already indexed files into memory...", flush=True)
# === SQL OPTIMIZATION ===
if SCAN_ONLY_THIS:
search_pattern = f"/mnt/user/{SCAN_ONLY_THIS}%"
print(f"⚡ OPTIMIZATION: Fetching only DB records for '{search_pattern}'", flush=True)
cur.execute("""
SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
FROM file_md5_index
WHERE host_name = %s AND full_path LIKE %s
""", (HOSTNAME, search_pattern))
else:
cur.execute("""
SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
FROM file_md5_index
WHERE host_name = %s
""", (HOSTNAME,))
# Load everything into a dict for fast lookups
# Format: { "path": (size, mtime) }
indexed_map = {row[0]: (row[1], row[2]) for row in cur.fetchall()}
print(f"✅ Loaded {len(indexed_map):,} indexed entries", flush=True)
print("======================================", flush=True)
new_files = 0
skipped = 0
filtered = 0
seen_paths = set()
# --- SCAN ---
for share, scan_root in scan_roots:
for root, _, files in os.walk(scan_root):
for fname in files:
disk_path = os.path.join(root, fname)
try:
stat = os.stat(disk_path)
except OSError:
continue
size = stat.st_size
if not size_allowed(size):
filtered += 1
continue
logical_path = logical_path_from_disk_path(disk_path)
if logical_path in seen_paths:
continue
seen_paths.add(logical_path)
mtime = int(stat.st_mtime)
# === STRICT CHECK (NO TOLERANCE) ===
# If the file exists in the DB and both size and mtime match exactly, skip it.
# Anything else (even a 1 s time shift) counts as a change and gets updated.
is_match = False
if logical_path in indexed_map:
db_size, db_mtime = indexed_map[logical_path]
if size == db_size and mtime == db_mtime:
is_match = True
if is_match:
skipped += 1
if PRINT_SKIPPED:
print(f"⏭ SKIP {logical_path}", flush=True)
continue
# ============================================
print(" NEW / UPDATED", flush=True)
print(f" File: {logical_path}", flush=True)
print(f" Size: {size:,} B", flush=True)
try:
b3 = compute_blake3(disk_path)
except Exception as e:
print(f"❌ BLAKE3 failed: {e}", flush=True)
continue
# This is where mtime gets UPDATEd to the on-disk value
cur.execute("""
INSERT INTO file_md5_index
(os_name, host_name, full_path, file_name, directory,
file_size, mtime, blake3)
VALUES (%s, %s, %s, %s, %s, %s, FROM_UNIXTIME(%s), %s)
ON DUPLICATE KEY UPDATE
file_size = VALUES(file_size),
mtime = VALUES(mtime),
blake3 = VALUES(blake3),
updated_at = CURRENT_TIMESTAMP
""", (
OS_NAME,
HOSTNAME,
logical_path,
fname,
os.path.dirname(logical_path),
size,
mtime,
b3,
))
new_files += 1
print(f" B3 : {b3.hex()}", flush=True)
print("--------------------------------------", flush=True)
print("======================================", flush=True)
print(f"✅ New / updated : {new_files}", flush=True)
print(f"⏭ Skipped : {skipped}", flush=True)
print(f"🚫 Size filtered: {filtered}", flush=True)
print("🏁 Script finished", flush=True)
# ==============================
# DB CLEANUP REMOVE DELETED FILES
# ==============================
print("🧹 Checking for deleted files in DB...", flush=True)
db_paths = set(indexed_map.keys())
deleted_paths = db_paths - seen_paths
# Restrict to the current share only (if single-share mode is active)
if SCAN_ONLY_THIS:
prefix = f"/mnt/user/{SCAN_ONLY_THIS}/"
deleted_paths = {p for p in deleted_paths if p.startswith(prefix)}
if deleted_paths:
print(f"🗑 Removing {len(deleted_paths):,} deleted files from DB", flush=True)
BATCH_SIZE = 1000
deleted_paths = list(deleted_paths)
for i in range(0, len(deleted_paths), BATCH_SIZE):
batch = deleted_paths[i:i + BATCH_SIZE]
placeholders = ",".join(["%s"] * len(batch))
sql = f"""
DELETE FROM file_md5_index
WHERE host_name = %s
AND full_path IN ({placeholders})
"""
cur.execute(sql, (HOSTNAME, *batch))
print("✅ DB cleanup completed", flush=True)
else:
print("✅ No deleted files found in DB", flush=True)
cur.close()
db.close()
if __name__ == "__main__":
main()
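With raw 32-byte BLAKE3 digests in the table, duplicate detection across shares and hosts is one GROUP BY away. A hedged sketch against the file_md5_index columns the indexer above writes (nothing beyond those columns is assumed):

# Sketch: list duplicate files by BLAKE3 digest. DB_CONFIG is the dict defined
# in the script above (no DictCursor, so rows come back as tuples). Note that
# GROUP_CONCAT's default length limit (1024 bytes) may truncate long path lists.
import pymysql

conn = pymysql.connect(**DB_CONFIG)
with conn.cursor() as cur:
    cur.execute("""
        SELECT HEX(blake3) AS digest,
               COUNT(*)    AS copies,
               GROUP_CONCAT(full_path SEPARATOR ' | ') AS paths
        FROM file_md5_index
        GROUP BY blake3
        HAVING COUNT(*) > 1
        ORDER BY copies DESC
    """)
    for digest, copies, paths in cur.fetchall():
        print(f"{copies}x {digest}  {paths}")
conn.close()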