diff --git a/10ReadPozadavky/PRAVIDELNE_3_StahniKomunikaciDELTA.py b/10ReadPozadavky/PRAVIDELNE_3_StahniKomunikaciDELTA.py
new file mode 100644
index 0000000..b3f5d60
--- /dev/null
+++ b/10ReadPozadavky/PRAVIDELNE_3_StahniKomunikaciDELTA.py
@@ -0,0 +1,293 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Delta sync of Medevio communication.
+Downloads only the messages changed after messagesProcessed for each request.
+"""
+
+import json
+import requests
+import pymysql
+from pathlib import Path
+from datetime import datetime
+import time
+import sys
+
+# ==============================
+# UTF-8 SAFE OUTPUT
+# ==============================
+try:
+    sys.stdout.reconfigure(encoding='utf-8')
+    sys.stderr.reconfigure(encoding='utf-8')
+except AttributeError:
+    import io
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
+
+
+def safe_print(text: str):
+    enc = sys.stdout.encoding or ""
+    if not enc.lower().startswith("utf"):
+        text = ''.join(ch for ch in text if ord(ch) < 65536)
+    try:
+        print(text)
+    except UnicodeEncodeError:
+        text = ''.join(ch for ch in text if ord(ch) < 128)
+        print(text)
+
+
+# ==============================
+# CONFIG
+# ==============================
+TOKEN_PATH = Path("token.txt")
+
+DB_CONFIG = {
+    "host": "192.168.1.76",
+    "port": 3307,
+    "user": "root",
+    "password": "Vlado9674+",
+    "database": "medevio",
+    "charset": "utf8mb4",
+    "cursorclass": pymysql.cursors.DictCursor,
+}
+
+GRAPHQL_QUERY_MESSAGES = r"""
+query UseMessages_ListMessages($requestId: String!, $updatedSince: DateTime) {
+  messages: listMessages(
+    patientRequestId: $requestId,
+    updatedSince: $updatedSince
+  ) {
+    id
+    createdAt
+    updatedAt
+    readAt
+    text
+    type
+    sender {
+      id
+      name
+      surname
+      clinicId
+    }
+    medicalRecord {
+      id
+      description
+      contentType
+      url
+      downloadUrl
+      createdAt
+      updatedAt
+    }
+  }
+}
+"""
+
+
+# ==============================
+# HELPERS
+# ==============================
+def parse_dt(s):
+    if not s:
+        return None
+    try:
+        return datetime.fromisoformat(s.replace("Z", "+00:00"))
+    except Exception:
+        return None
+
+
+def read_token(path: Path) -> str:
+    tok = path.read_text(encoding="utf-8").strip()
+    return tok.replace("Bearer ", "")
+
+
+# ==============================
+# FETCH MESSAGES (DELTA)
+# ==============================
+def fetch_messages(headers, request_id, updated_since):
+    payload = {
+        "operationName": "UseMessages_ListMessages",
+        "query": GRAPHQL_QUERY_MESSAGES,
+        "variables": {
+            "requestId": request_id,
+            "updatedSince": updated_since,
+        },
+    }
+
+    r = requests.post(
+        "https://api.medevio.cz/graphql",
+        json=payload,
+        headers=headers,
+        timeout=30
+    )
+
+    if r.status_code != 200:
+        safe_print(f"❌ HTTP {r.status_code} for request {request_id}")
+        return []
+
+    j = r.json()
+    if "errors" in j:
+        safe_print(f"❌ GraphQL error for {request_id}: {j['errors']}")
+        return []
+
+    return j.get("data", {}).get("messages", []) or []
+
+
+# ==============================
+# INSERT MESSAGE
+# ==============================
+def insert_message(cur, req_id, msg):
+    sender = msg.get("sender") or {}
+    sender_name = " ".join(
+        x for x in [sender.get("name"), sender.get("surname")] if x
+    ) or None
+
+    mr = msg.get("medicalRecord") or {}
+
+    sql = """
+        INSERT INTO medevio_conversation (
+            id, request_id,
+            sender_name, sender_id, sender_clinic_id,
+            text, created_at, read_at, updated_at,
+            attachment_url, attachment_description, attachment_content_type
+        ) VALUES
+            (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
+        ON DUPLICATE KEY UPDATE
+            sender_name = VALUES(sender_name),
+            sender_id = VALUES(sender_id),
+            sender_clinic_id = VALUES(sender_clinic_id),
+            text = VALUES(text),
+            created_at = VALUES(created_at),
+            read_at = VALUES(read_at),
+            updated_at = VALUES(updated_at),
+            attachment_url = VALUES(attachment_url),
+            attachment_description = VALUES(attachment_description),
+            attachment_content_type = VALUES(attachment_content_type)
+    """
+
+    cur.execute(sql, (
+        msg.get("id"),
+        req_id,
+        sender_name,
+        sender.get("id"),
+        sender.get("clinicId"),
+        msg.get("text"),
+        parse_dt(msg.get("createdAt")),
+        parse_dt(msg.get("readAt")),
+        parse_dt(msg.get("updatedAt")),
+        mr.get("downloadUrl") or mr.get("url"),
+        mr.get("description"),
+        mr.get("contentType")
+    ))
+
+
+# ==============================
+# INSERT ATTACHMENT (DEDUP)
+# ==============================
+def insert_download(cur, req_id, msg, existing_ids):
+    mr = msg.get("medicalRecord") or {}
+    attachment_id = mr.get("id")
+    if not attachment_id or attachment_id in existing_ids:
+        return
+
+    url = mr.get("downloadUrl") or mr.get("url")
+    if not url:
+        return
+
+    try:
+        r = requests.get(url, timeout=30)
+        r.raise_for_status()
+        data = r.content
+    except Exception as e:
+        safe_print(f"⚠️ Attachment download failed: {e}")
+        return
+
+    filename = url.split("/")[-1].split("?")[0]
+
+    cur.execute("""
+        INSERT INTO medevio_downloads (
+            request_id, attachment_id, attachment_type,
+            filename, content_type, file_size, created_at, file_content
+        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
+        ON DUPLICATE KEY UPDATE
+            file_content = VALUES(file_content),
+            file_size = VALUES(file_size),
+            downloaded_at = NOW()
+    """, (
+        req_id,
+        attachment_id,
+        "MESSAGE_ATTACHMENT",
+        filename,
+        mr.get("contentType"),
+        len(data),
+        parse_dt(msg.get("createdAt")),
+        data
+    ))
+
+    existing_ids.add(attachment_id)
+
+
+# ==============================
+# MAIN
+# ==============================
+def main():
+    token = read_token(TOKEN_PATH)
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+    conn = pymysql.connect(**DB_CONFIG)
+
+    # existing attachments
+    with conn.cursor() as cur:
+        cur.execute("SELECT attachment_id FROM medevio_downloads")
+        existing_ids = {r["attachment_id"] for r in cur.fetchall()}
+
+    # select requests needing sync
+    with conn.cursor() as cur:
+        cur.execute("""
+            SELECT id, messagesProcessed
+            FROM pozadavky
+            WHERE messagesProcessed IS NULL
+               OR messagesProcessed < updatedAt
+        """)
+        rows = cur.fetchall()
+
+    safe_print(f"📋 Found {len(rows)} requests for message delta-sync\n")
+
+    for i, row in enumerate(rows, 1):
+        req_id = row["id"]
+        updated_since = row["messagesProcessed"]
+        if updated_since:
+            updated_since = updated_since.replace(microsecond=0).isoformat() + "Z"
+
+        safe_print(f"[{i}/{len(rows)}] {req_id}")
+
+        messages = fetch_messages(headers, req_id, updated_since)
+        if not messages:
+            safe_print("   ⏭ No new messages")
+        else:
+            with conn.cursor() as cur:
+                for msg in messages:
+                    insert_message(cur, req_id, msg)
+                    insert_download(cur, req_id, msg, existing_ids)
+            conn.commit()
+            safe_print(f"   ✅ {len(messages)} new/updated messages")
+
+        with conn.cursor() as cur:
+            cur.execute(
+                "UPDATE pozadavky SET messagesProcessed = NOW() WHERE id = %s",
+                (req_id,)
+            )
+        conn.commit()
+
+        time.sleep(0.25)
+
+    conn.close()
+    safe_print("\n🎉 Delta message sync DONE")
+
+
+# ==============================
+if __name__ == "__main__":
+    main()
diff --git a/12 Readallinbatches/10 Readallpozadavkyinbatches.py b/12 Readallinbatches/10 Readallpozadavkyinbatches.py
new file mode 100644
index 0000000..f6387f6
--- /dev/null
+++ b/12 Readallinbatches/10 Readallpozadavkyinbatches.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import pymysql
+import requests
+from pathlib import Path
+from datetime import datetime
+from dateutil import parser
+import time
+import sys
+
+# ================================
+# UTF-8 SAFE OUTPUT (Windows friendly)
+# ================================
+try:
+    sys.stdout.reconfigure(encoding='utf-8')
+    sys.stderr.reconfigure(encoding='utf-8')
+except AttributeError:
+    import io
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
+
+
+def safe_print(text: str):
+    enc = sys.stdout.encoding or ""
+    if not enc.lower().startswith("utf"):
+        text = ''.join(ch for ch in text if ord(ch) < 65536)
+    try:
+        print(text)
+    except UnicodeEncodeError:
+        text = ''.join(ch for ch in text if ord(ch) < 128)
+        print(text)
+
+
+# ================================
+# 🔧 CONFIG
+# ================================
+TOKEN_PATH = Path("token.txt")
+CLINIC_SLUG = "mudr-buzalkova"
+
+BATCH_SIZE = 500
+STATES = ["ACTIVE", "DONE"]  # explicit – otherwise the API returns only ACTIVE
+
+DB_CONFIG = {
+    "host": "192.168.1.76",
+    "port": 3307,
+    "user": "root",
+    "password": "Vlado9674+",
+    "database": "medevio",
+    "charset": "utf8mb4",
+    "cursorclass": pymysql.cursors.DictCursor,
+}
+
+GRAPHQL_QUERY = r"""
+query ClinicRequestList2(
+    $clinicSlug: String!,
+    $queueId: String,
+    $queueAssignment: QueueAssignmentFilter!,
+    $state: PatientRequestState,
+    $pageInfo: PageInfo!,
+    $locale: Locale!
+) {
+  requestsResponse: listPatientRequestsForClinic2(
+    clinicSlug: $clinicSlug,
+    queueId: $queueId,
+    queueAssignment: $queueAssignment,
+    state: $state,
+    pageInfo: $pageInfo
+  ) {
+    count
+    patientRequests {
+      id
+      displayTitle(locale: $locale)
+      createdAt
+      updatedAt
+      doneAt
+      removedAt
+      extendedPatient {
+        name
+        surname
+        identificationNumber
+      }
+      lastMessage {
+        createdAt
+      }
+    }
+  }
+}
+"""
+
+
+# ================================
+# TOKEN
+# ================================
+def read_token(path: Path) -> str:
+    tok = path.read_text(encoding="utf-8").strip()
+    if tok.startswith("Bearer "):
+        return tok.split(" ", 1)[1]
+    return tok
+
+
+# ================================
+# DATETIME PARSER
+# ================================
+def to_mysql_dt(iso_str):
+    if not iso_str:
+        return None
+    try:
+        dt = parser.isoparse(iso_str)
+        if dt.tzinfo is None:
+            dt = dt.replace(tzinfo=datetime.now().astimezone().tzinfo)
+        return dt.astimezone().strftime("%Y-%m-%d %H:%M:%S")
+    except Exception:
+        return None
+
+
+# ================================
+# UPSERT
+# ================================
+def upsert(conn, r):
+    p = r.get("extendedPatient") or {}
+
+    api_updated = to_mysql_dt(r.get("updatedAt"))
+    msg_updated = to_mysql_dt((r.get("lastMessage") or {}).get("createdAt"))
+
+    final_updated = max(filter(None, [api_updated, msg_updated]), default=None)
+
+    sql = """
+        INSERT INTO pozadavky (
+            id, displayTitle, createdAt, updatedAt, doneAt, removedAt,
+            pacient_jmeno, pacient_prijmeni, pacient_rodnecislo
+        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)
+        ON DUPLICATE KEY UPDATE
+            displayTitle=VALUES(displayTitle),
+            updatedAt=VALUES(updatedAt),
+            doneAt=VALUES(doneAt),
+            removedAt=VALUES(removedAt),
+            pacient_jmeno=VALUES(pacient_jmeno),
+            pacient_prijmeni=VALUES(pacient_prijmeni),
+            pacient_rodnecislo=VALUES(pacient_rodnecislo)
+    """
+
+    vals = (
+        r.get("id"),
+        r.get("displayTitle"),
+        to_mysql_dt(r.get("createdAt")),
+        final_updated,
+        to_mysql_dt(r.get("doneAt")),
+        to_mysql_dt(r.get("removedAt")),
+        p.get("name"),
+        p.get("surname"),
+        p.get("identificationNumber"),
+    )
+
+    with conn.cursor() as cur:
+        cur.execute(sql, vals)
+    conn.commit()
+
+
+# ================================
+# FETCH PAGE (per state)
+# ================================
+def fetch_state(headers, state, offset):
+    variables = {
+        "clinicSlug": CLINIC_SLUG,
+        "queueId": None,
+        "queueAssignment": "ANY",
+        "state": state,
+        "pageInfo": {"first": BATCH_SIZE, "offset": offset},
+        "locale": "cs",
+    }
+
+    payload = {
+        "operationName": "ClinicRequestList2",
+        "query": GRAPHQL_QUERY,
+        "variables": variables,
+    }
+
+    r = requests.post("https://api.medevio.cz/graphql", json=payload, headers=headers, timeout=30)
+    r.raise_for_status()
+
+    data = r.json()["data"]["requestsResponse"]
+    return data.get("patientRequests", []), data.get("count", 0)
+
+
+# ================================
+# MAIN
+# ================================
+def main():
+    token = read_token(TOKEN_PATH)
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+
+    conn = pymysql.connect(**DB_CONFIG)
+
+    safe_print(f"\n=== FULL Medevio READ-ALL sync @ {datetime.now():%Y-%m-%d %H:%M:%S} ===")
+
+    grand_total = 0
+
+    for state in STATES:
+        safe_print(f"\n🔁 STATE = {state}")
+        offset = 0
+        total = None
+        processed = 0
+
+        while True:
+            batch, count = fetch_state(headers, state, offset)
+
+            if total is None:
+                total = count
+                safe_print(f"📡 {state}: total {total}")
+
+            if not batch:
+                break
+
+            for r in batch:
+                upsert(conn, r)
+
+            processed += len(batch)
+            safe_print(f"   • {processed}/{total}")
+
+            offset += BATCH_SIZE
+            if offset >= count:
+                break
+
+            time.sleep(0.4)
+
+        grand_total += processed
+
+    conn.close()
+    safe_print(f"\n✅ DONE – processed {grand_total} requests in total\n")
+
+
+# ================================
+if __name__ == "__main__":
+    main()
diff --git a/10ReadPozadavky/PRAVIDELNE_3_StahniKomunikacifull.py b/12 Readallinbatches/PRAVIDELNE_3_StahniKomunikacifull.py
similarity index 99%
rename from 10ReadPozadavky/PRAVIDELNE_3_StahniKomunikacifull.py
rename to 12 Readallinbatches/PRAVIDELNE_3_StahniKomunikacifull.py
index 675e4a9..34550c0 100644
--- a/10ReadPozadavky/PRAVIDELNE_3_StahniKomunikacifull.py
+++ b/12 Readallinbatches/PRAVIDELNE_3_StahniKomunikacifull.py
@@ -21,7 +21,7 @@ import argparse
 # ==============================
 # 🔧 CONFIGURATION
 # ==============================
-TOKEN_PATH = Path("token.txt")
+TOKEN_PATH = Path("../10ReadPozadavky/token.txt")
 
 DB_CONFIG = {
     "host": "192.168.1.76",
diff --git a/12 Readallinbatches/medevio_storage.json b/12 Readallinbatches/medevio_storage.json
new file mode 100644
index 0000000..a0dc794
--- /dev/null
+++ b/12 Readallinbatches/medevio_storage.json
@@ -0,0 +1 @@
+{"cookies": [{"name": "gateway-access-token", "value": "YwBgkf8McREDKs7vCZj0EZD2fJsuV8RyDPtYx7WiDoz0nFJ9kxId8kcNEPBLFSwM+Tiz80+SOdFwo+oj", "domain": "my.medevio.cz", "path": "/", "expires": 1763372319, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "aws-waf-token", "value": "b6a1d4eb-4350-40e5-8e52-1f5f9600fbb8:CgoAr9pC8c6zAAAA:OYwXLY5OyitSQPl5v2oIlS+hIxsrb5LxV4VjCyE2gJCFFE5PQu+0Zbxse2ZIofrNv5QKs0TYUDTmxPhZyTr9Qtjnq2gsVQxWHXzrbebv3Z7RbzB63u6Ymn3Fo8IbDev3CfCNcNuxCKltFEXLqSCjI2vqNY+7HZkgQBIqy2wMgzli3aSLq0w8lWYtZzyyot7q8RPXWMGTfaBUo2reY0SOSffm9rAivE9PszNfPid71CvNrGAAoxRbwb25eVujlyIcDVWe5vZ9Iw==", "domain": ".my.medevio.cz", "path": "/", "expires": 1761125920, "httpOnly": false, "secure": true, "sameSite": "Lax"}], "origins": [{"origin": "https://my.medevio.cz", "localStorage": [{"name": "awswaf_token_refresh_timestamp", "value": "1760780309860"}, {"name": "awswaf_session_storage", "value": "b6a1d4eb-4350-40e5-8e52-1f5f9600fbb8:CgoAr9pC8c+zAAAA:+vw//1NzmePjPpbGCJzUB+orCRivtJd098DbDX4AnABiGRw/+ql6ShqvFY4YdCY7w2tegb5mEPBdAmc4sNi22kNR9BuEoAgCUiMhkU1AZWfzM51zPfTh7SveCrREZ7xdvxcqKPMmfVLRYX5E4+UWh22z/LKQ7+d9VERp3J+wWCUW3dFFirkezy3N7b2FVjTlY/RxsZwhejQziTG/L3CkIFFP3mOReNgBvDpj7aKoM1knY4IL4TZ8E7zNv3nTsvzACLYvnUutVOUcofN1TfOzwZshSKsEXsMzrQn8PzLccX1jM5VSzce7gfEzl0zSPsT8NB3Sna+rhMIttDNYgvbW1HsfG2LIeKMR27Zf8hkslDRVVkcU/Kp2jLOEdhhrBKGjKY2o9/uX3NExdzh5MEKQSSRtmue01BpWYILPH23rMsz4YSmF+Ough5OeQoC95rkcYwVXMhwvUN9Zfp9UZ4xCNfFUex5dOrg9aJntYRnaceeocGUttNI5AdT0i3+osV6XHXzKxeqO8zLCS9BIsCzxaHfdqqem5DorMceuGKz+QqksatIQAA=="}, {"name": "Application.Intl.locale", "value": "cs"}, {"name": "Password.prefill", "value": "{\"username\":\"vladimir.buzalka@buzalka.cz\",\"type\":\"email\"}"}]}]}
\ No newline at end of file
diff --git a/12 Readallinbatches/token.txt b/12 Readallinbatches/token.txt
new file mode 100644
index 0000000..d31188b
--- /dev/null
+++ b/12 Readallinbatches/token.txt
@@ -0,0 +1 @@
+nYvrvgflIKcDiQg8Hhpud+qG8iGZ8eH8su4nyT/Mgcm7XQp65ygY9s39+O01wIpk/7sKd6fBHkiKvsqH
\ No newline at end of file
diff --git a/dddddd.py b/dddddd.py
new file mode 100644
index 0000000..f5f717e
--- /dev/null
+++ b/dddddd.py
@@ -0,0 +1,315 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+"""
+FAST FILE HASH INDEXER – UNRAID (BLAKE3 ONLY, ALL SHARES)
+- HARDCODED SINGLE SHARE MODE
+- SQL OPTIMIZATION
+- STRICT MODE (NO TOLERANCE) - Updates DB on any mismatch
+"""
+
+import os
+import pymysql
+import socket
+import platform
+from blake3 import blake3
+
+# ==============================
+# ENV / HOST
+# ==============================
+
+HOSTNAME = socket.gethostname()
+OS_NAME = platform.system()
+
+# HARDCODED HERE FOR TESTING:
+# SCAN_ONLY_THIS = None  # "#Fotky"
+SCAN_ONLY_THIS = '#Library'  # "#Fotky"
+
+# ==============================
+# CONFIG
+# ==============================
+
+EXCLUDED_SHARES = {"domains", "appdata", "system", "isos"}
+
+# --- File size limits (bytes) ---
+FILE_MIN_SIZE = 0
+FILE_MAX_SIZE = 1024 * 1024 * 1024 * 1024  # 1 TB
+
+DB_CONFIG = {
+    "host": "192.168.1.76",
+    "port": 3307,
+    "user": "root",
+    "password": "Vlado9674+",
+    "database": "torrents",
+    "charset": "utf8mb4",
+    "autocommit": True,
+}
+
+CHUNK_SIZE = 4 * 1024 * 1024  # 4 MB
+PRINT_SKIPPED = False
+
+
+# ==============================
+# HASH
+# ==============================
+
+def compute_blake3(path: str) -> bytes:
+    h = blake3()
+    with open(path, "rb") as f:
+        for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
+            h.update(chunk)
+    return h.digest()
+
+
+# ==============================
+# SHARE / PATH HELPERS
+# ==============================
+
+def get_user_shares():
+    if SCAN_ONLY_THIS:
+        path = f"/mnt/user/{SCAN_ONLY_THIS}"
+        if os.path.isdir(path):
+            print(f"🎯 SINGLE SHARE MODE ACTIVE: Scanning only '{SCAN_ONLY_THIS}'")
+            return [SCAN_ONLY_THIS]
+        else:
+            print(f"⚠️ ERROR: Requested share '{SCAN_ONLY_THIS}' not found in /mnt/user!")
+            return []
+
+    shares = []
+    if not os.path.exists("/mnt/user"):
+        return []
+
+    for name in os.listdir("/mnt/user"):
+        if name.startswith("."):
+            continue
+        if name in EXCLUDED_SHARES:
+            continue
+        path = f"/mnt/user/{name}"
+        if os.path.isdir(path):
+            shares.append(name)
+    return sorted(shares)
+
+
+def find_physical_roots(shares):
+    roots = []
+    if not os.path.exists("/mnt"):
+        return []
+    for disk in os.listdir("/mnt"):
+        if not disk.startswith("disk"):
+            continue
+        for share in shares:
+            path = f"/mnt/{disk}/{share}"
+            if os.path.isdir(path):
+                roots.append((share, path))
+    return sorted(roots)
+
+
+def logical_path_from_disk_path(disk_path: str) -> str:
+    if not disk_path.startswith("/mnt/disk"):
+        raise ValueError(f"Unexpected disk path: {disk_path}")
+    parts = disk_path.split("/", 3)
+    return f"/mnt/user/{parts[3]}"
+
+
+def size_allowed(size: int) -> bool:
+    if FILE_MIN_SIZE is not None and size < FILE_MIN_SIZE:
+        return False
+    if FILE_MAX_SIZE is not None and size > FILE_MAX_SIZE:
+        return False
+    return True
+
+
+# ==============================
+# MAIN
+# ==============================
+
+def main():
+    print("🚀 BLAKE3 indexer starting", flush=True)
+    print(f"🖥 Host: {HOSTNAME} | OS: {OS_NAME}", flush=True)
+
+    if FILE_MIN_SIZE or FILE_MAX_SIZE:
+        print(f"📏 File size limits: min={FILE_MIN_SIZE} max={FILE_MAX_SIZE}", flush=True)
+
+    shares = get_user_shares()
+    if not shares:
+        print("❌ No user shares to index!", flush=True)
+        return
+
+    print("📦 User shares to index:", flush=True)
+    for s in shares:
+        print(f"   - {s}", flush=True)
+
+    scan_roots = find_physical_roots(shares)
+    if not scan_roots:
+        print("❌ No physical disk roots found!", flush=True)
+        return
+
+    print("📂 Physical scan roots:", flush=True)
+    for _, path in scan_roots:
+        print(f"   - {path}", flush=True)
+
+    try:
+        db = pymysql.connect(**DB_CONFIG)
+        cur = db.cursor()
+        # === THIS IS THE "DON'T OVERTHINK IT" COMMAND ===
+        # Pins the session to UTC; MySQL then stops shifting times back and forth by an hour.
+        # cur.execute("SET time_zone = '+00:00'")
+        # =========================================
+    except Exception as e:
+        print(f"❌ Database connection failed: {e}")
+        return
+
+    print("📥 Loading already indexed files into memory...", flush=True)
+
+    # === SQL OPTIMIZATION ===
+    if SCAN_ONLY_THIS:
+        search_pattern = f"/mnt/user/{SCAN_ONLY_THIS}%"
+        print(f"⚡ OPTIMIZATION: Fetching only DB records for '{search_pattern}'", flush=True)
+        cur.execute("""
+            SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
+            FROM file_md5_index
+            WHERE host_name = %s AND full_path LIKE %s
+        """, (HOSTNAME, search_pattern))
+    else:
+        cur.execute("""
+            SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
+            FROM file_md5_index
+            WHERE host_name = %s
+        """, (HOSTNAME,))
+
+    # Load into a dict for fast lookups
+    # Format: { "path": (size, mtime) }
+    indexed_map = {row[0]: (row[1], row[2]) for row in cur.fetchall()}
+    print(f"✅ Loaded {len(indexed_map):,} indexed entries", flush=True)
+    print("======================================", flush=True)
+
+    new_files = 0
+    skipped = 0
+    filtered = 0
+    seen_paths = set()
+
+    # --- SCAN ---
+    for share, scan_root in scan_roots:
+        for root, _, files in os.walk(scan_root):
+            for fname in files:
+                disk_path = os.path.join(root, fname)
+
+                try:
+                    stat = os.stat(disk_path)
+                except OSError:
+                    continue
+
+                size = stat.st_size
+                if not size_allowed(size):
+                    filtered += 1
+                    continue
+
+                logical_path = logical_path_from_disk_path(disk_path)
+
+                if logical_path in seen_paths:
+                    continue
+                seen_paths.add(logical_path)
+
+                mtime = int(stat.st_mtime)
+
+                # === STRICT CHECK (NO TOLERANCE) ===
+                # If the file exists in the DB and both size and time match exactly, skip it.
+                # Anything else (even a 1 s time shift) counts as a change and gets updated.
+
+                is_match = False
+                if logical_path in indexed_map:
+                    db_size, db_mtime = indexed_map[logical_path]
+                    if size == db_size and mtime == db_mtime:
+                        is_match = True
+
+                if is_match:
+                    skipped += 1
+                    if PRINT_SKIPPED:
+                        print(f"⏭ SKIP {logical_path}", flush=True)
+                    continue
+                # ============================================
+
+                print("➕ NEW / UPDATED", flush=True)
+                print(f"   File: {logical_path}", flush=True)
+                print(f"   Size: {size:,} B", flush=True)
+
+                try:
+                    b3 = compute_blake3(disk_path)
+                except Exception as e:
+                    print(f"❌ BLAKE3 failed: {e}", flush=True)
+                    continue
+
+                # mtime is updated here to the on-disk value
+                cur.execute("""
+                    INSERT INTO file_md5_index
+                        (os_name, host_name, full_path, file_name, directory,
+                         file_size, mtime, blake3)
+                    VALUES (%s, %s, %s, %s, %s, %s, FROM_UNIXTIME(%s), %s)
+                    ON DUPLICATE KEY UPDATE
+                        file_size = VALUES(file_size),
+                        mtime = VALUES(mtime),
+                        blake3 = VALUES(blake3),
+                        updated_at = CURRENT_TIMESTAMP
+                """, (
+                    OS_NAME,
+                    HOSTNAME,
+                    logical_path,
+                    fname,
+                    os.path.dirname(logical_path),
+                    size,
+                    mtime,
+                    b3,
+                ))
+
+                new_files += 1
+                print(f"   B3  : {b3.hex()}", flush=True)
+                print("--------------------------------------", flush=True)
+
+    print("======================================", flush=True)
+    print(f"✅ New / updated : {new_files}", flush=True)
+    print(f"⏭ Skipped       : {skipped}", flush=True)
+    print(f"🚫 Size filtered : {filtered}", flush=True)
+
+    # ==============================
+    # DB CLEANUP – REMOVE DELETED FILES
+    # ==============================
+
+    print("🧹 Checking for deleted files in DB...", flush=True)
+
+    db_paths = set(indexed_map.keys())
+    deleted_paths = db_paths - seen_paths
+
+    # Restrict to the current share (if single-share mode is active)
+    if SCAN_ONLY_THIS:
+        prefix = f"/mnt/user/{SCAN_ONLY_THIS}/"
+        deleted_paths = {p for p in deleted_paths if p.startswith(prefix)}
+
+    if deleted_paths:
+        print(f"🗑 Removing {len(deleted_paths):,} deleted files from DB", flush=True)
+
+        BATCH_SIZE = 1000
+        deleted_paths = list(deleted_paths)
+
+        for i in range(0, len(deleted_paths), BATCH_SIZE):
+            batch = deleted_paths[i:i + BATCH_SIZE]
+            placeholders = ",".join(["%s"] * len(batch))
+
+            sql = f"""
+                DELETE FROM file_md5_index
+                WHERE host_name = %s
+                  AND full_path IN ({placeholders})
+            """
+
+            cur.execute(sql, (HOSTNAME, *batch))
+
+        print("✅ DB cleanup completed", flush=True)
+    else:
+        print("✅ No deleted files found in DB", flush=True)
+
+    cur.close()
+    db.close()
+    print("🏁 Script finished", flush=True)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
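
Note: none of the scripts in this patch create the MySQL tables they write to (pozadavky, medevio_conversation, medevio_downloads). The sketch below is a hypothetical reconstruction of the minimum schema implied by the INSERT ... ON DUPLICATE KEY UPDATE statements above — column types, lengths, and key choices are assumptions inferred from the code, not the author's actual DDL. In particular, medevio_downloads needs a unique key on attachment_id, and medevio_conversation a primary key on id, or the upserts degrade into plain inserts.

#!/usr/bin/env python3
# Hypothetical schema bootstrap inferred from the queries in this patch; adjust types as needed.
import pymysql

DDL = [
    # pozadavky: upserted by the batch reader; messagesProcessed drives the delta sync.
    """
    CREATE TABLE IF NOT EXISTS pozadavky (
        id VARCHAR(64) PRIMARY KEY,
        displayTitle TEXT,
        createdAt DATETIME NULL,
        updatedAt DATETIME NULL,
        doneAt DATETIME NULL,
        removedAt DATETIME NULL,
        pacient_jmeno VARCHAR(255) NULL,
        pacient_prijmeni VARCHAR(255) NULL,
        pacient_rodnecislo VARCHAR(32) NULL,
        messagesProcessed DATETIME NULL
    ) CHARACTER SET utf8mb4
    """,
    # medevio_conversation: one row per message; the PK on id makes the upsert idempotent.
    """
    CREATE TABLE IF NOT EXISTS medevio_conversation (
        id VARCHAR(64) PRIMARY KEY,
        request_id VARCHAR(64) NOT NULL,
        sender_name VARCHAR(255) NULL,
        sender_id VARCHAR(64) NULL,
        sender_clinic_id VARCHAR(64) NULL,
        text MEDIUMTEXT NULL,
        created_at DATETIME NULL,
        read_at DATETIME NULL,
        updated_at DATETIME NULL,
        attachment_url TEXT NULL,
        attachment_description TEXT NULL,
        attachment_content_type VARCHAR(128) NULL,
        KEY idx_request (request_id)
    ) CHARACTER SET utf8mb4
    """,
    # medevio_downloads: attachment_id must be UNIQUE for ON DUPLICATE KEY UPDATE to fire.
    """
    CREATE TABLE IF NOT EXISTS medevio_downloads (
        request_id VARCHAR(64) NOT NULL,
        attachment_id VARCHAR(64) NOT NULL,
        attachment_type VARCHAR(64) NULL,
        filename VARCHAR(512) NULL,
        content_type VARCHAR(128) NULL,
        file_size BIGINT NULL,
        created_at DATETIME NULL,
        downloaded_at DATETIME NULL,
        file_content LONGBLOB,
        UNIQUE KEY uq_attachment (attachment_id)
    ) CHARACTER SET utf8mb4
    """,
]

def main():
    conn = pymysql.connect(host="192.168.1.76", port=3307, user="root",
                           password="...", database="medevio", charset="utf8mb4")
    with conn.cursor() as cur:
        for stmt in DDL:
            cur.execute(stmt)
    conn.commit()
    conn.close()

if __name__ == "__main__":
    main()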
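Since the PRAVIDELNE_* ("periodic") scripts are meant to run on a schedule, a small wrapper can chain the full request read with the message delta sync so a scheduler only invokes one entry point. This is a minimal sketch under stated assumptions: the paths match this repo's layout, and the run order (refresh pozadavky first, then pull messages) is inferred from the file names, not from a documented workflow.

#!/usr/bin/env python3
# Hypothetical periodic runner; script paths are assumptions based on this repo's layout.
import subprocess
import sys
from pathlib import Path

# Assumed order: refresh the pozadavky table first, then delta-sync its messages.
STEPS = [
    Path("12 Readallinbatches/10 Readallpozadavkyinbatches.py"),
    Path("10ReadPozadavky/PRAVIDELNE_3_StahniKomunikaciDELTA.py"),
]

def main() -> int:
    for script in STEPS:
        print(f"=== running {script} ===", flush=True)
        # Each script reads its own token.txt relative to its directory,
        # so run it with cwd set to the script's folder.
        result = subprocess.run([sys.executable, script.name], cwd=script.parent)
        if result.returncode != 0:
            print(f"step failed ({result.returncode}), aborting chain", flush=True)
            return result.returncode
    return 0

if __name__ == "__main__":
    raise SystemExit(main())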