Z230
293
10ReadPozadavky/PRAVIDELNE_3_StahniKomunikaciDELTA.py
Normal file
@@ -0,0 +1,293 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Delta sync of Medevio communication.

Downloads only the messages changed after messagesProcessed for each request.
"""

import requests
import pymysql
from pathlib import Path
from datetime import datetime
import time
import sys

# ==============================
# UTF-8 SAFE OUTPUT
# ==============================
try:
    sys.stdout.reconfigure(encoding='utf-8')
    sys.stderr.reconfigure(encoding='utf-8')
except AttributeError:
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')


def safe_print(text: str):
    enc = sys.stdout.encoding or ""
    if not enc.lower().startswith("utf"):
        # Drop characters outside the BMP (e.g. some emoji) for legacy consoles.
        text = ''.join(ch for ch in text if ord(ch) < 65536)
    try:
        print(text)
    except UnicodeEncodeError:
        # Last resort: ASCII only.
        text = ''.join(ch for ch in text if ord(ch) < 128)
        print(text)


# ==============================
# CONFIG
# ==============================
TOKEN_PATH = Path("token.txt")

DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
    "charset": "utf8mb4",
    "cursorclass": pymysql.cursors.DictCursor,
}

GRAPHQL_QUERY_MESSAGES = r"""
query UseMessages_ListMessages($requestId: String!, $updatedSince: DateTime) {
  messages: listMessages(
    patientRequestId: $requestId,
    updatedSince: $updatedSince
  ) {
    id
    createdAt
    updatedAt
    readAt
    text
    type
    sender {
      id
      name
      surname
      clinicId
    }
    medicalRecord {
      id
      description
      contentType
      url
      downloadUrl
      createdAt
      updatedAt
    }
  }
}
"""
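# Illustrative request shape (values are made-up examples, not from this
# commit): fetch_messages() below POSTs JSON like the following. Passing
# "updatedSince": null presumably returns the full message history, while a
# timestamp restricts the response to messages changed since that moment.
#
#   {
#       "operationName": "UseMessages_ListMessages",
#       "query": "<GRAPHQL_QUERY_MESSAGES>",
#       "variables": {
#           "requestId": "req-123",
#           "updatedSince": "2024-01-01T00:00:00Z"
#       }
#   }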
# ==============================
# HELPERS
# ==============================
def parse_dt(s):
    if not s:
        return None
    try:
        return datetime.fromisoformat(s.replace("Z", "+00:00"))
    except Exception:
        return None


def read_token(path: Path) -> str:
    tok = path.read_text(encoding="utf-8").strip()
    return tok.replace("Bearer ", "")


# ==============================
# FETCH MESSAGES (DELTA)
# ==============================
def fetch_messages(headers, request_id, updated_since):
    payload = {
        "operationName": "UseMessages_ListMessages",
        "query": GRAPHQL_QUERY_MESSAGES,
        "variables": {
            "requestId": request_id,
            "updatedSince": updated_since,
        },
    }

    r = requests.post(
        "https://api.medevio.cz/graphql",
        json=payload,
        headers=headers,
        timeout=30
    )

    if r.status_code != 200:
        safe_print(f"❌ HTTP {r.status_code} for request {request_id}")
        return []

    j = r.json()
    if "errors" in j:
        safe_print(f"❌ GraphQL error for {request_id}: {j['errors']}")
        return []

    return j.get("data", {}).get("messages", []) or []


# ==============================
# INSERT MESSAGE
# ==============================
def insert_message(cur, req_id, msg):
    sender = msg.get("sender") or {}
    sender_name = " ".join(
        x for x in [sender.get("name"), sender.get("surname")] if x
    ) or None

    mr = msg.get("medicalRecord") or {}

    sql = """
        INSERT INTO medevio_conversation (
            id, request_id,
            sender_name, sender_id, sender_clinic_id,
            text, created_at, read_at, updated_at,
            attachment_url, attachment_description, attachment_content_type
        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        ON DUPLICATE KEY UPDATE
            sender_name = VALUES(sender_name),
            sender_id = VALUES(sender_id),
            sender_clinic_id = VALUES(sender_clinic_id),
            text = VALUES(text),
            created_at = VALUES(created_at),
            read_at = VALUES(read_at),
            updated_at = VALUES(updated_at),
            attachment_url = VALUES(attachment_url),
            attachment_description = VALUES(attachment_description),
            attachment_content_type = VALUES(attachment_content_type)
    """

    cur.execute(sql, (
        msg.get("id"),
        req_id,
        sender_name,
        sender.get("id"),
        sender.get("clinicId"),
        msg.get("text"),
        parse_dt(msg.get("createdAt")),
        parse_dt(msg.get("readAt")),
        parse_dt(msg.get("updatedAt")),
        mr.get("downloadUrl") or mr.get("url"),
        mr.get("description"),
        mr.get("contentType")
    ))
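# Assumption worth noting: the ON DUPLICATE KEY UPDATE clause above only
# deduplicates if medevio_conversation has a PRIMARY KEY (or UNIQUE index)
# on id; without one, MySQL would insert a new row on every re-sync.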
# ==============================
# INSERT ATTACHMENT (DEDUP)
# ==============================
def insert_download(cur, req_id, msg, existing_ids):
    mr = msg.get("medicalRecord") or {}
    attachment_id = mr.get("id")
    if not attachment_id or attachment_id in existing_ids:
        return

    url = mr.get("downloadUrl") or mr.get("url")
    if not url:
        return

    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        data = r.content
    except Exception as e:
        safe_print(f"⚠️ Attachment download failed: {e}")
        return

    filename = url.split("/")[-1].split("?")[0]

    cur.execute("""
        INSERT INTO medevio_downloads (
            request_id, attachment_id, attachment_type,
            filename, content_type, file_size, created_at, file_content
        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
        ON DUPLICATE KEY UPDATE
            file_content = VALUES(file_content),
            file_size = VALUES(file_size),
            downloaded_at = NOW()
    """, (
        req_id,
        attachment_id,
        "MESSAGE_ATTACHMENT",
        filename,
        mr.get("contentType"),
        len(data),
        parse_dt(msg.get("createdAt")),
        data
    ))

    existing_ids.add(attachment_id)
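# existing_ids is shared across the whole run: it is seeded from the DB in
# main() and extended here after each successful insert, so an attachment
# referenced by several messages is downloaded and stored at most once.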
# ==============================
# MAIN
# ==============================
def main():
    token = read_token(TOKEN_PATH)
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }

    conn = pymysql.connect(**DB_CONFIG)

    # existing attachments
    with conn.cursor() as cur:
        cur.execute("SELECT attachment_id FROM medevio_downloads")
        existing_ids = {r["attachment_id"] for r in cur.fetchall()}

    # select requests needing sync
    with conn.cursor() as cur:
        cur.execute("""
            SELECT id, messagesProcessed
            FROM pozadavky
            WHERE messagesProcessed IS NULL
               OR messagesProcessed < updatedAt
        """)
        rows = cur.fetchall()

    safe_print(f"📋 Found {len(rows)} requests for message delta-sync\n")

    for i, row in enumerate(rows, 1):
        req_id = row["id"]
        updated_since = row["messagesProcessed"]
        if updated_since:
            # NOTE: the DB timestamp is naive; appending "Z" assumes the DB
            # session effectively runs in UTC.
            updated_since = updated_since.replace(microsecond=0).isoformat() + "Z"

        safe_print(f"[{i}/{len(rows)}] {req_id}")

        messages = fetch_messages(headers, req_id, updated_since)
        if not messages:
            safe_print("   ⏭ No new messages")
        else:
            with conn.cursor() as cur:
                for msg in messages:
                    insert_message(cur, req_id, msg)
                    insert_download(cur, req_id, msg, existing_ids)
            conn.commit()
            safe_print(f"   ✅ {len(messages)} new/updated messages")

        # messagesProcessed is advanced even when the fetch returned nothing,
        # so an HTTP/GraphQL failure (which also yields []) skips that window.
        with conn.cursor() as cur:
            cur.execute(
                "UPDATE pozadavky SET messagesProcessed = NOW() WHERE id = %s",
                (req_id,)
            )
        conn.commit()

        time.sleep(0.25)

    conn.close()
    safe_print("\n🎉 Delta message sync DONE")


# ==============================
if __name__ == "__main__":
    main()
239
12 Readallinbatches/10 Readallpozadavkyinbatches.py
Normal file
@@ -0,0 +1,239 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pymysql
import requests
from pathlib import Path
from datetime import datetime
from dateutil import parser
import time
import sys

# ================================
# UTF-8 SAFE OUTPUT (Windows friendly)
# ================================
try:
    sys.stdout.reconfigure(encoding='utf-8')
    sys.stderr.reconfigure(encoding='utf-8')
except AttributeError:
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')


def safe_print(text: str):
    enc = sys.stdout.encoding or ""
    if not enc.lower().startswith("utf"):
        text = ''.join(ch for ch in text if ord(ch) < 65536)
    try:
        print(text)
    except UnicodeEncodeError:
        text = ''.join(ch for ch in text if ord(ch) < 128)
        print(text)


# ================================
# 🔧 CONFIG
# ================================
TOKEN_PATH = Path("token.txt")
CLINIC_SLUG = "mudr-buzalkova"

BATCH_SIZE = 500
STATES = ["ACTIVE", "DONE"]  # explicit – otherwise the API returns only ACTIVE

DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "medevio",
    "charset": "utf8mb4",
    "cursorclass": pymysql.cursors.DictCursor,
}

GRAPHQL_QUERY = r"""
query ClinicRequestList2(
  $clinicSlug: String!,
  $queueId: String,
  $queueAssignment: QueueAssignmentFilter!,
  $state: PatientRequestState,
  $pageInfo: PageInfo!,
  $locale: Locale!
) {
  requestsResponse: listPatientRequestsForClinic2(
    clinicSlug: $clinicSlug,
    queueId: $queueId,
    queueAssignment: $queueAssignment,
    state: $state,
    pageInfo: $pageInfo
  ) {
    count
    patientRequests {
      id
      displayTitle(locale: $locale)
      createdAt
      updatedAt
      doneAt
      removedAt
      extendedPatient {
        name
        surname
        identificationNumber
      }
      lastMessage {
        createdAt
      }
    }
  }
}
"""
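# The query pulls lastMessage.createdAt alongside updatedAt on purpose:
# upsert() below takes the newer of the two timestamps, so a request whose
# only change is a fresh message still gets a bumped updatedAt locally.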
# ================================
# TOKEN
# ================================
def read_token(path: Path) -> str:
    tok = path.read_text(encoding="utf-8").strip()
    if tok.startswith("Bearer "):
        return tok.split(" ", 1)[1]
    return tok


# ================================
# DATETIME PARSER
# ================================
def to_mysql_dt(iso_str):
    if not iso_str:
        return None
    try:
        dt = parser.isoparse(iso_str)
        if dt.tzinfo is None:
            # Naive timestamps are assumed to already be in local time.
            dt = dt.replace(tzinfo=datetime.now().astimezone().tzinfo)
        return dt.astimezone().strftime("%Y-%m-%d %H:%M:%S")
    except Exception:
        return None
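# Illustrative conversion (example value, assuming a CEST host):
#   to_mysql_dt("2024-05-01T10:00:00Z") -> "2024-05-01 12:00:00"
# i.e. the timestamp is rendered in the machine's local zone before storage.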
# ================================
# UPSERT
# ================================
def upsert(conn, r):
    p = r.get("extendedPatient") or {}

    api_updated = to_mysql_dt(r.get("updatedAt"))
    msg_updated = to_mysql_dt((r.get("lastMessage") or {}).get("createdAt"))

    # "%Y-%m-%d %H:%M:%S" strings sort chronologically, so max() picks the newer one.
    final_updated = max(filter(None, [api_updated, msg_updated]), default=None)

    sql = """
        INSERT INTO pozadavky (
            id, displayTitle, createdAt, updatedAt, doneAt, removedAt,
            pacient_jmeno, pacient_prijmeni, pacient_rodnecislo
        ) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)
        ON DUPLICATE KEY UPDATE
            displayTitle=VALUES(displayTitle),
            updatedAt=VALUES(updatedAt),
            doneAt=VALUES(doneAt),
            removedAt=VALUES(removedAt),
            pacient_jmeno=VALUES(pacient_jmeno),
            pacient_prijmeni=VALUES(pacient_prijmeni),
            pacient_rodnecislo=VALUES(pacient_rodnecislo)
    """

    vals = (
        r.get("id"),
        r.get("displayTitle"),
        to_mysql_dt(r.get("createdAt")),
        final_updated,
        to_mysql_dt(r.get("doneAt")),
        to_mysql_dt(r.get("removedAt")),
        p.get("name"),
        p.get("surname"),
        p.get("identificationNumber"),
    )

    with conn.cursor() as cur:
        cur.execute(sql, vals)
    conn.commit()


# ================================
# FETCH PAGE (per state)
# ================================
def fetch_state(headers, state, offset):
    variables = {
        "clinicSlug": CLINIC_SLUG,
        "queueId": None,
        "queueAssignment": "ANY",
        "state": state,
        "pageInfo": {"first": BATCH_SIZE, "offset": offset},
        "locale": "cs",
    }

    payload = {
        "operationName": "ClinicRequestList2",
        "query": GRAPHQL_QUERY,
        "variables": variables,
    }

    r = requests.post("https://api.medevio.cz/graphql", json=payload, headers=headers, timeout=30)
    r.raise_for_status()

    data = r.json()["data"]["requestsResponse"]
    return data.get("patientRequests", []), data.get("count", 0)
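# Paging sketch: the server reports the total `count`; the main loop below
# advances `offset` by BATCH_SIZE (0, 500, 1000, ...) until offset >= count
# or an empty page comes back, whichever happens first.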
# ================================
# MAIN
# ================================
def main():
    token = read_token(TOKEN_PATH)
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }

    conn = pymysql.connect(**DB_CONFIG)

    safe_print(f"\n=== FULL Medevio READ-ALL sync @ {datetime.now():%Y-%m-%d %H:%M:%S} ===")

    grand_total = 0

    for state in STATES:
        safe_print(f"\n🔁 STATE = {state}")
        offset = 0
        total = None
        processed = 0

        while True:
            batch, count = fetch_state(headers, state, offset)

            if total is None:
                total = count
                safe_print(f"📡 {state}: {total} in total")

            if not batch:
                break

            for r in batch:
                upsert(conn, r)

            processed += len(batch)
            safe_print(f"   • {processed}/{total}")

            offset += BATCH_SIZE
            if offset >= count:
                break

            time.sleep(0.4)

        grand_total += processed

    conn.close()
    safe_print(f"\n✅ DONE – processed {grand_total} requests in total\n")


# ================================
if __name__ == "__main__":
    main()
@@ -21,7 +21,7 @@ import argparse
 # ==============================
 # 🔧 CONFIGURATION
 # ==============================
-TOKEN_PATH = Path("token.txt")
+TOKEN_PATH = Path("../10ReadPozadavky/token.txt")
 
 DB_CONFIG = {
     "host": "192.168.1.76",
1
12 Readallinbatches/medevio_storage.json
Normal file
@@ -0,0 +1 @@
{"cookies": [{"name": "gateway-access-token", "value": "YwBgkf8McREDKs7vCZj0EZD2fJsuV8RyDPtYx7WiDoz0nFJ9kxId8kcNEPBLFSwM+Tiz80+SOdFwo+oj", "domain": "my.medevio.cz", "path": "/", "expires": 1763372319, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "aws-waf-token", "value": "b6a1d4eb-4350-40e5-8e52-1f5f9600fbb8:CgoAr9pC8c6zAAAA:OYwXLY5OyitSQPl5v2oIlS+hIxsrb5LxV4VjCyE2gJCFFE5PQu+0Zbxse2ZIofrNv5QKs0TYUDTmxPhZyTr9Qtjnq2gsVQxWHXzrbebv3Z7RbzB63u6Ymn3Fo8IbDev3CfCNcNuxCKltFEXLqSCjI2vqNY+7HZkgQBIqy2wMgzli3aSLq0w8lWYtZzyyot7q8RPXWMGTfaBUo2reY0SOSffm9rAivE9PszNfPid71CvNrGAAoxRbwb25eVujlyIcDVWe5vZ9Iw==", "domain": ".my.medevio.cz", "path": "/", "expires": 1761125920, "httpOnly": false, "secure": true, "sameSite": "Lax"}], "origins": [{"origin": "https://my.medevio.cz", "localStorage": [{"name": "awswaf_token_refresh_timestamp", "value": "1760780309860"}, {"name": "awswaf_session_storage", "value": "b6a1d4eb-4350-40e5-8e52-1f5f9600fbb8:CgoAr9pC8c+zAAAA:+vw//1NzmePjPpbGCJzUB+orCRivtJd098DbDX4AnABiGRw/+ql6ShqvFY4YdCY7w2tegb5mEPBdAmc4sNi22kNR9BuEoAgCUiMhkU1AZWfzM51zPfTh7SveCrREZ7xdvxcqKPMmfVLRYX5E4+UWh22z/LKQ7+d9VERp3J+wWCUW3dFFirkezy3N7b2FVjTlY/RxsZwhejQziTG/L3CkIFFP3mOReNgBvDpj7aKoM1knY4IL4TZ8E7zNv3nTsvzACLYvnUutVOUcofN1TfOzwZshSKsEXsMzrQn8PzLccX1jM5VSzce7gfEzl0zSPsT8NB3Sna+rhMIttDNYgvbW1HsfG2LIeKMR27Zf8hkslDRVVkcU/Kp2jLOEdhhrBKGjKY2o9/uX3NExdzh5MEKQSSRtmue01BpWYILPH23rMsz4YSmF+Ough5OeQoC95rkcYwVXMhwvUN9Zfp9UZ4xCNfFUex5dOrg9aJntYRnaceeocGUttNI5AdT0i3+osV6XHXzKxeqO8zLCS9BIsCzxaHfdqqem5DorMceuGKz+QqksatIQAA=="}, {"name": "Application.Intl.locale", "value": "cs"}, {"name": "Password.prefill", "value": "{\"username\":\"vladimir.buzalka@buzalka.cz\",\"type\":\"email\"}"}]}]}
1
12 Readallinbatches/token.txt
Normal file
@@ -0,0 +1 @@
nYvrvgflIKcDiQg8Hhpud+qG8iGZ8eH8su4nyT/Mgcm7XQp65ygY9s39+O01wIpk/7sKd6fBHkiKvsqH
315
dddddd.py
Normal file
@@ -0,0 +1,315 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-

"""
FAST FILE HASH INDEXER – UNRAID (BLAKE3 ONLY, ALL SHARES)
- HARDCODED SINGLE SHARE MODE
- SQL OPTIMIZATION
- STRICT MODE (NO TOLERANCE) – updates the DB on any mismatch
"""

import os
import pymysql
import socket
import platform
from blake3 import blake3

# ==============================
# ENV / HOST
# ==============================

HOSTNAME = socket.gethostname()
OS_NAME = platform.system()

# HARDCODED HERE FOR TESTING:
# SCAN_ONLY_THIS = None  # "#Fotky"
SCAN_ONLY_THIS = '#Library'  # "#Fotky"

# ==============================
# CONFIG
# ==============================

EXCLUDED_SHARES = {"domains", "appdata", "system", "isos"}

# --- File size limits (bytes) ---
FILE_MIN_SIZE = 0
FILE_MAX_SIZE = 1024 * 1024 * 1024 * 1024  # 1 TiB

DB_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "torrents",
    "charset": "utf8mb4",
    "autocommit": True,
}

CHUNK_SIZE = 4 * 1024 * 1024  # 4 MB
PRINT_SKIPPED = False


# ==============================
# HASH
# ==============================

def compute_blake3(path: str) -> bytes:
    h = blake3()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
            h.update(chunk)
    return h.digest()
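# compute_blake3() streams the file in CHUNK_SIZE pieces, so memory use stays
# flat regardless of file size. Illustrative call (hypothetical path):
#   digest_hex = compute_blake3("/mnt/disk1/#Library/movie.mkv").hex()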
# ==============================
# SHARE / PATH HELPERS
# ==============================

def get_user_shares():
    if SCAN_ONLY_THIS:
        path = f"/mnt/user/{SCAN_ONLY_THIS}"
        if os.path.isdir(path):
            print(f"🎯 SINGLE SHARE MODE ACTIVE: Scanning only '{SCAN_ONLY_THIS}'")
            return [SCAN_ONLY_THIS]
        else:
            print(f"⚠️ ERROR: Requested share '{SCAN_ONLY_THIS}' not found in /mnt/user!")
            return []

    shares = []
    if not os.path.exists("/mnt/user"):
        return []

    for name in os.listdir("/mnt/user"):
        if name.startswith("."):
            continue
        if name in EXCLUDED_SHARES:
            continue
        path = f"/mnt/user/{name}"
        if os.path.isdir(path):
            shares.append(name)
    return sorted(shares)


def find_physical_roots(shares):
    roots = []
    if not os.path.exists("/mnt"):
        return []
    for disk in os.listdir("/mnt"):
        if not disk.startswith("disk"):
            continue
        for share in shares:
            path = f"/mnt/{disk}/{share}"
            if os.path.isdir(path):
                roots.append((share, path))
    return sorted(roots)


def logical_path_from_disk_path(disk_path: str) -> str:
    if not disk_path.startswith("/mnt/disk"):
        raise ValueError(f"Unexpected disk path: {disk_path}")
    # "/mnt/disk3/Share/dir/file" -> ['', 'mnt', 'disk3', 'Share/dir/file']
    parts = disk_path.split("/", 3)
    return f"/mnt/user/{parts[3]}"


def size_allowed(size: int) -> bool:
    if FILE_MIN_SIZE is not None and size < FILE_MIN_SIZE:
        return False
    if FILE_MAX_SIZE is not None and size > FILE_MAX_SIZE:
        return False
    return True
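# The scan below reads files via their physical /mnt/diskN paths but keys the
# DB on the logical /mnt/user path, presumably so entries survive Unraid
# moving a file between disks; seen_paths deduplicates anything visible on
# several disks under the same logical path.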
# ==============================
# MAIN
# ==============================

def main():
    print("🚀 BLAKE3 indexer starting", flush=True)
    print(f"🖥 Host: {HOSTNAME} | OS: {OS_NAME}", flush=True)

    if FILE_MIN_SIZE or FILE_MAX_SIZE:
        print(f"📏 File size limits: min={FILE_MIN_SIZE} max={FILE_MAX_SIZE}", flush=True)

    shares = get_user_shares()
    if not shares:
        print("❌ No user shares to index!", flush=True)
        return

    print("📦 User shares to index:", flush=True)
    for s in shares:
        print(f"   - {s}", flush=True)

    scan_roots = find_physical_roots(shares)
    if not scan_roots:
        print("❌ No physical disk roots found!", flush=True)
        return

    print("📂 Physical scan roots:", flush=True)
    for _, path in scan_roots:
        print(f"   - {path}", flush=True)

    try:
        db = pymysql.connect(**DB_CONFIG)
        cur = db.cursor()
        # === THE "DON'T THINK ABOUT IT" SWITCH ===
        # Pins the session to UTC so MySQL stops shifting times back and forth by an hour.
        # cur.execute("SET time_zone = '+00:00'")
        # =========================================
    except Exception as e:
        print(f"❌ Database connection failed: {e}")
        return

    print("📥 Loading already indexed files into memory...", flush=True)

    # === SQL OPTIMIZATION ===
    if SCAN_ONLY_THIS:
        search_pattern = f"/mnt/user/{SCAN_ONLY_THIS}%"
        print(f"⚡ OPTIMIZATION: Fetching only DB records for '{search_pattern}'", flush=True)
        cur.execute("""
            SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
            FROM file_md5_index
            WHERE host_name = %s AND full_path LIKE %s
        """, (HOSTNAME, search_pattern))
    else:
        cur.execute("""
            SELECT full_path, file_size, UNIX_TIMESTAMP(mtime)
            FROM file_md5_index
            WHERE host_name = %s
        """, (HOSTNAME,))

    # Load into a dict for fast lookups.
    # Format: { "path": (size, mtime) }
    indexed_map = {row[0]: (row[1], row[2]) for row in cur.fetchall()}
    print(f"✅ Loaded {len(indexed_map):,} indexed entries", flush=True)
    print("======================================", flush=True)

    new_files = 0
    skipped = 0
    filtered = 0
    seen_paths = set()

    # --- SCAN ---
    for share, scan_root in scan_roots:
        for root, _, files in os.walk(scan_root):
            for fname in files:
                disk_path = os.path.join(root, fname)

                try:
                    stat = os.stat(disk_path)
                except OSError:
                    continue

                size = stat.st_size
                if not size_allowed(size):
                    filtered += 1
                    continue

                logical_path = logical_path_from_disk_path(disk_path)

                if logical_path in seen_paths:
                    continue
                seen_paths.add(logical_path)

                mtime = int(stat.st_mtime)

                # === STRICT CHECK (NO TOLERANCE) ===
                # If the file exists in the DB and both size and mtime match
                # exactly, skip it. Anything else (even a 1 s time drift)
                # counts as a change and is re-indexed.

                is_match = False
                if logical_path in indexed_map:
                    db_size, db_mtime = indexed_map[logical_path]
                    if size == db_size and mtime == db_mtime:
                        is_match = True

                if is_match:
                    skipped += 1
                    if PRINT_SKIPPED:
                        print(f"⏭ SKIP {logical_path}", flush=True)
                    continue
                # ============================================

                print("➕ NEW / UPDATED", flush=True)
                print(f"   File: {logical_path}", flush=True)
                print(f"   Size: {size:,} B", flush=True)

                try:
                    b3 = compute_blake3(disk_path)
                except Exception as e:
                    print(f"❌ BLAKE3 failed: {e}", flush=True)
                    continue

                # The UPDATE branch below also refreshes mtime to the on-disk value.
                cur.execute("""
                    INSERT INTO file_md5_index
                        (os_name, host_name, full_path, file_name, directory,
                         file_size, mtime, blake3)
                    VALUES (%s, %s, %s, %s, %s, %s, FROM_UNIXTIME(%s), %s)
                    ON DUPLICATE KEY UPDATE
                        file_size = VALUES(file_size),
                        mtime = VALUES(mtime),
                        blake3 = VALUES(blake3),
                        updated_at = CURRENT_TIMESTAMP
                """, (
                    OS_NAME,
                    HOSTNAME,
                    logical_path,
                    fname,
                    os.path.dirname(logical_path),
                    size,
                    mtime,
                    b3,
                ))

                new_files += 1
                print(f"   B3  : {b3.hex()}", flush=True)
                print("--------------------------------------", flush=True)

    print("======================================", flush=True)
    print(f"✅ New / updated : {new_files}", flush=True)
    print(f"⏭ Skipped       : {skipped}", flush=True)
    print(f"🚫 Size filtered : {filtered}", flush=True)
    print("🏁 Script finished", flush=True)

    # ==============================
    # DB CLEANUP – REMOVE DELETED FILES
    # ==============================

    print("🧹 Checking for deleted files in DB...", flush=True)

    db_paths = set(indexed_map.keys())
    deleted_paths = db_paths - seen_paths

    # Restrict the cleanup to the current share (when single-share mode is active).
    if SCAN_ONLY_THIS:
        prefix = f"/mnt/user/{SCAN_ONLY_THIS}/"
        deleted_paths = {p for p in deleted_paths if p.startswith(prefix)}

    if deleted_paths:
        print(f"🗑 Removing {len(deleted_paths):,} deleted files from DB", flush=True)

        BATCH_SIZE = 1000
        deleted_paths = list(deleted_paths)

        for i in range(0, len(deleted_paths), BATCH_SIZE):
            batch = deleted_paths[i:i + BATCH_SIZE]
            placeholders = ",".join(["%s"] * len(batch))

            sql = f"""
                DELETE FROM file_md5_index
                WHERE host_name = %s
                  AND full_path IN ({placeholders})
            """

            cur.execute(sql, (HOSTNAME, *batch))

        print("✅ DB cleanup completed", flush=True)
    else:
        print("✅ No deleted files found in DB", flush=True)

    cur.close()
    db.close()


if __name__ == "__main__":
    main()