tw22
This commit is contained in:
2
.idea/Medevio.iml
generated
2
.idea/Medevio.iml
generated
@@ -4,7 +4,7 @@
|
|||||||
<content url="file://$MODULE_DIR$">
|
<content url="file://$MODULE_DIR$">
|
||||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||||
</content>
|
</content>
|
||||||
<orderEntry type="jdk" jdkName="Python 3.12 (Medevio)" jdkType="Python SDK" />
|
<orderEntry type="jdk" jdkName="Python 3.12" jdkType="Python SDK" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
</component>
|
</component>
|
||||||
</module>
|
</module>
|
||||||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
@@ -3,5 +3,5 @@
|
|||||||
<component name="Black">
|
<component name="Black">
|
||||||
<option name="sdkName" value="Python 3.12 (Medevio)" />
|
<option name="sdkName" value="Python 3.12 (Medevio)" />
|
||||||
</component>
|
</component>
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (Medevio)" project-jdk-type="Python SDK" />
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12" project-jdk-type="Python SDK" />
|
||||||
</project>
|
</project>
|
||||||
173
10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental.py
Normal file
173
10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental.py
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import pymysql
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
import time
|
||||||
|
|
||||||
|
# ==============================
|
||||||
|
# ⚙️ CONFIGURATION
|
||||||
|
# ==============================
|
||||||
|
# Connection settings for the MySQL server holding the `medevio` database.
# Every value can be overridden through a MEDEVIO_* environment variable so the
# script can run on another machine without editing the source.
# NOTE(security): the fallback credentials are committed in plain text; prefer
# the environment variables and rotate/remove these defaults.
DB_CONFIG = {
    "host": os.environ.get("MEDEVIO_DB_HOST", "192.168.1.76"),
    "port": int(os.environ.get("MEDEVIO_DB_PORT", "3307")),
    "user": os.environ.get("MEDEVIO_DB_USER", "root"),
    "password": os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("MEDEVIO_DB_NAME", "medevio"),
    "charset": "utf8mb4",
}

# Root directory that receives one sub-folder per request.
BASE_DIR = Path(
    os.environ.get(
        "MEDEVIO_EXPORT_DIR",
        r"d:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP",
    )
)
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_name(name: str) -> str:
    """Return *name* made safe for use as a file or folder name.

    Characters ``< > : " / \\ | ? *`` and ASCII control characters are each
    replaced by an underscore, and surrounding whitespace is removed.
    Trailing dots and spaces are removed as well: Windows drops them when the
    entry is created, so keeping them would make the expected name differ
    from the name later read back from disk.
    """
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip()
    return cleaned.rstrip(". ")
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================
|
||||||
|
# 🧹 DELETE UNEXPECTED FILES
|
||||||
|
# ==============================
|
||||||
|
def clean_folder(folder: Path, valid_files: set):
    """Delete every regular file in *folder* whose name is not in *valid_files*.

    Args:
        folder: Directory to clean. Nothing happens when it is missing or is
            not a directory.
        valid_files: Sanitized file names that are allowed to stay.

    Sub-directories are left untouched. A file that cannot be removed is
    reported on stdout and skipped.
    """
    # is_dir() also rejects a plain file at this path, which iterdir() would
    # otherwise fail on.
    if not folder.is_dir():
        return

    # Snapshot the listing first so deleting entries cannot affect iteration.
    for entry in list(folder.iterdir()):
        if not entry.is_file() or sanitize_name(entry.name) in valid_files:
            continue

        print(f"🗑️ Removing unexpected file: {entry.name}")
        try:
            entry.unlink()
        except OSError as e:
            print(f"⚠️ Could not delete {entry}: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================
|
||||||
|
# 📦 DB CONNECTION
|
||||||
|
# ==============================
|
||||||
|
def request_folder(row: dict) -> Path:
    """Return the target folder for the request described by *row*.

    The folder name is "<updatedAt date> <surname>, <first name> <request id>";
    the current date is used when the request has no updatedAt value.
    """
    updated_at = row["req_updated_at"] or datetime.now()
    date_str = updated_at.strftime("%Y-%m-%d")

    prijmeni = sanitize_name(row["prijmeni"] or "Unknown")
    jmeno = sanitize_name(row["jmeno"] or "")

    folder_name = f"{date_str} {prijmeni}, {jmeno} {row['request_id']}"
    return BASE_DIR / sanitize_name(folder_name)


def merge_duplicate_folders(req_id, main_folder: Path, valid_files: set) -> None:
    """Fold older folders of the same request into *main_folder*.

    Any directory under BASE_DIR whose name contains the request id and that
    is not *main_folder* counts as a duplicate (e.g. the folder date changed).
    """
    req_key = str(req_id)
    duplicates = [
        d for d in BASE_DIR.iterdir()
        if d.is_dir() and req_key in d.name and d != main_folder
    ]

    for dup in duplicates:
        print(f"♻️ Merging duplicate folder: {dup.name}")

        # 1) Drop files the database no longer knows about.
        clean_folder(dup, valid_files)

        # 2) Move the remaining files unless the main folder already has them.
        main_folder.mkdir(parents=True, exist_ok=True)
        for item in list(dup.iterdir()):
            if item.is_file():
                target = main_folder / item.name
                if not target.exists():
                    item.rename(target)

        # 3) Remove the duplicate folder (best effort) and report a leftover;
        #    with ignore_errors=True rmtree itself never raises.
        shutil.rmtree(dup, ignore_errors=True)
        if dup.exists():
            print(f"⚠️ Could not delete duplicate folder {dup}")


def export_missing_files(cur_blob, main_folder: Path, downloads: dict) -> None:
    """Write every file of *downloads* that is not yet in *main_folder*.

    Args:
        cur_blob: Open tuple cursor used to fetch the BLOB content.
        main_folder: Destination directory (created when missing).
        downloads: Mapping of sanitized file name -> medevio_downloads.id.
    """
    main_folder.mkdir(parents=True, exist_ok=True)

    for filename, download_id in downloads.items():
        dest = main_folder / filename
        if dest.exists():
            continue

        # Fetch the blob only now, and by primary key: the sanitized name may
        # differ from the filename stored in the database.
        start = time.perf_counter()
        cur_blob.execute(
            "SELECT file_content FROM medevio_downloads WHERE id=%s",
            (download_id,),
        )
        row = cur_blob.fetchone()
        if not row:
            continue
        print(f"⏱ Took {time.perf_counter() - start:.4f} seconds")

        content = row[0]
        if not content:
            continue

        with open(dest, "wb") as out:
            out.write(content)

        print(f"💾 Wrote: {dest.relative_to(BASE_DIR)}")


def main() -> None:
    """Synchronise the export folders with the attachments stored in MySQL."""
    conn = pymysql.connect(**DB_CONFIG)
    try:
        print("🔍 Loading metadata from DB (FAST)…")
        with conn.cursor(pymysql.cursors.DictCursor) as cur_meta:
            cur_meta.execute("""
                SELECT d.id AS download_id,
                       d.request_id,
                       d.filename,
                       d.created_at,
                       p.updatedAt AS req_updated_at,
                       p.pacient_jmeno AS jmeno,
                       p.pacient_prijmeni AS prijmeni
                FROM medevio_downloads d
                JOIN pozadavky p ON d.request_id = p.id
                ORDER BY p.updatedAt DESC
            """)
            rows = cur_meta.fetchall()
        print(f"📋 Found {len(rows)} attachment records.\n")

        # Group once per request; the metadata query already lists every
        # attachment, so no further per-request query is needed.
        first_row = {}   # request_id -> first metadata row (newest first)
        downloads = {}   # request_id -> {sanitized file name: download id}
        for r in rows:
            req_id = r["request_id"]
            first_row.setdefault(req_id, r)
            name = sanitize_name(r["filename"] or f"unknown_{r['download_id']}.bin")
            downloads.setdefault(req_id, {}).setdefault(name, r["download_id"])

        with conn.cursor() as cur_blob:
            for req_id, meta in first_row.items():
                files = downloads[req_id]
                valid_files = set(files)
                main_folder = request_folder(meta)

                merge_duplicate_folders(req_id, main_folder, valid_files)
                clean_folder(main_folder, valid_files)
                export_missing_files(cur_blob, main_folder, files)

        print("\n🎯 Export complete.\n")
    finally:
        # Always release the connection, even when the export fails midway.
        conn.close()


if __name__ == "__main__":
    main()
|
||||||
146
10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem single step.py
Normal file
146
10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem single step.py
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import pymysql
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# ==============================
|
||||||
|
# ⚙️ CONFIGURATION
|
||||||
|
# ==============================
|
||||||
|
# Connection settings for the MySQL server holding the `medevio` database.
# Every value can be overridden through a MEDEVIO_* environment variable so the
# script can run on another machine without editing the source.
# NOTE(security): the fallback credentials are committed in plain text; prefer
# the environment variables and rotate/remove these defaults.
DB_CONFIG = {
    "host": os.environ.get("MEDEVIO_DB_HOST", "192.168.1.76"),
    "port": int(os.environ.get("MEDEVIO_DB_PORT", "3307")),
    "user": os.environ.get("MEDEVIO_DB_USER", "root"),
    "password": os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("MEDEVIO_DB_NAME", "medevio"),
    "charset": "utf8mb4",
}

# Root directory that receives one sub-folder per request.
BASE_DIR = Path(
    os.environ.get(
        "MEDEVIO_EXPORT_DIR",
        r"d:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP",
    )
)
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_name(name: str) -> str:
    """Return *name* made safe for use as a file or folder name.

    Characters ``< > : " / \\ | ? *`` and ASCII control characters are each
    replaced by an underscore, and surrounding whitespace is removed.
    Trailing dots and spaces are removed as well: Windows drops them when the
    entry is created, so keeping them would make the expected name differ
    from the name later read back from disk.
    """
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip()
    return cleaned.rstrip(". ")
|
||||||
|
|
||||||
|
|
||||||
|
def clean_folder(folder: Path, valid_files: set):
    """Remove files from *folder* that do NOT exist in MySQL for this request.

    Args:
        folder: Directory to clean. Nothing happens when it is missing or is
            not a directory.
        valid_files: Sanitized file names that are allowed to stay.

    Sub-directories are left untouched. A file that cannot be removed is
    reported on stdout and skipped.
    """
    # is_dir() also rejects a plain file at this path, which iterdir() would
    # otherwise fail on.
    if not folder.is_dir():
        return

    # Snapshot the listing first so deleting entries cannot affect iteration.
    for entry in list(folder.iterdir()):
        if not entry.is_file() or sanitize_name(entry.name) in valid_files:
            continue

        print(f"🗑️ Removing unexpected file: {entry.name}")
        try:
            entry.unlink()
        except OSError as e:
            print(f"⚠️ Cannot delete {entry}: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# ==============================
|
||||||
|
# 📥 LOAD EVERYTHING IN ONE QUERY
|
||||||
|
# ==============================
|
||||||
|
def load_rows():
    """Fetch metadata and BLOB content of every attachment in one query.

    Returns the rows as dictionaries, ordered by request updatedAt (newest
    first) and then by attachment creation time. The connection is always
    closed, also when the query fails.
    """
    conn = pymysql.connect(**DB_CONFIG)
    try:
        with conn.cursor(pymysql.cursors.DictCursor) as cur:
            print("📥 Loading ALL metadata + BLOBs with ONE MySQL query…")
            cur.execute("""
                SELECT
                    d.id AS download_id,
                    d.request_id,
                    d.filename,
                    d.file_content,
                    p.updatedAt AS req_updated_at,
                    p.pacient_jmeno AS jmeno,
                    p.pacient_prijmeni AS prijmeni
                FROM medevio_downloads d
                JOIN pozadavky p ON d.request_id = p.id
                ORDER BY p.updatedAt DESC, d.created_at ASC
            """)
            return cur.fetchall()
    finally:
        conn.close()


def group_by_request(rows) -> dict:
    """Group *rows* by their "request_id", keeping the incoming order."""
    grouped = {}  # req_id → list of file dicts
    for r in rows:
        grouped.setdefault(r["request_id"], []).append(r)
    return grouped


def export_name(row: dict) -> str:
    """Return the on-disk file name for *row*.

    Falls back to "unknown_<download_id>.bin" when the database has no
    filename, so such a row no longer aborts the whole export.
    """
    return sanitize_name(row["filename"] or f"unknown_{row['download_id']}.bin")


def sync_request(req_id, filelist: list) -> None:
    """Bring the folder of one request in line with its database rows."""
    # ========== GET UPDATEDAT ==========
    any_row = filelist[0]
    updated_at = any_row["req_updated_at"] or datetime.now()
    date_str = updated_at.strftime("%Y-%m-%d")

    prijmeni = sanitize_name(any_row["prijmeni"] or "Unknown")
    jmeno = sanitize_name(any_row["jmeno"] or "")

    folder_name = sanitize_name(f"{date_str} {prijmeni}, {jmeno} {req_id}")
    main_folder = BASE_DIR / folder_name

    # ========== VALID FILES ==========
    valid_files = {export_name(r) for r in filelist}

    # ========== FIND OLD FOLDERS ==========
    # Any other directory whose name contains the request id is an older
    # folder of the same request (e.g. created under a previous date).
    req_key = str(req_id)
    possible_dups = [
        d for d in BASE_DIR.iterdir()
        if d.is_dir() and req_key in d.name and d != main_folder
    ]

    # ========== MERGE OLD FOLDERS ==========
    for dup in possible_dups:
        print(f"♻️ Merging folder: {dup.name}")

        clean_folder(dup, valid_files)
        main_folder.mkdir(parents=True, exist_ok=True)

        for item in list(dup.iterdir()):
            if item.is_file():
                target = main_folder / item.name
                if not target.exists():
                    item.rename(target)

        # Best effort: whatever was not moved is discarded with the folder.
        shutil.rmtree(dup, ignore_errors=True)

    # ========== CLEAN MAIN FOLDER ==========
    main_folder.mkdir(parents=True, exist_ok=True)
    clean_folder(main_folder, valid_files)

    # ========== SAVE FILES ==========
    for r in filelist:
        dest = main_folder / export_name(r)
        if dest.exists():
            continue

        content = r["file_content"]
        if not content:
            continue

        with open(dest, "wb") as out:
            out.write(content)

        print(f"💾 Saved: {dest.relative_to(BASE_DIR)}")


def main() -> None:
    """Export every attachment from MySQL into per-request folders."""
    rows = load_rows()
    print(f"📦 Loaded {len(rows)} total file rows.\n")

    requests = group_by_request(rows)
    print(f"📌 Unique requests: {len(requests)}\n")

    for req_id, filelist in requests.items():
        sync_request(req_id, filelist)

    print("\n🎯 Export complete.\n")


if __name__ == "__main__":
    main()
|
||||||
@@ -1,113 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
import os
|
|
||||||
import zlib
|
|
||||||
import pymysql
|
|
||||||
import re
|
|
||||||
from pathlib import Path
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
# ==============================
|
|
||||||
# ⚙️ CONFIGURATION
|
|
||||||
# ==============================
|
|
||||||
DB_CONFIG = {
|
|
||||||
"host": "192.168.1.76",
|
|
||||||
"port": 3307,
|
|
||||||
"user": "root",
|
|
||||||
"password": "Vlado9674+",
|
|
||||||
"database": "medevio",
|
|
||||||
"charset": "utf8mb4",
|
|
||||||
}
|
|
||||||
|
|
||||||
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
|
|
||||||
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
|
|
||||||
def sanitize_name(name: str) -> str:
|
|
||||||
"""Replace invalid filename characters with underscore."""
|
|
||||||
return re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name).strip()
|
|
||||||
|
|
||||||
|
|
||||||
# ==============================
|
|
||||||
# 📦 EXPORT WITH JOIN TO POZADAVKY
|
|
||||||
# ==============================
|
|
||||||
conn = pymysql.connect(**DB_CONFIG)
|
|
||||||
cur_meta = conn.cursor(pymysql.cursors.DictCursor)
|
|
||||||
cur_blob = conn.cursor()
|
|
||||||
|
|
||||||
# 🎯 JOIN medevio_downloads → pozadavky
|
|
||||||
cur_meta.execute("""
|
|
||||||
SELECT d.id, d.request_id, d.attachment_id, d.filename,
|
|
||||||
d.created_at, d.downloaded_at,
|
|
||||||
p.pacient_jmeno AS jmeno,
|
|
||||||
p.pacient_prijmeni AS prijmeni
|
|
||||||
FROM medevio_downloads d
|
|
||||||
JOIN pozadavky p ON d.request_id = p.id
|
|
||||||
WHERE d.file_content IS NOT NULL;
|
|
||||||
""")
|
|
||||||
|
|
||||||
rows = cur_meta.fetchall()
|
|
||||||
print(f"📋 Found {len(rows)} records to check/export")
|
|
||||||
|
|
||||||
skipped, exported = 0, 0
|
|
||||||
|
|
||||||
for r in rows:
|
|
||||||
try:
|
|
||||||
created = r["created_at"] or r["downloaded_at"] or datetime.now()
|
|
||||||
date_str = created.strftime("%Y-%m-%d")
|
|
||||||
|
|
||||||
# 👍 Now always correct from pozadavky
|
|
||||||
prijmeni = sanitize_name(r["prijmeni"] or "Unknown")
|
|
||||||
jmeno = sanitize_name(r["jmeno"] or "")
|
|
||||||
|
|
||||||
# 🔥 Full request_id for folder identification
|
|
||||||
full_req_id = sanitize_name(r["request_id"])
|
|
||||||
|
|
||||||
# Folder names (normal and triangle)
|
|
||||||
base_folder = f"{date_str} {prijmeni}, {jmeno} {full_req_id}"
|
|
||||||
tri_folder = f"{date_str}▲ {prijmeni}, {jmeno} {full_req_id}"
|
|
||||||
|
|
||||||
base_folder = sanitize_name(base_folder)
|
|
||||||
tri_folder = sanitize_name(tri_folder)
|
|
||||||
|
|
||||||
base_path = BASE_DIR / base_folder
|
|
||||||
tri_path = BASE_DIR / tri_folder
|
|
||||||
|
|
||||||
filename = sanitize_name(r["filename"] or f"unknown_{r['id']}.bin")
|
|
||||||
file_path_base = base_path / filename
|
|
||||||
file_path_tri = tri_path / filename
|
|
||||||
|
|
||||||
# 🟡 Skip if file already exists
|
|
||||||
if file_path_base.exists() or file_path_tri.exists():
|
|
||||||
skipped += 1
|
|
||||||
found_in = "▲" if file_path_tri.exists() else ""
|
|
||||||
print(f"⏭️ Skipping existing{found_in}: {filename}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Ensure directory exists
|
|
||||||
base_path.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
# 2️⃣ Fetch blob content
|
|
||||||
cur_blob.execute(
|
|
||||||
"SELECT file_content FROM medevio_downloads WHERE id = %s",
|
|
||||||
(r["id"],)
|
|
||||||
)
|
|
||||||
blob = cur_blob.fetchone()[0]
|
|
||||||
|
|
||||||
if blob:
|
|
||||||
with open(file_path_base, "wb") as f:
|
|
||||||
f.write(blob)
|
|
||||||
exported += 1
|
|
||||||
print(f"✅ Saved: {file_path_base.relative_to(BASE_DIR)}")
|
|
||||||
else:
|
|
||||||
print(f"⚠️ No content for id={r['id']}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ Error for id={r['id']}: {e}")
|
|
||||||
|
|
||||||
cur_blob.close()
|
|
||||||
cur_meta.close()
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
print(f"\n🎯 Export complete — {exported} new files saved, {skipped} skipped.\n")
|
|
||||||
@@ -20,7 +20,7 @@ DB_CONFIG = {
|
|||||||
"charset": "utf8mb4",
|
"charset": "utf8mb4",
|
||||||
}
|
}
|
||||||
|
|
||||||
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP1")
|
BASE_DIR = Path(r"d:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
|
||||||
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ DB_CONFIG = {
|
|||||||
"charset": "utf8mb4",
|
"charset": "utf8mb4",
|
||||||
}
|
}
|
||||||
|
|
||||||
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP1")
|
BASE_DIR = Path(r"d:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
|
||||||
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user