notebook
This commit is contained in:
224
10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental1.py
Normal file
224
10ReadPozadavky/PRAVIDELNE_5_SaveToFileSystem incremental1.py
Normal file
@@ -0,0 +1,224 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import pymysql
|
||||
import re
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
# ==============================
|
||||
# ⚙️ CONFIGURATION
|
||||
# ==============================
|
||||
|
||||
# Connection settings passed verbatim to pymysql.connect().
# NOTE(review): these credentials used to be hard-coded only. Every value can
# now be overridden through an environment variable; the literals remain solely
# as backward-compatible fallbacks and should be removed (and the password
# rotated) once the environment is configured.
DB_CONFIG = {
    "host": os.environ.get("MEDEVIO_DB_HOST", "192.168.1.76"),
    "port": int(os.environ.get("MEDEVIO_DB_PORT", "3307")),
    "user": os.environ.get("MEDEVIO_DB_USER", "root"),
    "password": os.environ.get("MEDEVIO_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("MEDEVIO_DB_NAME", "medevio"),
    "charset": "utf8mb4",
}

# Export root that receives one sub-folder per request. It is created up front
# so that the directory exists before anything lists or writes into it.
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
# ==============================
|
||||
# 🔧 HELPERS
|
||||
# ==============================
|
||||
|
||||
def sanitize_name(name: str) -> str:
    """Return *name* with characters Windows forbids in filenames replaced.

    Each of the reserved punctuation characters (angle brackets, colon,
    double quote, slash, backslash, pipe, question mark, asterisk) and each
    control character U+0000-U+001F becomes an underscore. Leading and
    trailing whitespace is stripped afterwards.
    """
    forbidden = '<>:"/\\|?*' + "".join(map(chr, range(0x20)))
    replacements = dict.fromkeys(map(ord, forbidden), "_")
    return name.translate(replacements).strip()
|
||||
|
||||
|
||||
def make_abbrev(title: str) -> str:
    """Build an upper-case abbreviation from *title*.

    The title is split into runs of letters and digits. A run consisting only
    of digits is kept whole; any other run contributes just its first
    character. An empty or missing title yields an empty string.
    """
    if not title:
        return ""

    tokens = re.findall(r"[A-Za-zÁ-Žá-ž0-9]+", title)
    pieces = [tok if tok.isdigit() else tok[0] for tok in tokens]
    return "".join(pieces).upper()
|
||||
|
||||
|
||||
def clean_folder(folder: Path, valid_files: set):
    """Delete files in *folder* that are not expected to be there.

    A file is kept when its name starts with "▲" or when its sanitized name is
    a member of *valid_files*. Sub-directories are ignored. A missing folder is
    a no-op, and a failed deletion is reported on stdout rather than raised.
    """
    if not folder.exists():
        return

    for entry in folder.iterdir():
        # Only plain files are candidates; ▲-prefixed files are always kept.
        if not entry.is_file() or entry.name.startswith("▲"):
            continue
        if sanitize_name(entry.name) in valid_files:
            continue

        print(f"🗑️ Removing unexpected file: {entry.name}")
        try:
            entry.unlink()
        except Exception as e:
            print(f"⚠️ Could not delete {entry}: {e}")
|
||||
|
||||
|
||||
# ==============================
|
||||
# 📦 DB CONNECTION
|
||||
# ==============================
|
||||
|
||||
# One row per stored attachment, limited to requests that have never been
# exported (attachmentsProcessed IS NULL) or whose newest attachment is more
# recent than the last export. Newest-updated requests come first.
PENDING_ATTACHMENTS_SQL = """
    SELECT
        p.id AS request_id,
        p.displayTitle,
        p.pacient_jmeno,
        p.pacient_prijmeni,
        p.updatedAt,
        p.attachmentsProcessed,
        d.filename,
        d.created_at
    FROM pozadavky p
    JOIN medevio_downloads d ON d.request_id = p.id
    LEFT JOIN (
        SELECT request_id, MAX(created_at) AS last_attachment_ts
        FROM medevio_downloads
        GROUP BY request_id
    ) x ON x.request_id = p.id
    WHERE p.attachmentsProcessed IS NULL
       OR p.attachmentsProcessed < x.last_attachment_ts
    ORDER BY p.updatedAt DESC;
"""

conn = pymysql.connect(**DB_CONFIG)
# Metadata rows are read by column name, blob rows by position, hence the two
# cursor flavours on the same connection.
cur_meta = conn.cursor(pymysql.cursors.DictCursor)
cur_blob = conn.cursor()

print("🔍 Loading only requests with NEW attachments…")
cur_meta.execute(PENDING_ATTACHMENTS_SQL)
rows = cur_meta.fetchall()

print(f"📋 Found {len(rows)} attachment rows belonging to requests needing processing.\n")
|
||||
|
||||
# ==============================
|
||||
# 🧠 PREPARE REQUEST GROUPING
|
||||
# ==============================
|
||||
|
||||
# Bucket the attachment rows by the request they belong to; the insertion order
# of the keys preserves the ordering produced by the query.
grouped = defaultdict(list)
for attachment in rows:
    grouped[attachment["request_id"]].append(attachment)

unique_request_ids = [*grouped]
total_requests = len(unique_request_ids)

print(f"🔄 Processing {total_requests} requests needing updates…\n")
|
||||
|
||||
# ==============================
|
||||
# 🧠 MAIN LOOP
|
||||
# ==============================
|
||||
|
||||
# For every pending request:
#   1. locate (or create) its export folder under BASE_DIR,
#   2. delete files that no longer correspond to a stored attachment,
#   3. write attachments that are not on disk yet,
#   4. drop the ▲ marker from the folder name if anything new was written,
#   5. stamp the request as processed and commit.
# NOTE(review): ▲ looks like a marker added to files/folders outside this
# script (presumably "already handled") — confirm with the workflow owner.
for index, req_id in enumerate(unique_request_ids, start=1):
    pct = (index / total_requests) * 100

    print(f"\n[ {pct:5.1f}% ] Processing request {index}/{total_requests} → {req_id}")

    req_rows = grouped[req_id]
    first = req_rows[0]  # request-level columns are identical on every row

    # Build folder name: "<date> <surname>, <first name> [<abbrev>] <id>"
    updated_at = first["updatedAt"] or datetime.now()
    date_str = updated_at.strftime("%Y-%m-%d")

    prijmeni = sanitize_name(first["pacient_prijmeni"] or "Unknown")
    jmeno = sanitize_name(first["pacient_jmeno"] or "")
    abbr = make_abbrev(first["displayTitle"])

    desired_folder_name = sanitize_name(f"{date_str} {prijmeni}, {jmeno} [{abbr}] {req_id}")

    # Reuse an existing folder whose name contains the request id (it may have
    # been renamed or flagged since the last run); otherwise use the new name.
    main_folder = next(
        (entry for entry in BASE_DIR.iterdir() if entry.is_dir() and req_id in entry.name),
        None,
    )
    if main_folder is None:
        main_folder = BASE_DIR / desired_folder_name

    main_folder.mkdir(parents=True, exist_ok=True)

    # Names every attachment of this request may legitimately have on disk
    valid_files = {sanitize_name(att["filename"]) for att in req_rows}

    # Clean unexpected non-▲ files
    clean_folder(main_folder, valid_files)

    # Track if ANY new files were downloaded
    added_new_file = False

    # DOWNLOAD MISSING FILES
    for att in req_rows:
        filename = sanitize_name(att["filename"])
        dest_plain = main_folder / filename
        dest_flag = main_folder / ("▲" + filename)

        # Skip if file already exists (plain or ▲)
        if dest_plain.exists() or dest_flag.exists():
            continue

        # Fetch content; the blob is looked up by the raw (unsanitized) name
        cur_blob.execute(
            """
            SELECT file_content
            FROM medevio_downloads
            WHERE request_id=%s AND filename=%s
            """,
            (req_id, att["filename"]),
        )

        blob_row = cur_blob.fetchone()
        if not blob_row or not blob_row[0]:
            # Previously skipped silently; report it because the request is
            # still marked as processed below.
            print(f"⚠️ No stored content for: {att['filename']} (skipped)")
            continue

        dest_plain.write_bytes(blob_row[0])

        print(f"💾 Wrote: {dest_plain.relative_to(BASE_DIR)}")
        added_new_file = True

    # ------------------------------------
    # 🟦 FOLDER ▲ LOGIC (IMPORTANT)
    # ------------------------------------
    # Only a run that actually added files may rename the folder; when nothing
    # new was written the folder name is never touched.
    if added_new_file and "▲" in main_folder.name:
        new_name = main_folder.name.replace("▲", "").strip()
        new_path = main_folder.parent / new_name

        try:
            main_folder.rename(new_path)
        except OSError as e:
            print(f"⚠️ Could not rename folder: {e}")
        else:
            print(f"🔄 Folder flag ▲ removed → {new_name}")
            main_folder = new_path

    # Mark request as processed; committing per request keeps finished work
    # if a later request fails.
    cur_meta.execute(
        "UPDATE pozadavky SET attachmentsProcessed = NOW() WHERE id=%s",
        (req_id,),
    )
    conn.commit()
|
||||
|
||||
# ==============================
|
||||
# 🏁 DONE
|
||||
# ==============================
|
||||
|
||||
print("\n🎯 Export complete.\n")

# Close both cursors first, then the connection they were created from.
for cursor in (cur_blob, cur_meta):
    cursor.close()
conn.close()
|
||||
Reference in New Issue
Block a user