This commit is contained in:
2025-11-14 16:15:20 +01:00
parent d3962cb72b
commit 0f953c2d86
2 changed files with 208 additions and 0 deletions

53
ECG/10 ECG test1.py Normal file
View File

@@ -0,0 +1,53 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import fitz
from pathlib import Path
import os
# Folder scanned (non-recursively) for "*.pdf" files to process.
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\EKGforProcessing")
# Marker appended to the PDF "keywords" metadata once a file has been rotated,
# so that re-running the script does not rotate the same file again.
FLAG = "rotated-by-script"
# Suffix of the temporary file written next to each PDF while saving.
TMP_SUFFIX = ".tmp.pdf"


def process_pdf(pdf_path):
    """Rotate the first page by 90 degrees, drop the second page and mark the file.

    The modified document is saved to a temporary sibling file which then
    replaces the original.

    Args:
        pdf_path: ``pathlib.Path`` of the PDF to process.

    Returns:
        True if the file was rewritten, False if its keywords already
        contained ``FLAG`` and it was left untouched.

    Raises:
        Exception: whatever PyMuPDF or the OS raises while opening, editing,
            saving or replacing the file; the temporary file is removed
            (best effort) before the error propagates.
    """
    tmp_path = pdf_path.with_suffix(TMP_SUFFIX)
    try:
        doc = fitz.open(pdf_path)
        try:
            meta = doc.metadata or {}
            keywords = meta.get("keywords", "") or meta.get("Keywords", "") or ""

            # ---- Check if already processed ----
            if FLAG in keywords:
                return False

            # ---- Rotate first page ----
            first = doc[0]
            first.set_rotation((first.rotation + 90) % 360)

            # ---- Delete page 2 (if exists) ----
            if doc.page_count > 1:
                doc.delete_page(1)

            # ---- Update metadata Keywords ----
            meta["keywords"] = (keywords + " " + FLAG).strip()
            doc.set_metadata(meta)

            # ---- Save via temporary file ----
            doc.save(tmp_path, deflate=True, garbage=3)
        finally:
            # Close exactly once, on every path; the document must be closed
            # before the original file is replaced below.
            doc.close()
        os.replace(tmp_path, pdf_path)
        return True
    except Exception:
        # Best-effort cleanup so a half-written temp file is not left behind;
        # the original error is what gets reported.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise


# Snapshot the listing first: temporary "*.tmp.pdf" files are created in this
# same folder while the loop runs.
for pdf_path in list(BASE_DIR.glob("*.pdf")):
    if pdf_path.name.endswith(TMP_SUFFIX):
        # Leftover from an interrupted earlier run, not a real input file.
        print(f"Skipping leftover temporary file: {pdf_path.name}")
        continue
    print(f"Processing: {pdf_path.name}")
    try:
        if process_pdf(pdf_path):
            print(" → Rotated & marked.")
        else:
            print(" → Already rotated, skipping.")
    except Exception as e:
        # Top-level boundary: report and continue with the next file.
        print(f"❌ Error: {e}")
print("Done.")

155
ECG/20 ECG test2.py Normal file
View File

@@ -0,0 +1,155 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import fitz
from pathlib import Path
import os
import easyocr
from PIL import Image
import io
import re,time
# Folder scanned (non-recursively) for "*.pdf" files to process.
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\EKGforProcessing")
# Marker looked up in, and appended to, the PDF "keywords" metadata to record
# that the rotation step has already been applied to a file.
FLAG = "rotated-by-script"
# Shared EasyOCR reader for Czech ('cs') with GPU use disabled; created once
# at import time and reused by ocr_page() for every file.
reader = easyocr.Reader(['cs'], gpu=False)
def ocr_page(page):
    """Run OCR on a PDF page and return the recognised text.

    The page is rendered to a pixmap without an alpha channel, re-encoded as
    an in-memory PNG and passed to the module-level EasyOCR ``reader``. The
    recognised text fragments are joined with newlines.
    """
    pixmap = page.get_pixmap(alpha=False)
    size = [pixmap.width, pixmap.height]
    png_buffer = io.BytesIO()
    Image.frombytes("RGB", size, pixmap.samples).save(png_buffer, format="PNG")
    # detail=0 makes readtext return plain strings only.
    fragments = reader.readtext(png_buffer.getvalue(), detail=0)
    return "\n".join(fragments)
def extract_rodne_cislo(text):
    """Return the first standalone run of 9 or 10 digits in *text*, or None.

    This is the candidate rodné číslo written without a slash; a number
    split by a slash or embedded in a longer digit run does not match.
    """
    match = re.search(r"\b\d{9,10}\b", text)
    if match is None:
        return None
    return match.group(0)
def extract_date(text):
    """Return the first ``D.M.YYYY``-shaped date found in *text*, or None.

    Day and month may have one or two digits; the year must have exactly
    four. A trailing time such as ``HH.MM.SS`` is not part of the result.
    """
    found = re.search(r"\b(\d{1,2}\.\d{1,2}\.\d{4})\b", text)
    if not found:
        return None
    return found.group(1)
def convert_date_to_iso(dmy):
    """Turn a ``D.M.YYYY`` string into ``YYYY-MM-DD``.

    Day and month are zero-padded to two digits. The input must contain
    exactly three dot-separated parts, otherwise ``ValueError`` is raised by
    the unpacking.
    """
    day, month, year = dmy.split(".")
    return "-".join((year, month.zfill(2), day.zfill(2)))
def rename_ecg_file(pdf_path, rc, date_dmy, attempts=15, delay=1.0):
    """Rename an ECG PDF to ``"<rc> <YYYY-MM-DD> [EKG] [bez hodnocení].pdf"``.

    The rename is retried when the file is temporarily locked (for example
    by a sync client), which surfaces as ``PermissionError``.

    Args:
        pdf_path: ``pathlib.Path`` of the file to rename.
        rc: identifier placed first in the new file name.
        date_dmy: date as ``D.M.YYYY``; converted with ``convert_date_to_iso``.
        attempts: maximum number of rename attempts.
        delay: seconds to wait between attempts (worst case roughly
            ``(attempts - 1) * delay`` seconds in total).

    Returns:
        True if the file carries the target name afterwards, False if the
        target name is taken by another file or every attempt failed.
    """
    date_iso = convert_date_to_iso(date_dmy)
    new_name = f"{rc} {date_iso} [EKG] [bez hodnocení].pdf"
    new_path = pdf_path.with_name(new_name)

    if new_path == pdf_path:
        # Re-run on a file that was renamed earlier: nothing to do, and not
        # a naming conflict.
        print(f" → File already has the target name: {new_name}")
        return True

    if new_path.exists():
        print(f" ⚠ File with name already exists: {new_name}")
        return False

    # Try renaming with retries in case the file is temporarily locked.
    for attempt in range(1, attempts + 1):
        try:
            pdf_path.rename(new_path)
        except FileExistsError:
            # The target appeared after the exists() check above; on Windows
            # rename refuses to overwrite it.
            print(f" ⚠ File with name already exists: {new_name}")
            return False
        except PermissionError:
            print(f" ⚠ File locked (Dropbox?), retrying... {attempt}/{attempts}")
            if attempt < attempts:
                # No point in waiting after the final attempt.
                time.sleep(delay)
        else:
            print(f" → File renamed to: {new_name}")
            return True

    print(" ❌ Could not rename file after several attempts.")
    return False
def _rotate_and_mark(pdf_path):
    """Rotate page 1 by 90 degrees, drop page 2 and add FLAG to the keywords.

    The result is saved to a temporary sibling file that then replaces the
    original. Returns True if the file was rewritten, False if its keywords
    already contained FLAG. Errors propagate after a best-effort removal of
    the temporary file.
    """
    tmp = pdf_path.with_suffix(".tmp.pdf")
    try:
        doc = fitz.open(pdf_path)
        try:
            meta = doc.metadata or {}
            keywords = meta.get("keywords", "") or meta.get("Keywords", "") or ""
            if FLAG in keywords:
                return False

            first = doc[0]
            first.set_rotation((first.rotation + 90) % 360)
            if doc.page_count > 1:
                doc.delete_page(1)

            meta["keywords"] = (keywords + " " + FLAG).strip()
            doc.set_metadata(meta)
            doc.save(tmp, deflate=True, garbage=3)
        finally:
            # Close exactly once on every path, and before replacing the file.
            doc.close()
        os.replace(tmp, pdf_path)
        return True
    except Exception:
        # Best-effort cleanup of a half-written temp file; the original
        # error is the one that gets reported.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise


def _ocr_first_page(pdf_path):
    """Open the PDF, OCR its first page and return the text; always closes the file."""
    doc = fitz.open(pdf_path)
    try:
        return ocr_page(doc[0])
    finally:
        doc.close()


def _ocr_and_rename(pdf_path):
    """OCR the first page, print what was found and rename the file if possible."""
    print(" Performing OCR...")
    text = _ocr_first_page(pdf_path)
    print("----- OCR RESULT -----")
    print(text)
    print("----------------------")

    rc = extract_rodne_cislo(text)
    date = extract_date(text)
    print("\n----- EXTRACTED DATA -----")
    print("Rodné číslo :", rc)
    print("Datum :", date)
    print("---------------------------")

    # The PDF is already closed here, which matters for renaming on Windows.
    if rc and date:
        rename_ecg_file(pdf_path, rc, date)
    else:
        print(" ⚠ Missing RC or date file NOT renamed.")


# Snapshot the listing first: files in this folder are renamed and temporary
# files are created while the loop runs.
for pdf_path in list(BASE_DIR.glob("*.pdf")):
    if pdf_path.name.endswith(".tmp.pdf"):
        # Leftover from an interrupted earlier run, not a real input file.
        print(f"\nSkipping leftover temporary file: {pdf_path.name}")
        continue
    print(f"\nProcessing: {pdf_path.name}")
    try:
        # 1) Rotate unless the file is already marked as rotated.
        if _rotate_and_mark(pdf_path):
            print(" → Rotated + saved + marked")
        else:
            print(" → Already rotated, skipping rotation.")
        # 2) OCR + rename, identical for both cases.
        _ocr_and_rename(pdf_path)
    except Exception as e:
        # Top-level boundary: report and continue with the next file.
        print("❌ Error:", e)
print("\nDone.")