#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Normalise scanned ECG PDFs, then OCR and rename them.

This module carries the logic of the two scripts introduced by the change
set, in the order they appear there:

* ``ECG/10 ECG test1.py`` -> :func:`run_rotation_pass`
  Rotate the first page by 90 degrees, delete the second page (if any) and
  tag the file with ``FLAG`` in its PDF "keywords" metadata so it is never
  rotated twice.
* ``ECG/20 ECG test2.py`` -> :func:`run_ocr_rename_pass`
  Perform the same normalisation when it is still missing, OCR the first
  page, extract the birth number (rodné číslo) and the recording date, and
  rename the file to ``"<rc> <YYYY-MM-DD> [EKG] [bez hodnocení].pdf"``.

The third-party packages (PyMuPDF, EasyOCR, Pillow) are imported inside the
functions that need them.  Loading the OCR stack is slow, and the pure text
helpers stay importable on machines where it is not installed.
"""

import contextlib
import functools
import io
import os
import re
import time
from pathlib import Path
from typing import List, Optional

BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\EKGforProcessing")

# Marker stored in the PDF "keywords" metadata once a file has been rotated.
FLAG = "rotated-by-script"

# Suffix of the sibling file a rewritten PDF is saved to before it replaces
# the original.  It still ends in ".pdf", hence the filter in list_pdfs().
TMP_SUFFIX = ".tmp.pdf"

# A run of 9 or 10 digits standing on its own.
_RC_PATTERN = re.compile(r"\b\d{9,10}\b")
# D.M.YYYY with one- or two-digit day and month.
_DATE_PATTERN = re.compile(r"\b(\d{1,2}\.\d{1,2}\.\d{4})\b")


def list_pdfs(base_dir: Path) -> List[Path]:
    """Return the PDFs directly inside *base_dir*, sorted by path.

    The listing is taken as a snapshot before any file is touched, so files
    created or renamed while processing are not picked up within the same
    run.  Files ending in ``TMP_SUFFIX`` are skipped: they are intermediate
    output of :func:`rotate_and_mark`, not input documents.
    """
    return sorted(
        path
        for path in base_dir.glob("*.pdf")
        if not path.name.lower().endswith(TMP_SUFFIX)
    )


@functools.lru_cache(maxsize=1)
def get_reader():
    """Return the shared EasyOCR reader (Czech, CPU only), built on first use."""
    import easyocr

    return easyocr.Reader(['cs'], gpu=False)


def ocr_page(page) -> str:
    """Render *page* to a PNG image and return the recognised text.

    Args:
        page: A PyMuPDF page object.

    Returns:
        The text fragments reported by EasyOCR, joined with newlines.
    """
    from PIL import Image

    pix = page.get_pixmap(alpha=False)
    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    lines = get_reader().readtext(buf.getvalue(), detail=0)
    return "\n".join(lines)


def extract_rodne_cislo(text: str) -> Optional[str]:
    """Return the first standalone 9- or 10-digit number in *text*.

    This is taken to be the rodné číslo written without a slash.  Returns
    ``None`` when no such number is present.
    """
    m = _RC_PATTERN.search(text)
    return m.group(0) if m else None


def extract_date(text: str) -> Optional[str]:
    """Return the first ``D.M.YYYY`` date found in *text*, or ``None``.

    Intended for OCR output of the form ``'DD.MM.YYYY HH.MM.SS'``; only the
    date part is returned, exactly as it was written.
    """
    m = _DATE_PATTERN.search(text)
    return m.group(1) if m else None


def convert_date_to_iso(dmy: str) -> str:
    """Convert ``D.M.YYYY`` to ``YYYY-MM-DD``, zero-padding day and month.

    Raises:
        ValueError: If *dmy* does not consist of exactly three dot-separated
            parts.
    """
    d, m, y = dmy.split(".")
    return f"{y}-{m.zfill(2)}-{d.zfill(2)}"


def rename_ecg_file(pdf_path, rc, date_dmy, *, attempts=15, delay=1.0):
    """Rename *pdf_path* to ``"<rc> <iso-date> [EKG] [bez hodnocení].pdf"``.

    The file stays in its directory.  An existing file with the target name
    is never overwritten.  A ``PermissionError`` (e.g. Dropbox holding the
    file) is retried.

    Args:
        pdf_path: Path of the PDF to rename; it must not be open any more.
        rc: Birth number used as the first part of the new name.
        date_dmy: Recording date as ``D.M.YYYY``.
        attempts: Maximum number of rename attempts.
        delay: Seconds to wait between two attempts (up to
            ``(attempts - 1) * delay`` seconds in total).
    """
    date_iso = convert_date_to_iso(date_dmy)
    new_name = f"{rc} {date_iso} [EKG] [bez hodnocení].pdf"
    new_path = pdf_path.with_name(new_name)

    if new_path.exists():
        print(f" ⚠ File with name already exists: {new_name}")
        return

    for attempt in range(1, attempts + 1):
        try:
            pdf_path.rename(new_path)
        except PermissionError:
            print(f" ⚠ File locked (Dropbox?), retrying... {attempt}/{attempts}")
            # No point in waiting after the final attempt.
            if attempt < attempts:
                time.sleep(delay)
        else:
            print(f" → File renamed to: {new_name}")
            return
    print(" ❌ Could not rename file after several attempts.")


def rotate_and_mark(pdf_path) -> bool:
    """Rotate page 1 by 90°, drop page 2 and tag the PDF with ``FLAG``.

    The modified document is saved to a sibling ``TMP_SUFFIX`` file which
    then replaces the original.  If anything fails the temporary file is
    removed again and the exception is re-raised.

    Returns:
        ``True`` if the file was rewritten, ``False`` if it already carried
        ``FLAG`` in its keywords and was left untouched.
    """
    import fitz

    tmp_path = pdf_path.with_suffix(TMP_SUFFIX)
    try:
        # The context manager closes the document exactly once, on every
        # path; an explicit second close() is what used to break the
        # error handling.
        with fitz.open(pdf_path) as doc:
            meta = dict(doc.metadata or {})
            keywords = meta.get("keywords") or meta.get("Keywords") or ""
            if FLAG in keywords:
                return False

            first = doc[0]
            first.set_rotation((first.rotation + 90) % 360)

            if doc.page_count > 1:
                doc.delete_page(1)

            meta["keywords"] = (keywords + " " + FLAG).strip()
            doc.set_metadata(meta)
            doc.save(tmp_path, deflate=True, garbage=3)

        # The document is closed here, so the original may be replaced.
        os.replace(tmp_path, pdf_path)
    except Exception:
        # Do not leave a half-written "*.tmp.pdf" behind.
        with contextlib.suppress(OSError):
            tmp_path.unlink()
        raise
    return True


def ocr_and_rename(pdf_path) -> None:
    """OCR the first page of *pdf_path*, print the findings and rename it.

    The file is renamed only when both a birth number and a date were
    recognised; otherwise a warning is printed and the name is kept.
    """
    import fitz

    print(" Performing OCR...")
    with fitz.open(pdf_path) as doc:
        text = ocr_page(doc[0])
    # The PDF must be closed before renaming; leaving the block did that.

    print("----- OCR RESULT -----")
    print(text)
    print("----------------------")

    rc = extract_rodne_cislo(text)
    date = extract_date(text)

    print("\n----- EXTRACTED DATA -----")
    print("Rodné číslo :", rc)
    print("Datum :", date)
    print("---------------------------")

    if rc and date:
        rename_ecg_file(pdf_path, rc, date)
    else:
        print(" ⚠ Missing RC or date – file NOT renamed.")


def run_rotation_pass(base_dir=BASE_DIR) -> None:
    """Rotate and tag every not-yet-processed PDF in *base_dir*.

    A failure is reported and processing continues with the next file.
    """
    for pdf_path in list_pdfs(base_dir):
        print(f"Processing: {pdf_path.name}")
        try:
            rotated = rotate_and_mark(pdf_path)
        except Exception as e:  # per-file boundary: report and go on
            print(f"❌ Error: {e}")
            continue

        if rotated:
            print(" → Rotated & marked.")
        else:
            print(" → Already rotated, skipping.")

    print("Done.")


def run_ocr_rename_pass(base_dir=BASE_DIR) -> None:
    """Normalise (if still needed), OCR and rename every PDF in *base_dir*.

    A failure is reported and processing continues with the next file.
    """
    for pdf_path in list_pdfs(base_dir):
        print(f"\nProcessing: {pdf_path.name}")
        try:
            if rotate_and_mark(pdf_path):
                print(" → Rotated + saved + marked")
            else:
                print(" → Already rotated, skipping rotation.")
            ocr_and_rename(pdf_path)
        except Exception as e:  # per-file boundary: report and go on
            print("❌ Error:", e)

    print("\nDone.")


def main() -> None:
    """Run both passes in the order the original scripts were numbered.

    The second pass rotates untagged files itself, so it is also sufficient
    on its own.
    """
    run_rotation_pass()
    run_ocr_rename_pass()


if __name__ == "__main__":
    main()