Merge remote-tracking branch 'origin/master'
This commit is contained in:
53
ECG/10 ECG test1.py
Normal file
53
ECG/10 ECG test1.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import fitz
|
||||||
|
from pathlib import Path
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Folder that is scanned (non-recursively) for PDF files to process.
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\EKGforProcessing")

# Marker appended to the PDF "keywords" metadata once a file has been
# processed, so that re-running the script does not rotate it a second time.
FLAG = "rotated-by-script"


def _rotate_and_mark(pdf_path):
    """Rotate page 1 by 90 degrees, drop page 2 and flag the PDF as processed.

    The modified document is written to a temporary sibling file which then
    replaces the original, so the original is only overwritten by a
    completely written file.

    Args:
        pdf_path: ``pathlib.Path`` of the PDF to process.

    Returns:
        ``True`` if the file was rewritten, ``False`` if its keywords already
        contained ``FLAG`` and it was left untouched.

    Raises:
        Exception: whatever PyMuPDF or the file system raises; the temporary
            file is removed before the exception propagates.
    """
    # The temporary name deliberately does NOT end in ".pdf": a leftover file
    # from an interrupted run must not be picked up by the "*.pdf" glob.
    tmp_path = pdf_path.with_name(pdf_path.name + ".tmp")

    doc = fitz.open(pdf_path)
    try:
        try:
            meta = doc.metadata or {}
            keywords = meta.get("keywords", "") or meta.get("Keywords", "") or ""

            # ---- Check if already processed ----
            if FLAG in keywords:
                return False

            # ---- Rotate first page ----
            first = doc[0]
            first.set_rotation((first.rotation + 90) % 360)

            # ---- Delete page 2 (if exists) ----
            if doc.page_count > 1:
                doc.delete_page(1)

            # ---- Update metadata Keywords ----
            meta["keywords"] = (keywords + " " + FLAG).strip()
            doc.set_metadata(meta)

            # ---- Save via temporary file ----
            doc.save(tmp_path, deflate=True, garbage=3)
        finally:
            # Single close point: the document is closed exactly once on every
            # path, and always before the original file is replaced.
            doc.close()

        os.replace(tmp_path, pdf_path)
    except Exception:
        # Do not leave a half-written temporary file behind.
        if tmp_path.exists():
            tmp_path.unlink()
        raise

    return True


# Take a snapshot of the directory listing first, because the loop body
# creates and replaces files inside the directory being listed.
for pdf_path in list(BASE_DIR.glob("*.pdf")):
    print(f"Processing: {pdf_path.name}")

    try:
        if _rotate_and_mark(pdf_path):
            print(" → Rotated & marked.")
        else:
            print(" → Already rotated, skipping.")
    except Exception as e:
        # Report and continue with the next file; one bad PDF must not stop
        # the whole batch.
        print(f"❌ Error: {e}")

print("Done.")
|
||||||
155
ECG/20 ECG test2.py
Normal file
155
ECG/20 ECG test2.py
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import fitz
|
||||||
|
from pathlib import Path
|
||||||
|
import os
|
||||||
|
import easyocr
|
||||||
|
from PIL import Image
|
||||||
|
import io
|
||||||
|
import re,time
|
||||||
|
|
||||||
|
# Folder that is scanned (non-recursively) for ECG PDFs.
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\EKGforProcessing")

# Marker kept in the PDF "keywords" metadata of files that were already rotated.
FLAG = "rotated-by-script"

# Shared EasyOCR reader (Czech language, CPU only), created once and reused
# for every page that is OCR-ed.
reader = easyocr.Reader(["cs"], gpu=False)
|
||||||
|
|
||||||
|
|
||||||
|
def ocr_page(page):
    """Run OCR on a PDF page and return the recognised text.

    The page is rendered to a pixmap without an alpha channel, re-encoded as
    an in-memory PNG and handed to the module-level EasyOCR ``reader``.

    Args:
        page: a PyMuPDF page object.

    Returns:
        The text fragments reported by the reader, joined with newlines.
    """
    pixmap = page.get_pixmap(alpha=False)
    size = [pixmap.width, pixmap.height]

    png_buffer = io.BytesIO()
    Image.frombytes("RGB", size, pixmap.samples).save(png_buffer, format="PNG")

    fragments = reader.readtext(png_buffer.getvalue(), detail=0)
    return "\n".join(fragments)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_rodne_cislo(text):
    """Return the first rodné číslo (birth number) found in *text*, or None.

    A candidate is a standalone run of 9 or 10 digits written without a
    slash (word boundaries on both sides, so longer digit runs are not
    truncated into a match). Nine digits are accepted as well as ten,
    presumably to cover older nine-digit birth numbers.

    Args:
        text: OCR output to search; ``None`` or an empty string is treated
            as "nothing found".

    Returns:
        The matched digit string, or ``None`` when there is no match.
    """
    if not text:
        return None
    match = re.search(r"\b\d{9,10}\b", text)
    return match.group(0) if match else None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_date(text):
    """Return the first ``D.M.YYYY`` date found in *text*, or None.

    Day and month may have one or two digits, the year must have four;
    a time that follows the date (e.g. ``HH.MM.SS``) is not part of the
    result.
    """
    found = re.search(r"\b(\d{1,2}\.\d{1,2}\.\d{4})\b", text)
    if found is None:
        return None
    return found.group(1)
|
||||||
|
|
||||||
|
|
||||||
|
def convert_date_to_iso(dmy):
    """Turn a ``D.M.YYYY`` date string into ``YYYY-MM-DD``.

    Day and month are left-padded with zeros to two digits; the year is
    copied unchanged.
    """
    day, month, year = dmy.split(".")
    return "-".join((year, month.zfill(2), day.zfill(2)))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def rename_ecg_file(pdf_path, rc, date_dmy, *, attempts=15, delay=1.0):
    """Rename an ECG PDF to ``<rc> <YYYY-MM-DD> [EKG] [bez hodnocení].pdf``.

    The file stays in its directory; only the name changes. An existing
    file with the target name is never overwritten. When the rename fails
    with ``PermissionError`` (the file is locked, e.g. by Dropbox), it is
    retried.

    Args:
        pdf_path: ``pathlib.Path`` of the file to rename.
        rc: birth number used as the first part of the new name.
        date_dmy: date as ``D.M.YYYY``; converted to ISO form for the name.
        attempts: maximum number of rename attempts (default 15).
        delay: seconds to wait between attempts (default 1.0, i.e. up to
            about 14 seconds of waiting with the default settings).

    Returns:
        None. The outcome is reported on standard output.
    """
    date_iso = convert_date_to_iso(date_dmy)
    new_name = f"{rc} {date_iso} [EKG] [bez hodnocení].pdf"
    new_path = pdf_path.with_name(new_name)

    if new_path.exists():
        print(f" ⚠ File with name already exists: {new_name}")
        return

    # Try renaming with retries in case Dropbox locks the file
    for attempt in range(1, attempts + 1):
        try:
            pdf_path.rename(new_path)
        except FileExistsError:
            # The target appeared between the exists() check and the rename
            # (Windows refuses to rename onto an existing file).
            print(f" ⚠ File with name already exists: {new_name}")
            return
        except PermissionError:
            print(f" ⚠ File locked (Dropbox?), retrying... {attempt}/{attempts}")
            # No point in waiting after the last attempt.
            if attempt < attempts:
                time.sleep(delay)
        else:
            print(f" → File renamed to: {new_name}")
            return

    print(" ❌ Could not rename file after several attempts.")
|
||||||
|
|
||||||
|
|
||||||
|
def _rotate_and_mark(pdf_path):
    """Rotate page 1 by 90 degrees, drop page 2 and flag the PDF as processed.

    The modified document is written to a temporary sibling file which then
    replaces the original.

    Args:
        pdf_path: ``pathlib.Path`` of the PDF to process.

    Returns:
        ``True`` if the file was rewritten, ``False`` if its keywords already
        contained ``FLAG`` and it was left untouched.

    Raises:
        Exception: whatever PyMuPDF or the file system raises; the temporary
            file is removed before the exception propagates.
    """
    # The temporary name deliberately does NOT end in ".pdf": a leftover file
    # from an interrupted run must not be picked up by the "*.pdf" glob.
    tmp_path = pdf_path.with_name(pdf_path.name + ".tmp")

    doc = fitz.open(pdf_path)
    try:
        try:
            meta = doc.metadata or {}
            keywords = meta.get("keywords", "") or meta.get("Keywords", "") or ""

            if FLAG in keywords:
                return False

            first = doc[0]
            first.set_rotation((first.rotation + 90) % 360)

            if doc.page_count > 1:
                doc.delete_page(1)

            meta["keywords"] = (keywords + " " + FLAG).strip()
            doc.set_metadata(meta)

            doc.save(tmp_path, deflate=True, garbage=3)
        finally:
            # Single close point: closed exactly once on every path, and
            # always before the original file is replaced.
            doc.close()

        os.replace(tmp_path, pdf_path)
    except Exception:
        # Do not leave a half-written temporary file behind.
        if tmp_path.exists():
            tmp_path.unlink()
        raise

    return True


def _ocr_and_rename(pdf_path):
    """OCR the first page, print what was recognised and rename the file.

    The file is renamed only when both a birth number and a date were
    found in the OCR text; otherwise a warning is printed.
    """
    doc = fitz.open(pdf_path)
    try:
        print(" Performing OCR...")
        text = ocr_page(doc[0])
    finally:
        # IMPORTANT: close file BEFORE renaming
        doc.close()

    print("----- OCR RESULT -----")
    print(text)
    print("----------------------")

    rc = extract_rodne_cislo(text)
    date = extract_date(text)

    print("\n----- EXTRACTED DATA -----")
    print("Rodné číslo :", rc)
    print("Datum :", date)
    print("---------------------------")

    if rc and date:
        rename_ecg_file(pdf_path, rc, date)
    else:
        print(" ⚠ Missing RC or date – file NOT renamed.")


# Take a snapshot of the directory listing first, because the loop body
# replaces and renames files inside the directory being listed.
for pdf_path in list(BASE_DIR.glob("*.pdf")):
    print(f"\nProcessing: {pdf_path.name}")

    try:
        # 1) Rotate + mark, unless the file was already processed earlier.
        if _rotate_and_mark(pdf_path):
            print(" → Rotated + saved + marked")
        else:
            print(" → Already rotated, skipping rotation.")

        # 2) OCR + rename, identical for both cases.
        _ocr_and_rename(pdf_path)
    except Exception as e:
        # Report and continue with the next file; one bad PDF must not stop
        # the whole batch.
        print("❌ Error:", e)

print("\nDone.")
|
||||||
Reference in New Issue
Block a user