Merge remote-tracking branch 'origin/master'
This commit is contained in:
53
ECG/10 ECG test1.py
Normal file
53
ECG/10 ECG test1.py
Normal file
@@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import fitz
|
||||
from pathlib import Path
|
||||
import os
|
||||
|
||||
# Folder that is scanned (non-recursively) for ECG PDFs to process.
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\EKGforProcessing")

# Marker appended to a PDF's "keywords" metadata once the file has been
# rotated, so that re-running the script never rotates the same file twice.
FLAG = "rotated-by-script"

# Snapshot the listing before touching anything: the loop creates and replaces
# files in BASE_DIR, which must not feed back into a lazily evaluated glob.
for pdf_path in list(BASE_DIR.glob("*.pdf")):
    if pdf_path.name.endswith(".tmp.pdf"):
        # "<name>.tmp.pdf" is this script's own temporary output (it also
        # matches "*.pdf"); a leftover from an interrupted run is not a source.
        continue

    print(f"Processing: {pdf_path.name}")
    tmp_path = pdf_path.with_suffix(".tmp.pdf")

    try:
        # The context manager closes the document exactly once, on every exit
        # path (skip, success, or error).
        with fitz.open(pdf_path) as doc:
            meta = doc.metadata or {}
            keywords = meta.get("keywords", "") or meta.get("Keywords", "") or ""

            # ---- Check if already processed ----
            if FLAG in keywords:
                print(" → Already rotated, skipping.")
                continue

            # ---- Rotate first page by a further 90 degrees ----
            first = doc[0]
            first.set_rotation((first.rotation + 90) % 360)

            # ---- Delete page 2 (if exists) ----
            if doc.page_count > 1:
                doc.delete_page(1)

            # ---- Update metadata keywords ----
            meta["keywords"] = (keywords + " " + FLAG).strip()
            doc.set_metadata(meta)

            # ---- Save via temporary file ----
            doc.save(tmp_path, deflate=True, garbage=3)

        # The document is closed here, so the original can be swapped out.
        os.replace(tmp_path, pdf_path)
        print(" → Rotated & marked.")

    except Exception as e:
        # Top-level boundary of a batch job: report and carry on with the
        # next file instead of aborting the whole run.
        print(f"❌ Error: {e}")
        # Best-effort removal of a half-written temporary file.
        try:
            tmp_path.unlink()
        except OSError:
            pass

print("Done.")
|
||||
155
ECG/20 ECG test2.py
Normal file
155
ECG/20 ECG test2.py
Normal file
@@ -0,0 +1,155 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import fitz
|
||||
from pathlib import Path
|
||||
import os
|
||||
import easyocr
|
||||
from PIL import Image
|
||||
import io
|
||||
import re,time
|
||||
|
||||
# Folder that is scanned for ECG PDFs awaiting processing.
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\EKGforProcessing")

# Keyword stored in a PDF's metadata to mark the file as already rotated.
FLAG = "rotated-by-script"

# Single shared EasyOCR reader (language code "cs", GPU disabled); it is
# created once at import time and reused for every page.
reader = easyocr.Reader(["cs"], gpu=False)
|
||||
|
||||
|
||||
def ocr_page(page):
    """Run OCR on a single PDF page and return the recognised text.

    The page is rendered to a pixmap without an alpha channel, re-encoded as
    an in-memory PNG and handed to the module-level ``reader``.

    Args:
        page: A page object providing ``get_pixmap`` (here: a PyMuPDF page).

    Returns:
        The text fragments reported by the reader, joined with newlines.
    """
    pixmap = page.get_pixmap(alpha=False)
    dimensions = [pixmap.width, pixmap.height]

    # Go through PIL to obtain PNG bytes that the reader can decode.
    png_buffer = io.BytesIO()
    Image.frombytes("RGB", dimensions, pixmap.samples).save(png_buffer, format="PNG")

    # detail=0 asks the reader for the plain text fragments only.
    fragments = reader.readtext(png_buffer.getvalue(), detail=0)
    return "\n".join(fragments)
|
||||
|
||||
|
||||
def extract_rodne_cislo(text):
    """Find a Czech birth number ("rodné číslo") written without a slash.

    Args:
        text: Text to search, e.g. the OCR output of a page.

    Returns:
        The first standalone run of 9 or 10 digits in *text*, or ``None``
        when there is no such run. No further validation is performed.
    """
    found = re.search(r"\b\d{9,10}\b", text)
    if found is None:
        return None
    return found.group(0)
|
||||
|
||||
|
||||
def extract_date(text):
    """Find the first date of the form ``D.M.YYYY`` / ``DD.MM.YYYY`` in *text*.

    A trailing time stamp such as ``HH.MM.SS`` is ignored because the pattern
    requires a four-digit year as the third component.

    Args:
        text: Text to search, e.g. the OCR output of a page.

    Returns:
        The matched date exactly as it appears in *text*, or ``None`` when no
        date is found. The date is not checked for calendar validity.
    """
    found = re.search(r"\b(\d{1,2}\.\d{1,2}\.\d{4})\b", text)
    if found is None:
        return None
    return found.group(1)
|
||||
|
||||
|
||||
def convert_date_to_iso(dmy):
    """Reorder a dotted ``D.M.YYYY`` date string into ``YYYY-MM-DD``.

    Day and month are left-padded with zeros to two characters; the year is
    used as given. The components are not validated as a calendar date.

    Args:
        dmy: Date string with exactly three dot-separated parts.

    Returns:
        The date in ``YYYY-MM-DD`` order.

    Raises:
        ValueError: If *dmy* does not split into exactly three parts.
    """
    day, month, year = dmy.split(".")
    return "-".join((year, month.zfill(2), day.zfill(2)))
|
||||
|
||||
|
||||
|
||||
|
||||
def rename_ecg_file(pdf_path, rc, date_dmy, attempts=15, delay=1.0):
    """Rename an ECG PDF to ``"<rc> <YYYY-MM-DD> [EKG] [bez hodnocení].pdf"``.

    The file stays in its current folder. Nothing is renamed when a file with
    the target name already exists. A ``PermissionError`` (the file is
    temporarily locked, e.g. by a sync client) is retried.

    Args:
        pdf_path: ``pathlib.Path`` of the PDF to rename.
        rc: Birth number ("rodné číslo") used as the first part of the name.
        date_dmy: Recording date as ``D.M.YYYY``; converted to ISO order.
        attempts: Maximum number of rename attempts (default 15).
        delay: Seconds to wait between attempts (default 1.0), i.e. up to
            about ``(attempts - 1) * delay`` seconds of waiting in total.

    Returns:
        None. The outcome is reported on standard output.
    """
    date_iso = convert_date_to_iso(date_dmy)
    new_name = f"{rc} {date_iso} [EKG] [bez hodnocení].pdf"
    new_path = pdf_path.with_name(new_name)

    if new_path.exists():
        print(f" ⚠ File with name already exists: {new_name}")
        return

    for attempt in range(1, attempts + 1):
        try:
            pdf_path.rename(new_path)
        except FileExistsError:
            # The target appeared between the existence check and the rename
            # (Windows refuses to overwrite); treat it like the check above.
            print(f" ⚠ File with name already exists: {new_name}")
            return
        except PermissionError:
            print(f" ⚠ File locked (Dropbox?), retrying... {attempt}/{attempts}")
            # No point in waiting after the final attempt.
            if attempt < attempts:
                time.sleep(delay)
        else:
            print(f" → File renamed to: {new_name}")
            return

    print(" ❌ Could not rename file after several attempts.")
|
||||
|
||||
|
||||
def _rotate_and_mark(pdf_path):
    """Rotate page 1 by 90°, drop page 2 and flag the PDF as processed.

    Returns:
        True if the file was rewritten, False if its keywords already contain
        FLAG (the file is then left untouched).

    Raises:
        Exception: Whatever opening, editing, saving or replacing the file
            raises; a half-written temporary file is removed first.
    """
    tmp = pdf_path.with_suffix(".tmp.pdf")
    try:
        # The context manager closes the document exactly once on every path.
        with fitz.open(pdf_path) as doc:
            meta = doc.metadata or {}
            # Trailing `or ""` guards against a missing/None keywords entry.
            keywords = meta.get("keywords", "") or meta.get("Keywords", "") or ""
            if FLAG in keywords:
                return False

            first = doc[0]
            first.set_rotation((first.rotation + 90) % 360)

            if doc.page_count > 1:
                doc.delete_page(1)

            meta["keywords"] = (keywords + " " + FLAG).strip()
            doc.set_metadata(meta)

            doc.save(tmp, deflate=True, garbage=3)

        # The document is closed here, so the original can be swapped out.
        os.replace(tmp, pdf_path)
    except Exception:
        # Best-effort cleanup of the temporary file, then let the caller report.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
    return True


def _ocr_and_rename(pdf_path):
    """OCR the first page of *pdf_path*, print the findings and rename the file.

    The file is renamed only when both a birth number and a date were found.
    """
    # The PDF must be closed before renaming (the original author marks this
    # as essential); leaving the `with` block guarantees that.
    with fitz.open(pdf_path) as doc:
        print(" Performing OCR...")
        text = ocr_page(doc[0])

    print("----- OCR RESULT -----")
    print(text)
    print("----------------------")

    rc = extract_rodne_cislo(text)
    date = extract_date(text)

    print("\n----- EXTRACTED DATA -----")
    print("Rodné číslo :", rc)
    print("Datum :", date)
    print("---------------------------")

    if rc and date:
        rename_ecg_file(pdf_path, rc, date)
    else:
        print(" ⚠ Missing RC or date – file NOT renamed.")


# Snapshot the listing first: files are created, replaced and renamed inside
# BASE_DIR during the loop, which must not feed back into a lazy glob.
for pdf_path in list(BASE_DIR.glob("*.pdf")):
    if pdf_path.name.endswith(".tmp.pdf"):
        # "<name>.tmp.pdf" is this script's own temporary output (it also
        # matches "*.pdf"); a leftover from an interrupted run is not a source.
        continue

    print(f"\nProcessing: {pdf_path.name}")

    try:
        # 1) Rotate unless the file is already flagged as rotated.
        if _rotate_and_mark(pdf_path):
            print(" → Rotated + saved + marked")
        else:
            print(" → Already rotated, skipping rotation.")

        # 2) In both cases: OCR the first page and rename the file.
        _ocr_and_rename(pdf_path)

    except Exception as e:
        # Top-level boundary of a batch job: report and continue with the
        # next file instead of aborting the whole run.
        print("❌ Error:", e)

print("\nDone.")
|
||||
Reference in New Issue
Block a user