From 5fe221ea94b54dd0df5969c2daf080c562c06fef Mon Sep 17 00:00:00 2001
From: "vladimir.buzalka"
Date: Sun, 23 Nov 2025 22:05:21 +0100
Subject: [PATCH] Z230

---
Review notes (free-text area between the "---" separator and the diffstat;
not part of the commit message and not part of the change itself):

* Line structure and Python indentation of this patch were reconstructed
  from a copy in which all whitespace runs had been collapsed; the exact
  spacing inside string literals and comments could not be recovered.
* AdobeFlatten/10 FlattenAdobe.py: main() globs "*.pdf" in BASE_DIR while
  flatten_pdf_rasterize() writes "<stem>_flatten.pdf" next to its input, so
  a second run also picks up the earlier outputs and rasterizes them again.
* AdobeFlatten/10 FlattenAdobe.py: doc and new_doc are closed only on the
  success path; an exception while rendering or saving leaves both open.
* ECG/20 ECG test2.py: the new pattern (\d{6})/?(\d{3,4}) also accepts nine
  digits without a slash, and zfill(4) pads a 3-digit suffix with a leading
  zero; the docstring lists only the 10-digit form for the no-slash case.
  NOTE(review): confirm that left-padding the suffix is the intended
  normalization.
* RenameMedevioPrilohy: DRY_RUN is flipped to False, i.e. per the comment
  above it the script now really renames files when run.

 AdobeFlatten/10 FlattenAdobe.py               | 45 +++++++++++++++++++
 ECG/20 ECG test2.py                           | 19 ++++++--
 .../20 TaggDownloadedPrilohyFromMedevio.py    |  2 +-
 3 files changed, 62 insertions(+), 4 deletions(-)
 create mode 100644 AdobeFlatten/10 FlattenAdobe.py

diff --git a/AdobeFlatten/10 FlattenAdobe.py b/AdobeFlatten/10 FlattenAdobe.py
new file mode 100644
index 0000000..8dc86c2
--- /dev/null
+++ b/AdobeFlatten/10 FlattenAdobe.py
@@ -0,0 +1,45 @@
+import fitz
+from pathlib import Path
+
+BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\AdobeFlattenStamp")
+
+def flatten_pdf_rasterize(input_pdf: Path):
+    print(f"Processing: {input_pdf.name}")
+    doc = fitz.open(input_pdf)
+
+    # Create a new empty PDF
+    new_doc = fitz.open()
+
+    for page in doc:
+        # Render each page to a high-resolution image
+        pix = page.get_pixmap(dpi=400)
+
+        # Create a new PDF page with same size
+        new_page = new_doc.new_page(width=page.rect.width, height=page.rect.height)
+
+        # Insert the rasterized image
+        new_page.insert_image(new_page.rect, pixmap=pix)
+
+    # Save output
+    output_pdf = input_pdf.with_name(input_pdf.stem + "_flatten.pdf")
+    new_doc.save(output_pdf, deflate=True)
+    new_doc.close()
+    doc.close()
+
+    print(f" ✔ Saved: {output_pdf.name}")
+
+
+def main():
+    pdfs = list(BASE_DIR.glob("*.pdf"))
+    if not pdfs:
+        print("No PDF files found.")
+        return
+
+    for pdf in pdfs:
+        flatten_pdf_rasterize(pdf)
+
+    print("\nAll files processed.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ECG/20 ECG test2.py b/ECG/20 ECG test2.py
index 61862dd..895975f 100644
--- a/ECG/20 ECG test2.py
+++ b/ECG/20 ECG test2.py
@@ -26,9 +26,22 @@ def ocr_page(page):
 
 
 def extract_rodne_cislo(text):
-    """Extract 10-digit rodné číslo (no slash)."""
-    m = re.search(r"\b\d{9,10}\b", text)
-    return m.group(0) if m else None
+    """
+    Extract rodné číslo in formats:
+    - 6 digits + slash + 4 digits → 655527/1910
+    - 6 digits + slash + 3 digits → 655527/910
+    - 10 digits without slash → 6555271910
+
+    Always returns 10 digits without slash.
+    """
+    m = re.search(r"\b(\d{6})/?(\d{3,4})\b", text)
+    if not m:
+        return None
+
+    left = m.group(1)
+    right = m.group(2).zfill(4)  # ensure 4 digits
+
+    return left + right
 
 
 def extract_date(text):
diff --git a/RenameMedevioPrilohy/20 TaggDownloadedPrilohyFromMedevio.py b/RenameMedevioPrilohy/20 TaggDownloadedPrilohyFromMedevio.py
index 4564571..e64daab 100644
--- a/RenameMedevioPrilohy/20 TaggDownloadedPrilohyFromMedevio.py
+++ b/RenameMedevioPrilohy/20 TaggDownloadedPrilohyFromMedevio.py
@@ -10,7 +10,7 @@ FOLDER_2 = Path(r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
 TRIANGLE = "▲"
 
 # Set to True for testing (no changes), False to really rename
-DRY_RUN = True
+DRY_RUN = False
 
 
 def main():