This commit is contained in:
2025-11-23 22:05:21 +01:00
parent d4894fde95
commit 5fe221ea94
3 changed files with 62 additions and 4 deletions

View File

@@ -0,0 +1,45 @@
import fitz
from pathlib import Path
# Folder whose top-level "*.pdf" files main() passes to flatten_pdf_rasterize().
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\AdobeFlattenStamp")
def flatten_pdf_rasterize(input_pdf: Path, dpi: int = 400):
    """Write a rasterized copy of *input_pdf* next to the original.

    Every page is rendered to a bitmap and that bitmap is placed on a new
    blank page of the same size, so each output page contains nothing but
    the rendered image. The result is saved in the same folder as
    ``<stem>_flatten.pdf``; the input file itself is not modified.

    Args:
        input_pdf: Path of the PDF to rasterize.
        dpi: Resolution used to render each page. Defaults to 400.
    """
    print(f"Processing: {input_pdf.name}")
    output_pdf = input_pdf.with_name(input_pdf.stem + "_flatten.pdf")

    # Context managers guarantee both documents are closed even when
    # rendering or saving raises part-way through.
    with fitz.open(input_pdf) as doc, fitz.open() as new_doc:
        for page in doc:
            # Render the page to an image at the requested resolution.
            pix = page.get_pixmap(dpi=dpi)
            # The new page keeps the original page dimensions (in points),
            # so the image is scaled back down to the original page size.
            new_page = new_doc.new_page(
                width=page.rect.width,
                height=page.rect.height,
            )
            new_page.insert_image(new_page.rect, pixmap=pix)

        new_doc.save(output_pdf, deflate=True)

    print(f" ✔ Saved: {output_pdf.name}")
def main():
    """Rasterize every PDF found directly inside ``BASE_DIR``.

    Files whose name ends in ``_flatten.pdf`` are skipped: that is the name
    ``flatten_pdf_rasterize`` gives its output, so without the filter a
    second run would rasterize its own results again and produce
    ``*_flatten_flatten.pdf`` files.
    """
    # Sorted so the processing order (and the console log) is deterministic.
    pdfs = sorted(
        pdf
        for pdf in BASE_DIR.glob("*.pdf")
        if not pdf.stem.endswith("_flatten")
    )
    if not pdfs:
        print("No PDF files found.")
        return

    for pdf in pdfs:
        flatten_pdf_rasterize(pdf)

    print("\nAll files processed.")


if __name__ == "__main__":
    main()

View File

@@ -26,9 +26,22 @@ def ocr_page(page):
def extract_rodne_cislo(text):
    """Extract the first rodné číslo found in *text*.

    Accepted forms:
      - 6 digits + slash + 4 digits   -> 655527/1910
      - 6 digits + slash + 3 digits   -> 655527/910
      - 9 or 10 digits without slash  -> 6555271910

    Returns:
        The number as 10 digits without a slash; a 3-digit part after the
        first six digits is left-padded with a zero to 4 digits. ``None``
        when *text* is empty or ``None``, or when nothing matches.
    """
    # re.search() raises TypeError on None, so treat missing text as "no match".
    if not text:
        return None

    m = re.search(r"\b(\d{6})/?(\d{3,4})\b", text)
    if m is None:
        return None

    left, right = m.groups()
    return left + right.zfill(4)  # ensure 4 digits after the first six
def extract_date(text): def extract_date(text):

View File

@@ -10,7 +10,7 @@ FOLDER_2 = Path(r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
TRIANGLE = "" TRIANGLE = ""
# Set to True for testing (no changes), False to really rename # Set to True for testing (no changes), False to really rename
DRY_RUN = True DRY_RUN = False
def main(): def main():