Z230
This commit is contained in:
45
AdobeFlatten/10 FlattenAdobe.py
Normal file
45
AdobeFlatten/10 FlattenAdobe.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import fitz
|
||||
from pathlib import Path
|
||||
|
||||
# Folder whose top-level "*.pdf" files are picked up by main() for flattening.
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\AdobeFlattenStamp")
|
||||
|
||||
def flatten_pdf_rasterize(input_pdf: Path, dpi: int = 400):
    """Flatten a PDF by rasterizing every page into an image-only PDF.

    Each page of *input_pdf* is rendered to a pixmap and inserted as a single
    image on a new page of the same width and height. The result is written
    next to the input as ``<stem>_flatten.pdf``.

    Args:
        input_pdf: Path of the PDF to flatten.
        dpi: Resolution passed to ``page.get_pixmap`` when rendering a page.
    """
    print(f"Processing: {input_pdf.name}")
    output_pdf = input_pdf.with_name(input_pdf.stem + "_flatten.pdf")

    # Open both documents as context managers so they are closed even when
    # rendering or saving raises part-way through.
    with fitz.open(input_pdf) as doc, fitz.open() as new_doc:
        for page in doc:
            # Render the page to a high-resolution image.
            pix = page.get_pixmap(dpi=dpi)

            # Create a new PDF page with the same size and fill it with the
            # rasterized image.
            new_page = new_doc.new_page(
                width=page.rect.width,
                height=page.rect.height,
            )
            new_page.insert_image(new_page.rect, pixmap=pix)

        new_doc.save(output_pdf, deflate=True)

    print(f" ✔ Saved: {output_pdf.name}")
|
||||
|
||||
|
||||
def main():
    """Flatten every PDF found directly inside BASE_DIR."""
    # Skip files written by an earlier run: they also match "*.pdf", and
    # re-processing them would produce "*_flatten_flatten.pdf" copies.
    pdfs = sorted(
        pdf for pdf in BASE_DIR.glob("*.pdf")
        if not pdf.stem.endswith("_flatten")
    )
    if not pdfs:
        print("No PDF files found.")
        return

    for pdf in pdfs:
        flatten_pdf_rasterize(pdf)

    print("\nAll files processed.")


if __name__ == "__main__":
    main()
|
||||
@@ -26,9 +26,22 @@ def ocr_page(page):
|
||||
|
||||
|
||||
def extract_rodne_cislo(text):
    """Extract the first rodné číslo (Czech birth number) found in *text*.

    Recognised forms:
      - 6 digits + slash + 4 digits   -> 655527/1910
      - 6 digits + slash + 3 digits   -> 655527/910
      - 9 or 10 digits without slash  -> 6555271910

    Returns:
        The number as 10 digits without a slash (a 3-digit suffix is
        left-padded with a zero), or None when no match is found.
    """
    m = re.search(r"\b(\d{6})/?(\d{3,4})\b", text)
    if not m:
        return None

    left = m.group(1)
    right = m.group(2).zfill(4)  # ensure 4 digits

    return left + right
|
||||
|
||||
|
||||
def extract_date(text):
|
||||
|
||||
@@ -10,7 +10,7 @@ FOLDER_2 = Path(r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
|
||||
TRIANGLE = "▲"
|
||||
|
||||
# Set to True for testing (no changes), False to really rename
DRY_RUN = False
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
Reference in New Issue
Block a user