Z230
This commit is contained in:
45
AdobeFlatten/10 FlattenAdobe.py
Normal file
45
AdobeFlatten/10 FlattenAdobe.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
import fitz
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Folder whose top-level *.pdf files are picked up and flattened by main().
BASE_DIR = Path(r"u:\Dropbox\Ordinace\Dokumentace_ke_zpracování\AdobeFlattenStamp")
|
||||||
|
|
||||||
|
def flatten_pdf_rasterize(input_pdf: Path, dpi: int = 400):
    """Flatten a PDF by rasterizing each page into a new image-only PDF.

    Every page of ``input_pdf`` is rendered to a pixmap and placed on a
    same-sized page of a freshly created document, which is then saved next
    to the input as ``<stem>_flatten.pdf``.

    Args:
        input_pdf: Path of the PDF to flatten.
        dpi: Resolution used to render each page (default 400).
    """
    print(f"Processing: {input_pdf.name}")
    output_pdf = input_pdf.with_name(input_pdf.stem + "_flatten.pdf")

    doc = fitz.open(input_pdf)
    try:
        # fitz.open() without arguments creates a new empty PDF.
        new_doc = fitz.open()
        try:
            for page in doc:
                # Render the page to a high-resolution image.
                pix = page.get_pixmap(dpi=dpi)

                # New page with the same size as the source page, filled
                # entirely by the rasterized image.
                new_page = new_doc.new_page(
                    width=page.rect.width, height=page.rect.height
                )
                new_page.insert_image(new_page.rect, pixmap=pix)

            new_doc.save(output_pdf, deflate=True)
        finally:
            # Close even when rendering or saving fails, so the handles are
            # not leaked while the caller moves on to the next file.
            new_doc.close()
    finally:
        doc.close()

    print(f" ✔ Saved: {output_pdf.name}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Flatten every PDF located directly in ``BASE_DIR``.

    Files whose stem already ends in ``_flatten`` are skipped: they are the
    outputs of ``flatten_pdf_rasterize`` from an earlier run, and processing
    them again would produce ``*_flatten_flatten.pdf`` copies.
    """
    # Sorted so the processing order is stable between runs.
    pdfs = sorted(
        pdf
        for pdf in BASE_DIR.glob("*.pdf")
        if not pdf.stem.endswith("_flatten")
    )
    if not pdfs:
        print("No PDF files found.")
        return

    for pdf in pdfs:
        flatten_pdf_rasterize(pdf)

    print("\nAll files processed.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -26,9 +26,22 @@ def ocr_page(page):
|
|||||||
|
|
||||||
|
|
||||||
def extract_rodne_cislo(text):
    """Extract a rodné číslo from *text* and return it as 10 digits.

    Recognised formats:
    - 6 digits + slash + 4 digits   -> 655527/1910
    - 6 digits + slash + 3 digits   -> 655527/910
    - 9 or 10 digits without slash  -> 6555271910

    Only the first match in *text* is used.

    Returns:
        The matched number without the slash. A 3-digit suffix is
        left-padded with a zero so the result always has 10 digits.
        ``None`` when *text* is empty/``None`` or contains no match.
    """
    if not text:
        # re.search raises TypeError on None; treat missing text as no match.
        return None

    m = re.search(r"\b(\d{6})/?(\d{3,4})\b", text)
    if not m:
        return None

    left, right = m.groups()
    return left + right.zfill(4)  # ensure a 4-digit suffix
|
||||||
|
|
||||||
|
|
||||||
def extract_date(text):
|
def extract_date(text):
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ FOLDER_2 = Path(r"U:\Dropbox\Ordinace\Dokumentace_ke_zpracování\MP")
|
|||||||
TRIANGLE = "▲"
|
TRIANGLE = "▲"
|
||||||
|
|
||||||
# Set to True for testing (no changes), False to really rename
|
# Set to True for testing (no changes), False to really rename
|
||||||
DRY_RUN = True
|
DRY_RUN = False
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|||||||
Reference in New Issue
Block a user