Files
medicus/PDF optimization.py
2025-03-23 10:17:17 +01:00

73 lines
3.0 KiB
Python

import io
import PyPDF2
import os
from PyPDF2 import PdfReader, PdfWriter
def _recompress_page_images(pdf_page, already_replaced):
    """Replace the image XObjects of *pdf_page* with recompressed JPEG streams.

    Args:
        pdf_page: A page object taken from ``PdfReader.pages``; its
            ``/Resources`` -> ``/XObject`` dictionary is modified in place.
        already_replaced: Set of ``id()`` values of replacement streams
            created so far. It is updated in place and lets the caller share
            the bookkeeping across pages.

    Images for which ``compress_image_for_pdf`` returns ``None`` are left
    untouched and reported on stdout.
    """
    # The replacement stream is built from these classes; import them here so
    # the function does not depend on names the module never imports.
    from PIL import Image
    from PyPDF2.generic import NameObject, NumberObject, StreamObject

    # A JPEG's component count must match the declared colour space;
    # everything that is not grayscale or CMYK keeps the original /DeviceRGB.
    color_space_by_mode = {"L": "/DeviceGray", "CMYK": "/DeviceCMYK"}

    page_resources = pdf_page.get("/Resources")
    if not page_resources:
        return
    page_resources = page_resources.get_object()  # Resolve IndirectObject
    if "/XObject" not in page_resources:
        return
    image_objects = page_resources["/XObject"].get_object()

    # Iterate over a snapshot of the keys because entries are reassigned below.
    for img_name in list(image_objects):
        img_obj = image_objects[img_name]
        if id(img_obj) in already_replaced:
            # The same /XObject dictionary can be reached from several pages;
            # recompressing an already replaced image would only degrade it.
            continue
        if img_obj.get("/Subtype") != "/Image":
            continue

        # Extract the image data exactly as stored in the stream object.
        image_stream = io.BytesIO(img_obj._data)

        # NOTE(review): compress_image_for_pdf is not defined in the visible
        # part of this file; it is assumed to return a BytesIO-like object
        # holding JPEG data, or None when the image cannot be processed.
        compressed_image_stream = compress_image_for_pdf(image_stream)
        if compressed_image_stream is None:
            print(f"Skipping invalid image: {img_name}")
            continue  # Skip this image if compression failed

        jpeg_bytes = compressed_image_stream.getvalue()

        # Read size and mode from a fresh buffer so the result does not
        # depend on where the helper left the stream position.
        with Image.open(io.BytesIO(jpeg_bytes)) as resized_image:
            new_width, new_height = resized_image.size
            color_space = color_space_by_mode.get(resized_image.mode, "/DeviceRGB")

        # Create a new image object with the compressed data.
        # NOTE(review): entries of the old image dictionary (for example a
        # soft mask) are not carried over - confirm that this is acceptable.
        new_image_object = StreamObject()
        new_image_object._data = jpeg_bytes
        new_image_object.update({
            NameObject("/Filter"): NameObject("/DCTDecode"),
            NameObject("/Subtype"): NameObject("/Image"),
            NameObject("/Width"): NumberObject(new_width),
            NameObject("/Height"): NumberObject(new_height),
            NameObject("/ColorSpace"): NameObject(color_space),
            NameObject("/BitsPerComponent"): NumberObject(8),
        })

        # Replace the old image with the new one.
        image_objects[img_name] = new_image_object
        already_replaced.add(id(new_image_object))


def optimize_pdf_with_images(input_file, destination_folder):
    """Optimize a PDF by recompressing its embedded images.

    Every page of *input_file* is copied to a new PDF; image XObjects found
    in a page's own ``/Resources`` are replaced by the output of
    ``compress_image_for_pdf``.

    Args:
        input_file: Path of the PDF to read.
        destination_folder: Folder that receives the optimized PDF. The
            output keeps the base name of *input_file*; the folder is
            created when it does not exist yet.

    Returns:
        The path of the written PDF.
    """
    if destination_folder:
        os.makedirs(destination_folder, exist_ok=True)
    optimized_pdf_path = os.path.join(destination_folder, os.path.basename(input_file))

    pdf_reader = PdfReader(input_file)
    pdf_writer = PdfWriter()
    already_replaced = set()
    try:
        for pdf_page in pdf_reader.pages:
            _recompress_page_images(pdf_page, already_replaced)
            pdf_writer.add_page(pdf_page)

        print("Writing the optimized file.")
        print(f"optimized_pdf_path : {optimized_pdf_path}")  # Log the file path
        with open(optimized_pdf_path, "wb") as output_pdf:
            pdf_writer.write(output_pdf)
    finally:
        # Close the reader only after writing: the writer may still have to
        # resolve objects that live in the reader's stream.
        pdf_reader.stream.close()

    print(f"Optimized PDF saved at: {optimized_pdf_path}")
    return optimized_pdf_path
# Script entry: optimize one fixed document into the "Converted" folder.
# NOTE(review): the hard-coded path appears to contain personal details;
# consider taking both paths from the command line instead.
optimize_pdf_with_images(
    input_file=r"u:\Dropbox\!!!Days\Downloads Z230\Dokumentace\6504140489 2025-03-04 Harsa, Radovan [PZ psychiatrie] [28FEB2025-04MAR2025 vágní suicidální proklamace, zbytečná hospitalizace].pdf",
    destination_folder=r"u:\Dropbox\!!!Days\Downloads Z230\Dokumentace\Converted",
)