73 lines
3.0 KiB
Python
73 lines
3.0 KiB
Python
import io
|
|
|
|
import PyPDF2
|
|
import os
|
|
|
|
from PyPDF2 import PdfReader, PdfWriter
|
|
|
|
|
|
def optimize_pdf_with_images(input_file, destination_folder):
    """Re-encode the embedded images of a PDF and save the result.

    Every image XObject listed in a page's ``/Resources`` is handed to
    ``compress_image_for_pdf``. When that returns a stream, the XObject is
    replaced by a new ``/DCTDecode`` stream holding the compressed bytes;
    when it returns ``None`` the image is left untouched. All pages are then
    written to a new PDF in ``destination_folder`` under the input file's
    base name.

    Args:
        input_file: Path of the PDF to read.
        destination_folder: Directory that receives the optimized copy. It is
            created when it does not exist yet.

    Returns:
        The path of the written PDF.
    """
    optimized_pdf_path = os.path.join(destination_folder, os.path.basename(input_file))

    # Writing into a missing folder would otherwise fail only at the very end,
    # after all images have already been processed.
    if destination_folder:
        os.makedirs(destination_folder, exist_ok=True)

    pdf_reader = PdfReader(input_file)
    try:
        pdf_writer = PdfWriter()
        for pdf_page in pdf_reader.pages:
            # Images are swapped before the page is handed to the writer so
            # the writer picks up the modified resources.
            _recompress_page_images(pdf_page)
            pdf_writer.add_page(pdf_page)

        print("Writing the optimized file.")
        print(f"optimized_pdf_path : {optimized_pdf_path}")  # Log the file path
        with open(optimized_pdf_path, "wb") as output_pdf:
            pdf_writer.write(output_pdf)
    finally:
        # The reader's stream must stay open until the writer is done:
        # objects that are resolved lazily at write time are read from it.
        pdf_reader.stream.close()

    print(f"Optimized PDF saved at: {optimized_pdf_path}")
    return optimized_pdf_path


def _recompress_page_images(pdf_page):
    """Replace each image XObject of *pdf_page* with its compressed version."""
    # Imported here so the block does not rely on module-level imports that
    # the file does not have; PyPDF2 itself is already a dependency.
    from PyPDF2.generic import NameObject, NumberObject, StreamObject

    # NOTE(review): `Image` (presumably PIL.Image) and `compress_image_for_pdf`
    # are not defined in the visible part of this file - confirm they are
    # provided at module level before running.

    page_resources = pdf_page.get("/Resources")
    if not page_resources:
        return
    page_resources = page_resources.get_object()  # Resolve IndirectObject
    if "/XObject" not in page_resources:
        return
    image_objects = page_resources["/XObject"].get_object()

    # Iterate over a snapshot of the names because entries are reassigned
    # inside the loop.
    for img_name in list(image_objects):
        img_obj = image_objects[img_name]
        if img_obj.get("/Subtype") != "/Image":
            continue

        # Raw stream bytes exactly as stored in the PDF (no filter decoding).
        image_stream = io.BytesIO(img_obj._data)

        compressed_image_stream = compress_image_for_pdf(image_stream)
        if compressed_image_stream is None:
            print(f"Skipping invalid image: {img_name}")
            continue  # Keep the original image if compression failed

        # The compressed image dictates the dimensions and colour space.
        resized_image = Image.open(compressed_image_stream)
        new_width, new_height = resized_image.size
        # A single-channel image declared as RGB would not match its data.
        if getattr(resized_image, "mode", None) == "L":
            color_space = "/DeviceGray"
        else:
            color_space = "/DeviceRGB"

        # NOTE(review): only the keys set below end up on the new object;
        # other entries of the original image dictionary (e.g. /SMask) are
        # not carried over.
        new_image_object = StreamObject()
        new_image_object._data = compressed_image_stream.getvalue()
        new_image_object.update({
            NameObject("/Filter"): NameObject("/DCTDecode"),
            NameObject("/Subtype"): NameObject("/Image"),
            NameObject("/Width"): NumberObject(new_width),
            NameObject("/Height"): NumberObject(new_height),
            NameObject("/ColorSpace"): NameObject(color_space),
            NameObject("/BitsPerComponent"): NumberObject(8),
        })

        # Replace the old image with the new one
        image_objects[img_name] = new_image_object
|
|
|
|
# Script entry: optimize one hard-coded document into the "Converted" folder.
_source_pdf = r"u:\Dropbox\!!!Days\Downloads Z230\Dokumentace\6504140489 2025-03-04 Harsa, Radovan [PZ psychiatrie] [28FEB2025-04MAR2025 vágní suicidální proklamace, zbytečná hospitalizace].pdf"
_converted_dir = r"u:\Dropbox\!!!Days\Downloads Z230\Dokumentace\Converted"
optimize_pdf_with_images(_source_pdf, _converted_dir)