První nahrání
This commit is contained in:
73
PDF optimization.py
Normal file
73
PDF optimization.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import io
|
||||
|
||||
import PyPDF2
|
||||
import os
|
||||
|
||||
from PyPDF2 import PdfReader, PdfWriter
|
||||
|
||||
|
||||
def _recompress_page_images(pdf_page):
    """Swap every image XObject on *pdf_page* for a recompressed JPEG stream.

    The page's resource dictionary is modified in place. Images for which
    ``compress_image_for_pdf`` returns ``None`` are left untouched.
    """
    # Function-scope import keeps this block self-contained; PyPDF2 is
    # already a dependency of this file.
    from PyPDF2.generic import NameObject, NumberObject, StreamObject

    page_resources = pdf_page.get("/Resources")
    if not page_resources:
        return
    page_resources = page_resources.get_object()  # Resolve IndirectObject

    if "/XObject" not in page_resources:
        return
    image_objects = page_resources["/XObject"].get_object()

    # Iterate over a snapshot of the names because entries are reassigned
    # inside the loop.
    for img_name in list(image_objects):
        img_obj = image_objects[img_name]
        if img_obj.get("/Subtype") != "/Image":
            continue

        # Raw stream bytes exactly as stored in the PDF (no filter decoding
        # is applied here).
        image_stream = io.BytesIO(img_obj._data)

        # NOTE(review): `compress_image_for_pdf` and `Image` are not defined
        # or imported in the visible part of this file. `Image` is presumably
        # Pillow's `PIL.Image` module -- confirm and bring both into scope.
        compressed_image_stream = compress_image_for_pdf(image_stream)
        if compressed_image_stream is None:
            print(f"Skipping invalid image: {img_name}")
            continue  # Skip this image if compression failed

        # The replacement dictionary needs the dimensions of the new image.
        resized_image = Image.open(compressed_image_stream)
        new_width, new_height = resized_image.size

        # Build the replacement stream. Only the keys set below are present;
        # any other entries of the original image dictionary are not carried
        # over.
        new_image_object = StreamObject()
        new_image_object._data = compressed_image_stream.getvalue()
        new_image_object.update({
            NameObject("/Filter"): NameObject("/DCTDecode"),
            NameObject("/Subtype"): NameObject("/Image"),
            NameObject("/Width"): NumberObject(new_width),
            NameObject("/Height"): NumberObject(new_height),
            # NOTE(review): assumes the compressed image is 8-bit RGB --
            # confirm against compress_image_for_pdf.
            NameObject("/ColorSpace"): NameObject("/DeviceRGB"),
            NameObject("/BitsPerComponent"): NumberObject(8),
        })

        # Replace the old image with the new one
        image_objects[img_name] = new_image_object


def optimize_pdf_with_images(input_file, destination_folder):
    """Optimize a PDF by reducing the size of embedded images.

    Every page of ``input_file`` is copied to a new document; image XObjects
    found in a page's resources are replaced with recompressed versions, and
    the page is otherwise added as-is.

    Args:
        input_file: Path of the PDF to read.
        destination_folder: Folder the optimized PDF is written to. The
            output keeps the base name of ``input_file``.

    Returns:
        The path of the written PDF.
    """
    optimized_pdf_path = os.path.join(destination_folder, os.path.basename(input_file))

    pdf_reader = PdfReader(input_file)
    try:
        pdf_writer = PdfWriter()

        for pdf_page in pdf_reader.pages:
            _recompress_page_images(pdf_page)
            pdf_writer.add_page(pdf_page)

        print("Writing the optimized file.")
        print(f"optimized_pdf_path : {optimized_pdf_path}")  # Log the file path
        with open(optimized_pdf_path, "wb") as output_pdf:
            pdf_writer.write(output_pdf)
    finally:
        # Close the source stream only after the output is written: the
        # writer may still pull objects from the reader while serializing.
        pdf_reader.stream.close()

    print(f"Optimized PDF saved at: {optimized_pdf_path}")
    return optimized_pdf_path
|
||||
|
||||
if __name__ == "__main__":
    # Run the conversion only when executed as a script, so the module can be
    # loaded without side effects.
    import sys

    # NOTE(review): these defaults are machine-specific, and the input file
    # name appears to contain personal data; consider moving them out of
    # version control and always passing paths on the command line.
    default_input_file = r"u:\Dropbox\!!!Days\Downloads Z230\Dokumentace\6504140489 2025-03-04 Harsa, Radovan [PZ psychiatrie] [28FEB2025-04MAR2025 vágní suicidální proklamace, zbytečná hospitalizace].pdf"
    default_destination_folder = r"u:\Dropbox\!!!Days\Downloads Z230\Dokumentace\Converted"

    # Optional overrides: <script> [input_file [destination_folder]].
    # With no arguments the original hard-coded paths are used.
    cli_args = sys.argv[1:]
    chosen_input_file = cli_args[0] if len(cli_args) >= 1 else default_input_file
    chosen_destination = cli_args[1] if len(cli_args) >= 2 else default_destination_folder

    optimize_pdf_with_images(chosen_input_file, chosen_destination)
|
||||
Reference in New Issue
Block a user