Z230
This commit is contained in:
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Compress PDF — output DPI and JPEG quality are chosen automatically
|
||||
based on the detected resolution of the source PDF.
|
||||
|
||||
Usage: python compress_pdf.py <input.pdf> [output.pdf]
|
||||
python compress_pdf.py (processes all PDFs in the folder containing this script)
|
||||
Output filename: original_name (139 kB).pdf
|
||||
"""
|
||||
|
||||
import sys
|
||||
import fitz
|
||||
from pathlib import Path
|
||||
|
||||
# ------------------------------------------------------------------
# Compression lookup table.
#
# Each row maps an inclusive range of detected source DPI to the
# rendering DPI and JPEG quality used for the compressed output.
# Rows are scanned from top to bottom and the first matching row is
# the one that gets used.
# ------------------------------------------------------------------
COMPRESSION_TABLE = [
    # (src_dpi_min, src_dpi_max, out_dpi, jpeg_quality)
    (0, 99, 72, 60),        # very low res: already small, compress hard
    (100, 149, 100, 70),    # low res
    (150, 249, 150, 80),    # standard scan (the tested sweet spot)
    (250, 399, 150, 80),    # good scan: downsampling to 150 is fine
    (400, 599, 200, 85),    # high res scan
    (600, 9999, 150, 80),   # very high res / professional scan
]
|
||||
|
||||
|
||||
def detect_source_dpi(src: "fitz.Document") -> int:
    """Estimate the source DPI from the largest image on the first page.

    The estimate divides the pixel size of the largest image (by pixel
    area) by the page size in inches, i.e. it treats that image as if it
    covered the whole page, and averages the horizontal and vertical
    results.

    Args:
        src: An opened document. Only its first page is inspected.

    Returns:
        The rounded DPI estimate, or 150 when no estimate can be made
        (document without pages, first page without raster images, or a
        page with no usable dimensions).
    """
    default_dpi = 150

    # A document without pages has no first page to inspect.
    if len(src) == 0:
        return default_dpi

    page = src[0]
    images = page.get_images(full=True)
    if not images:
        return default_dpi  # no raster images, use the default

    # Entries of get_images() carry the pixel width at index 2 and the
    # pixel height at index 3; pick the image with the largest area.
    best = max(images, key=lambda img: img[2] * img[3])
    img_w_px, img_h_px = best[2], best[3]

    # Page size in inches (1 point = 1/72 inch).
    page_w_in = page.rect.width / 72.0
    page_h_in = page.rect.height / 72.0

    # Only average the axes that have a usable page dimension; a
    # zero-size axis would otherwise drag the estimate towards 0.
    estimates = []
    if page_w_in > 0:
        estimates.append(img_w_px / page_w_in)
    if page_h_in > 0:
        estimates.append(img_h_px / page_h_in)
    if not estimates:
        return default_dpi

    return round(sum(estimates) / len(estimates))
|
||||
|
||||
|
||||
def pick_settings(
    source_dpi: int,
    table: "list[tuple[int, int, int, int]] | None" = None,
) -> tuple[int, int]:
    """Choose the output DPI and JPEG quality for a detected source DPI.

    Rows are checked in order and the first row whose inclusive
    ``[min, max]`` range contains ``source_dpi`` wins. When no row
    contains the value (below the lowest range, above the highest one,
    or a fractional value between two integer ranges) the row whose
    range boundary is nearest to the value is used.

    Args:
        source_dpi: Detected resolution of the source document.
        table: Rows of ``(src_dpi_min, src_dpi_max, out_dpi,
            jpeg_quality)``. Defaults to the module-level
            ``COMPRESSION_TABLE``.

    Returns:
        ``(out_dpi, jpeg_quality)`` of the selected row.

    Raises:
        ValueError: If the table has no rows.
    """
    rows = COMPRESSION_TABLE if table is None else table
    if not rows:
        raise ValueError("compression table is empty")

    for min_dpi, max_dpi, out_dpi, quality in rows:
        if min_dpi <= source_dpi <= max_dpi:
            return out_dpi, quality

    # Nothing matched: take the row with the closest range boundary so
    # that e.g. a negative estimate maps to the lowest-resolution row
    # rather than to the highest one. Ties go to the earlier row.
    nearest = min(
        rows,
        key=lambda row: min(abs(source_dpi - row[0]), abs(source_dpi - row[1])),
    )
    return nearest[2], nearest[3]
|
||||
|
||||
|
||||
def compress(input_path: Path, output_path: "Path | None" = None):
    """Re-render a PDF as JPEG page images and save the smaller result.

    Every page is rasterized at the output DPI chosen by
    ``pick_settings`` for the resolution reported by
    ``detect_source_dpi``, encoded as JPEG, and placed on a new page of
    the same size. The result is first saved to a temporary file and
    then moved to its final name.

    Args:
        input_path: PDF to compress.
        output_path: Destination file. When omitted, the file is written
            next to the input as ``"<stem> (<size> kB).pdf"``. An
            existing destination is overwritten.

    Returns:
        None. Progress is reported on standard output.
    """
    # Measure the original up front: when output_path equals input_path
    # the source is overwritten below and its size would be lost.
    orig_kb = round(input_path.stat().st_size / 1024)

    # Keep the temporary file in the destination directory so the final
    # move never has to cross a filesystem boundary.
    anchor = input_path if output_path is None else output_path
    tmp = anchor.with_suffix(".tmp.pdf")

    try:
        src = fitz.open(input_path)
        try:
            source_dpi = detect_source_dpi(src)
            out_dpi, jpeg_quality = pick_settings(source_dpi)

            print(f" zdroj ~{source_dpi} DPI -> komprese {out_dpi} DPI / JPEG q{jpeg_quality}")

            # PDF user space is 72 units per inch, so this zoom renders
            # the page at out_dpi pixels per inch.
            zoom = out_dpi / 72.0
            mat = fitz.Matrix(zoom, zoom)

            out_doc = fitz.open()
            try:
                for page in src:
                    pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB)
                    img_bytes = pix.tobytes("jpeg", jpg_quality=jpeg_quality)
                    # Wrap the JPEG in a one-page PDF so it can be shown
                    # on the new page.
                    img_doc = fitz.open("pdf", fitz.open("jpeg", img_bytes).convert_to_pdf())
                    rect = page.rect
                    new_page = out_doc.new_page(width=rect.width, height=rect.height)
                    new_page.show_pdf_page(new_page.rect, img_doc, 0)

                out_doc.save(tmp, deflate=True, garbage=4)
            finally:
                out_doc.close()
        finally:
            src.close()

        size_kb = round(tmp.stat().st_size / 1024)

        if output_path is None:
            output_path = input_path.parent / f"{input_path.stem} ({size_kb} kB).pdf"

        # replace() overwrites an existing destination in one step.
        tmp.replace(output_path)
    finally:
        # Do not leave a half-written temporary file behind on failure;
        # after a successful move the file no longer exists.
        tmp.unlink(missing_ok=True)

    # An original that rounds to 0 kB would otherwise divide by zero.
    saving = (1 - size_kb / orig_kb) * 100 if orig_kb else 0.0
    print(f" {input_path.name} -> {output_path.name} (bylo {orig_kb} kB, uspora {saving:.0f}%)")
|
||||
|
||||
|
||||
# Script entry point.
#   With arguments:    compress argv[1], optionally writing to argv[2].
#   Without arguments: compress every eligible PDF that sits next to
#                      this script.
if __name__ == "__main__":
    if len(sys.argv) >= 2:
        inp = Path(sys.argv[1])
        out = Path(sys.argv[2]) if len(sys.argv) >= 3 else None
        compress(inp, out)
    else:
        script = Path(__file__)
        folder = script.parent
        # Skip earlier outputs ("name (123 kB).pdf"), temporary files
        # left over from an interrupted run ("name.tmp.pdf") and a PDF
        # sharing the script's own stem. Sorting makes the processing
        # order deterministic.
        pdfs = sorted(
            p
            for p in folder.glob("*.pdf")
            if not p.name.endswith((").pdf", ".tmp.pdf")) and p.stem != script.stem
        )
        if not pdfs:
            print("Zadne PDF k zpracovani.")

        failed = 0
        for pdf in pdfs:
            # One unreadable file must not abort the rest of the batch;
            # report it and signal the failure through the exit code.
            try:
                compress(pdf)
            except Exception as exc:
                failed += 1
                print(f" CHYBA {pdf.name}: {exc}", file=sys.stderr)
        if failed:
            sys.exit(1)
|
||||
Reference in New Issue
Block a user