32 lines
855 B
Python
32 lines
855 B
Python
|
|
import pdfplumber
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
|
|
# Extract the first table found on every page of a PDF report, stack the
# per-page tables into one DataFrame and save it as an Excel workbook.

# Input PDF; the workbook is written next to it, same name with ".xlsx".
pdf_path = Path(r"u:\Dropbox\!!!Days\Downloads Z230\prehled 09_2025 zpmvcr.pdf")
xlsx_path = pdf_path.with_suffix(".xlsx")


def _unique_header(header):
    """Return the cells of *header* as unique, non-empty column labels.

    A cell that is ``None`` or ``""`` is replaced by ``column_<n>`` (``n`` is
    its 1-based position); a label that was already produced gets a numeric
    suffix (``_2``, ``_3``, ...). Cells that are already unique and non-empty
    are returned unchanged.

    Why: duplicate column labels make ``pd.concat`` fail as soon as two pages
    do not share exactly the same header row.
    """
    labels = []
    seen = set()
    for position, cell in enumerate(header, start=1):
        base = f"column_{position}" if cell is None or cell == "" else cell
        label = base
        suffix = 2
        # Keep bumping the suffix until the label collides with nothing.
        while label in seen:
            label = f"{base}_{suffix}"
            suffix += 1
        seen.add(label)
        labels.append(label)
    return labels


# One DataFrame per page that produced a table.
all_tables = []

with pdfplumber.open(pdf_path) as pdf:
    for i, page in enumerate(pdf.pages, start=1):
        tables = page.extract_tables()
        # Skip pages without a table, or whose first table has no rows
        # (indexing its header row would raise IndexError).
        if not tables or not tables[0]:
            continue
        # Only the first table of a page is used; further tables on the same
        # page are ignored.
        table = tables[0]
        # Convert to DataFrame, first row = header
        df = pd.DataFrame(table[1:], columns=_unique_header(table[0]))
        # Record the 1-based page number each row came from.
        df["page"] = i
        all_tables.append(df)

if not all_tables:
    print("❌ No tables found.")
else:
    # Stack all pages; ignore_index renumbers the rows 0..n-1.
    df_all = pd.concat(all_tables, ignore_index=True)
    print("✅ Combined shape:", df_all.shape)
    print(df_all.head())

    # Save to Excel
    df_all.to_excel(xlsx_path, index=False)
    print(f"💾 Saved to: {xlsx_path}")
|