Merge remote-tracking branch 'origin/main'

This commit is contained in:
michaela.buzalkova
2025-10-24 18:56:49 +02:00
8 changed files with 386 additions and 102 deletions

View File

@@ -0,0 +1,24 @@
import pymysql
from pymysql.cursors import DictCursor

# Quick connectivity / schema sanity check for the `fio` MySQL database.
# NOTE(review): credentials are hard-coded in source — move them to env
# vars or a config file before sharing this script.
conn = pymysql.connect(
    host="192.168.1.76",
    port=3307,
    user="root",
    password="Vlado9674+",
    database="fio",
    charset="utf8mb4",
    cursorclass=DictCursor,
)
try:
    with conn.cursor() as cur:
        # List all tables in the `fio` schema.
        cur.execute("SHOW TABLES;")
        # Fixed: original used a pointless f-string f"Tables_in_fio".
        print("📋 Tables:", [r["Tables_in_fio"] for r in cur.fetchall()])

        # Row count of the main import target table.
        cur.execute("SELECT COUNT(*) AS cnt FROM transactions;")
        print("🧾 Rows in `transactions`:", cur.fetchone()["cnt"])

        # Column overview of `transactions`.
        cur.execute("SHOW COLUMNS FROM transactions;")
        print("\n📊 Columns:")
        for r in cur.fetchall():
            print(" -", r["Field"])
finally:
    # Fixed: original never closed the connection (resource leak).
    conn.close()

117
40 Fio 03 excel.py Normal file
View File

@@ -0,0 +1,117 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Export Fio transactions (from MySQL → Excel)
--------------------------------------------
- Reads only cislo_uctu = '2800046620'
- For OZP (protiucet=2070101041) includes only positive objem
- Each sheet = insurance company (filtered by protiucet)
- First sheet = summary with total amounts and transaction counts
"""
import pandas as pd
import pymysql
from pathlib import Path
from datetime import datetime

# ======== CONFIG ========
# NOTE(review): plaintext DB credentials in source — consider env vars/secrets.
MYSQL_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    "password": "Vlado9674+",
    "database": "fio",
    "charset": "utf8mb4",
}

# Insurance companies to report on: display name -> counterparty account number.
REPORTOVAT = {
    "VZP": "1114007221",
    "VOZP": "2010009091",
    "ČPZP": "2054108761",
    "OZP": "2070101041",
    "ZPŠ": "2090309181",
    "ZPMV": "2112108031",
}

# Timestamped output path so each run produces a fresh workbook.
EXPORT_PATH = Path(r"u:\Dropbox\!!!Days\Downloads Z230") / f"Fio_report_{datetime.now():%Y-%m-%d_%H-%M-%S}.xlsx"
# ======== LOAD DATA ========
def load_data():
    """Load filtered Fio transactions from MySQL into a DataFrame.

    Returns only rows for account 2800046620; for the OZP counterparty
    account (2070101041) only positive amounts are included — negative
    OZP rows are filtered out directly in SQL.
    """
    print("🔄 Načítám data z MySQL (účet 2800046620, pro OZP jen kladné objemy)...")
    conn = pymysql.connect(**MYSQL_CONFIG)
    sql = """
    SELECT *
    FROM transactions
    WHERE cislo_uctu = '2800046620'
      AND (
            protiucet <> '2070101041'
            OR (protiucet = '2070101041' AND objem > 0)
          );
    """
    try:
        df = pd.read_sql(sql, conn)
    finally:
        # Fixed: original leaked the connection when read_sql raised.
        conn.close()
    # Guard against stray whitespace in column names coming from the DB.
    df.columns = df.columns.str.strip()
    print(f"✅ Načteno {len(df)} řádků, {len(df.columns)} sloupců.")
    return df
# ======== EXPORT TO EXCEL ========
def export_to_excel(df):
    """Export per-insurer detail sheets plus a summary sheet to EXPORT_PATH.

    Fix vs. original: the summary sheet ("Přehled") is now written FIRST,
    as promised by the module docstring ("First sheet = summary"); the
    original appended it as the last sheet.
    """
    summary_rows = []   # one row per insurer for the summary sheet
    detail_sheets = []  # (sheet name, prepared DataFrame), written after summary

    # --- first pass: filter per insurer, convert amounts, build summary ---
    for name, acc in REPORTOVAT.items():
        filtered = df[df["protiucet"].astype(str) == acc]
        if filtered.empty:
            print(f"⚠️ {name}: žádné transakce (účet {acc})")
            summary_rows.append({
                "Pojišťovna": name,
                "Číslo účtu": acc,
                "Počet transakcí": 0,
                "Součet objemu": 0.0
            })
            continue

        # Safe numeric conversion: drop non-breaking spaces, comma -> dot.
        filtered = filtered.copy()
        filtered["objem_num"] = (
            filtered["objem"]
            .astype(str)
            .str.replace("\u00A0", "", regex=False)
            .str.replace(",", ".", regex=False)
            .astype(float)
        )

        total_sum = filtered["objem_num"].sum()
        summary_rows.append({
            "Pojišťovna": name,
            "Číslo účtu": acc,
            "Počet transakcí": len(filtered),
            "Součet objemu": round(total_sum, 2)
        })
        detail_sheets.append((name, filtered))
        print(f"{name}: {len(filtered)} řádků exportováno, součet {total_sum:,.2f}")

    with pd.ExcelWriter(EXPORT_PATH, engine="openpyxl") as writer:
        # --- SUMMARY SHEET (first, per the module contract) ---
        summary_df = pd.DataFrame(summary_rows)
        # Display formatting: totals rendered as "1,234.56" strings.
        summary_df["Součet objemu"] = summary_df["Součet objemu"].map("{:,.2f}".format)
        summary_df.to_excel(writer, index=False, sheet_name="Přehled")
        print("🧾 Přidán přehledový list s celkovými součty.")

        # --- INDIVIDUAL DETAIL SHEETS ---
        for name, sheet_df in detail_sheets:
            sheet_df.to_excel(writer, index=False, sheet_name=name)

    print(f"\n📊 Hotovo! Soubor uložen jako:\n{EXPORT_PATH}")
# ======== MAIN ========
if __name__ == "__main__":
    # Load the filtered transactions, then write the Excel report.
    df = load_data()
    export_to_excel(df)

View File

@@ -2,11 +2,11 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Import Fio banka CSV export (UTF-8, ; separated, quoted) Fio CSV import → MySQL (dev version)
into MySQL database `fio.transactions`. ------------------------------------
- Always drops & recreates `transactions` table
Unique key = (Číslo účtu, ID operace, ID pokynu) - Uses real CSV headers as seen in "Vyhledane pohyby (3).csv"
Duplicates are skipped silently. - Unique key = (Číslo účtu, ID operace, ID pokynu)
""" """
import csv import csv
@@ -14,6 +14,7 @@ from pathlib import Path
from datetime import datetime from datetime import datetime
import pymysql import pymysql
from pymysql.cursors import DictCursor from pymysql.cursors import DictCursor
import re
# ======== CONFIG ======== # ======== CONFIG ========
CSV_PATH = Path(r"u:\Dropbox\!!!Days\Downloads Z230\Vyhledane pohyby (3).csv") CSV_PATH = Path(r"u:\Dropbox\!!!Days\Downloads Z230\Vyhledane pohyby (3).csv")
@@ -33,14 +34,12 @@ MYSQL_CONFIG = {
# ======== HELPERS ======== # ======== HELPERS ========
def clean(s: str): def clean(s: str):
"""Trim and normalize text values."""
if not s: if not s:
return None return None
return s.strip() or None return s.strip() or None
def parse_date(raw: str): def parse_date(raw: str):
"""Convert dd.mm.yyyy → date"""
raw = (raw or "").strip() raw = (raw or "").strip()
if not raw: if not raw:
return None return None
@@ -51,10 +50,15 @@ def parse_date(raw: str):
def parse_float(raw: str): def parse_float(raw: str):
"""Convert comma/space separated numbers to float""" if raw is None:
raw = (raw or "").replace(" ", "").replace(",", ".") return None
s = str(raw).strip()
for ch in (" ", "\u00A0", "\u202F", "\u2007"):
s = s.replace(ch, "")
s = s.replace(",", ".")
s = re.sub(r"[^0-9.+-]", "", s)
try: try:
return float(raw) return float(s)
except ValueError: except ValueError:
return None return None
@@ -64,38 +68,43 @@ def get_mysql_connection():
return pymysql.connect(**MYSQL_CONFIG) return pymysql.connect(**MYSQL_CONFIG)
def ensure_table_exists(conn): def recreate_table(conn):
"""Create table if it doesnt exist, with unique key on (cislo_uctu, id_operace, id_pokynu).""" """Drop and recreate table with schema matching CSV structure."""
sql = f""" sql = f"""
CREATE TABLE IF NOT EXISTS `{TABLE_NAME}` ( DROP TABLE IF EXISTS `{TABLE_NAME}`;
CREATE TABLE `{TABLE_NAME}` (
id INT AUTO_INCREMENT PRIMARY KEY, id INT AUTO_INCREMENT PRIMARY KEY,
datum DATE, datum DATE,
castka DECIMAL(14,2), objem DECIMAL(14,2),
akce VARCHAR(100), mena CHAR(3),
cislo_uctu VARCHAR(40), cislo_uctu VARCHAR(40),
id_operace VARCHAR(50),
id_pokynu VARCHAR(50),
protiucet VARCHAR(40), protiucet VARCHAR(40),
nazev_protiuctu VARCHAR(200),
kod_banky VARCHAR(20), kod_banky VARCHAR(20),
ks VARCHAR(20), ks VARCHAR(20),
vs VARCHAR(20), vs VARCHAR(20),
ss VARCHAR(20), ss VARCHAR(20),
zprava_pro_prijemce VARCHAR(500), zprava_pro_prijemce VARCHAR(500),
poznamka VARCHAR(500), poznamka VARCHAR(500),
reference_platce VARCHAR(200), id_operace VARCHAR(50),
typ VARCHAR(100), id_pokynu VARCHAR(50),
upresneni VARCHAR(500), ks_1 VARCHAR(20),
zadal VARCHAR(200),
zdrojovy_ucet VARCHAR(50),
nazev_banky VARCHAR(100), nazev_banky VARCHAR(100),
nazev_protiuctu VARCHAR(200),
ss_1 VARCHAR(20),
typ VARCHAR(100),
upresneni_objem VARCHAR(100),
upresneni_mena VARCHAR(20),
vs_1 VARCHAR(20),
zadal VARCHAR(200),
imported_at DATETIME DEFAULT CURRENT_TIMESTAMP, imported_at DATETIME DEFAULT CURRENT_TIMESTAMP,
UNIQUE KEY uniq_tx (cislo_uctu, id_operace, id_pokynu) UNIQUE KEY uniq_tx (cislo_uctu, id_operace, id_pokynu)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
""" """
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute(sql) for stmt in sql.strip().split(";"):
print(f"✅ Tabulka `{TABLE_NAME}` zkontrolována nebo vytvořena.") if stmt.strip():
cur.execute(stmt)
print(f"✅ Tabulka `{TABLE_NAME}` znovu vytvořena podle CSV struktury.")
# ======== IMPORT ======== # ======== IMPORT ========
@@ -104,35 +113,37 @@ def import_fio_csv():
reader = csv.DictReader(f, delimiter=";", quotechar='"') reader = csv.DictReader(f, delimiter=";", quotechar='"')
rows = list(reader) rows = list(reader)
total_rows = len(rows) total = len(rows)
print(f"📄 Načteno {total_rows} řádků ze souboru {CSV_PATH.name}") print(f"📄 Načteno {total} řádků ze souboru {CSV_PATH.name}")
with get_mysql_connection() as conn: with get_mysql_connection() as conn:
ensure_table_exists(conn) recreate_table(conn)
inserted, skipped = 0, 0 inserted, skipped = 0, 0
for i, row in enumerate(rows, start=1): for i, row in enumerate(rows, start=1):
data = { data = {
"datum": parse_date(row.get("Datum")), "datum": parse_date(row.get("Datum")),
"castka": parse_float(row.get("Částka")), "objem": parse_float(row.get("Objem")),
"akce": clean(row.get("Akce")), "mena": clean(row.get("Měna")),
"cislo_uctu": clean(row.get("Číslo účtu")), "cislo_uctu": clean(row.get("Číslo účtu")),
"id_operace": clean(row.get("ID operace")),
"id_pokynu": clean(row.get("ID pokynu")),
"protiucet": clean(row.get("Protiúčet")), "protiucet": clean(row.get("Protiúčet")),
"nazev_protiuctu": clean(row.get("Název protiúčtu")),
"kod_banky": clean(row.get("Kód banky")), "kod_banky": clean(row.get("Kód banky")),
"ks": clean(row.get("KS")), "ks": clean(row.get("KS")),
"vs": clean(row.get("VS")), "vs": clean(row.get("VS")),
"ss": clean(row.get("SS")), "ss": clean(row.get("SS")),
"zprava_pro_prijemce": clean(row.get("Zpráva pro příjemce")), "zprava_pro_prijemce": clean(row.get("Zpráva pro příjemce")),
"poznamka": clean(row.get("Poznámka")), "poznamka": clean(row.get("Poznámka")),
"reference_platce": clean(row.get("Reference plátce")), "id_operace": clean(row.get("ID operace")),
"typ": clean(row.get("Typ")), "id_pokynu": clean(row.get("ID pokynu")),
"upresneni": clean(row.get("Upřesnění")), "ks_1": clean(row.get("KS.1")),
"zadal": clean(row.get("Zadal")),
"zdrojovy_ucet": clean(row.get("Zdrojový účet")),
"nazev_banky": clean(row.get("Název banky")), "nazev_banky": clean(row.get("Název banky")),
"nazev_protiuctu": clean(row.get("Název protiúčtu")),
"ss_1": clean(row.get("SS.1")),
"typ": clean(row.get("Typ")),
"upresneni_objem": clean(row.get("Upřesnění - objem")),
"upresneni_mena": clean(row.get("Upřesnění - měna")),
"vs_1": clean(row.get("VS.1")),
"zadal": clean(row.get("Zadal")),
} }
cols = ", ".join(data.keys()) cols = ", ".join(data.keys())
@@ -146,21 +157,12 @@ def import_fio_csv():
else: else:
skipped += 1 skipped += 1
# --- progress output --- if i % 500 == 0 or i == total:
if i % 500 == 0 or i == total_rows: print(f" {i}/{total} zpracováno... ({inserted} vloženo, {skipped} duplicit)")
print(f" {i}/{total_rows} zpracováno... ({inserted} vloženo, {skipped} duplicit)")
# summary
with conn.cursor() as cur:
cur.execute(f"SELECT COUNT(*) AS cnt FROM `{TABLE_NAME}`")
total_db = cur.fetchone()["cnt"]
print(f"\n✅ Import dokončen: {inserted} nových, {skipped} duplicit přeskočeno.") print(f"\n✅ Import dokončen: {inserted} nových, {skipped} duplicit přeskočeno.")
print(f"📊 Celkem v databázi: {total_db} záznamů.")
# ======== MAIN ======== # ======== MAIN ========
if __name__ == "__main__": if __name__ == "__main__":
if not CSV_PATH.exists():
raise SystemExit(f"❌ Soubor {CSV_PATH} nenalezen.")
import_fio_csv() import_fio_csv()

View File

@@ -2,46 +2,46 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Diagnostický test: načti Fio CSV a ověř parsování datumu. Quick, verified dump of all Fio transactions from MySQL → Excel.
Nenačítá se do MySQL pouze vypíše výsledek. Column names are exactly as in DB.
""" """
import csv import pandas as pd
from datetime import datetime import pymysql
from pymysql.cursors import DictCursor
from pathlib import Path from pathlib import Path
from datetime import datetime
# ✅ Tvoje cesta k souboru # ======== CONFIG ========
CSV_PATH = Path(r"u:\Dropbox\!!!Days\Downloads Z230\Vyhledane pohyby (1).csv") MYSQL_CONFIG = {
"host": "192.168.1.76",
"port": 3307,
"user": "root",
"password": "Vlado9674+",
"database": "fio",
"charset": "utf8mb4",
}
def parse_czech_date(s: str): EXPORT_PATH = Path(r"u:\Dropbox\!!!Days\Downloads Z230") / f"Fio_ALL_{datetime.now():%Y-%m-%d_%H-%M-%S}.xlsx"
"""Očistí řetězec a zkusí dd.mm.yyyy."""
if not s:
return None
s = s.strip().replace("\u00A0", "").replace("\ufeff", "")
try:
return datetime.strptime(s, "%d.%m.%Y").date()
except Exception:
return None
# ======== MAIN ========
def dump_all_transactions():
with pymysql.connect(**MYSQL_CONFIG) as conn:
sql = """
SELECT
*
FROM transactions
ORDER BY datum DESC;
"""
df = pd.read_sql(sql, conn)
def main(): print(f"✅ Načteno {len(df)} transakcí z MySQL.")
with open(CSV_PATH, "r", encoding="utf-8-sig", newline="") as f:
reader = csv.DictReader(f, delimiter=";", quotechar='"')
rows = list(reader)
print(f"Načteno {len(rows)} řádků.\n") # Save to Excel
print("Ukázka prvních 10 řádků s hodnotou Datum:\n") df.to_excel(EXPORT_PATH, index=False)
print(f"📊 Excel export hotov:\n{EXPORT_PATH}")
for i, row in enumerate(rows[:10], start=1):
raw = row.get("Datum")
parsed = parse_czech_date(raw)
print(f"{i:02d}. raw={repr(raw)} -> parsed={parsed}")
input("\n🔸 Stiskni Enter pro pokračování nebo ukončení... ")
if __name__ == "__main__": if __name__ == "__main__":
if not CSV_PATH.exists(): dump_all_transactions()
raise SystemExit(f"❌ Soubor {CSV_PATH} nenalezen.")
main()

View File

@@ -20,26 +20,20 @@ from pathlib import Path
from striprtf.striprtf import rtf_to_text from striprtf.striprtf import rtf_to_text
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter from openpyxl.utils import get_column_letter
from Functions import get_medicus_connection
# ================== CONFIGURATION ================== # ================== CONFIGURATION ==================
FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB" FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB"
EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230") EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230")
# calculate last 2 months dynamically # calculate last 2 months dynamically (now set to 10 days for testing)
DATE_FROM = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d") DATE_FROM = (datetime.now() - timedelta(days=10)).strftime("%Y-%m-%d")
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
xlsx_path = EXPORT_DIR / f"{timestamp}_Dekurz (poslední rok).xlsx" xlsx_path = EXPORT_DIR / f"{timestamp}_Dekurz (poslední rok).xlsx"
# ================== FIREBIRD CONNECTION ================== # ================== FIREBIRD CONNECTION ==================
conn = fb.connect( conn = get_medicus_connection()
host="192.168.1.4",
port=3050,
database=FDB_PATH,
user="SYSDBA",
password="masterkey",
charset="WIN1250",
)
def query_df(sql, params=None): def query_df(sql, params=None):
cur = conn.cursor() cur = conn.cursor()
@@ -85,21 +79,29 @@ def safe_rtf_to_text(x):
df["DEKURS"] = df["DEKURS"].apply(safe_rtf_to_text) df["DEKURS"] = df["DEKURS"].apply(safe_rtf_to_text)
df.replace({r'(\r\n|\r|\n)': r'\r\n'}, regex=True, inplace=True) # --- Normalize and clean newlines ---
df.replace({r'[\ud800-\udfff\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]+': ''}, df["DEKURS"] = (
regex=True, inplace=True) df["DEKURS"]
df.replace({r'(\r\n){2,}': r'\r\n', r'(\r\n)+$': ''}, .replace(r"(\r\n|\r|\n)+", "\n", regex=True) # unify newlines
regex=True, inplace=True) .replace(r"\n{2,}", "\n", regex=True) # collapse multiple blank lines
.str.strip() # trim leading/trailing blanks
)
# --- Remove invalid control characters ---
df.replace({r"[\ud800-\udfff\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]+": ""}, regex=True, inplace=True)
# --- Merge patient name ---
df["PACIENT"] = df["PRIJMENI"].fillna("") + ", " + df["JMENO"].fillna("") df["PACIENT"] = df["PRIJMENI"].fillna("") + ", " + df["JMENO"].fillna("")
df.drop(columns=["PRIJMENI", "JMENO"], inplace=True) df.drop(columns=["PRIJMENI", "JMENO"], inplace=True)
# --- Rename and format columns ---
df.rename(columns={"ZKRATKA": "LEKAR", "VYKONY_DNE": "VYKONY DNE"}, inplace=True) df.rename(columns={"ZKRATKA": "LEKAR", "VYKONY_DNE": "VYKONY DNE"}, inplace=True)
df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce").dt.date df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce").dt.date
df.drop(columns=[c for c in df.columns if "ASCII" in c.upper()], inplace=True, errors="ignore") df.drop(columns=[c for c in df.columns if "ASCII" in c.upper()], inplace=True, errors="ignore")
desired_order = ["DATUM", "RODCIS", "PACIENT", "LEKAR", "VYKONY DNE", "DEKURS"] desired_order = ["DATUM", "RODCIS", "PACIENT", "LEKAR", "VYKONY DNE", "DEKURS"]
df = df[[c for c in desired_order if c in df.columns]] df = df[[c for c in desired_order if c in df.columns]]
# ================== CLEANUP OLD FILES ================== # ================== CLEANUP OLD FILES ==================
for old_file in EXPORT_DIR.glob("*Dekurz (poslední rok)*.xlsx"): for old_file in EXPORT_DIR.glob("*Dekurz (poslední rok)*.xlsx"):
try: try:
@@ -107,11 +109,12 @@ for old_file in EXPORT_DIR.glob("*Dekurz (poslední rok)*.xlsx"):
print(f"🧹 Deleted old file: {old_file.name}") print(f"🧹 Deleted old file: {old_file.name}")
except Exception as e: except Exception as e:
print(f"⚠️ Could not delete {old_file.name}: {e}") print(f"⚠️ Could not delete {old_file.name}: {e}")
# ================== EXPORT TO EXCEL ================== # ================== EXPORT TO EXCEL ==================
with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer: with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
df.to_excel(writer, index=False, sheet_name="Dekurz") df.to_excel(writer, index=False, sheet_name="Dekurz")
ws = writer.sheets["Dekurz"] ws = writer.sheets["Dekurz"]
ws.freeze_panes = "F2" #zamčení prvního řádku a sloupců A:F
# ----- Bright yellow header ----- # ----- Bright yellow header -----
header_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid") header_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
for cell in ws[1]: for cell in ws[1]:

View File

@@ -7,14 +7,10 @@ Show decoded physician notes (RTF → plain text) directly in console.
import fdb import fdb
from striprtf.striprtf import rtf_to_text from striprtf.striprtf import rtf_to_text
from Functions import get_medicus_connection
# ===== connection ===== # ===== connection =====
con = fdb.connect( con = get_medicus_connection()
dsn='localhost:z:\\Medicus 3\\data\\medicus.fdb',
user='sysdba',
password='masterkey',
charset='WIN1250'
)
cur = con.cursor() cur = con.cursor()
# ===== pick a few recent records ===== # ===== pick a few recent records =====

31
70 PDF read ZPMVCR.py Normal file
View File

@@ -0,0 +1,31 @@
import pdfplumber
import pandas as pd
from pathlib import Path

# Source PDF and the Excel file it will be converted into (same folder/stem).
pdf_path = Path(r"u:\Dropbox\!!!Days\Downloads Z230\prehled 09_2025 zpmvcr.pdf")
xlsx_path = pdf_path.with_suffix(".xlsx")

frames = []
with pdfplumber.open(pdf_path) as pdf:
    for page_no, page in enumerate(pdf.pages, start=1):
        page_tables = page.extract_tables()
        if not page_tables:
            continue  # page has no detectable table
        # Only the first table on each page is used; its first row is the header.
        header, *body = page_tables[0]
        frame = pd.DataFrame(body, columns=header)
        frame["page"] = page_no  # remember which page the rows came from
        frames.append(frame)

if frames:
    df_all = pd.concat(frames, ignore_index=True)
    print("✅ Combined shape:", df_all.shape)
    print(df_all.head())
    # Persist the combined table next to the source PDF.
    df_all.to_excel(xlsx_path, index=False)
    print(f"💾 Saved to: {xlsx_path}")
else:
    print("❌ No tables found.")

View File

@@ -0,0 +1,111 @@
import re
import pandas as pd
import firebirdsql as fb
from datetime import datetime, timedelta
from pathlib import Path
from striprtf.striprtf import rtf_to_text
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
import textwrap
# ================== CONFIGURATION ==================
FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB"              # Firebird database file on the server
EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230")  # where exports are written

# ================== FIREBIRD CONNECTION ==================
# Module-level connection shared by query_df().
# NOTE(review): default SYSDBA/masterkey credentials hard-coded — move to config.
conn = fb.connect(
    host="192.168.1.4",
    port=3050,
    database=FDB_PATH,
    user="SYSDBA",
    password="masterkey",
    charset="WIN1250",
)
def query_df(sql, params=None):
    """Run *sql* on the module-level Firebird connection, return a DataFrame.

    Column names are stripped and lower-cased so downstream code can rely
    on consistent lowercase keys regardless of how Firebird reports them.
    """
    cur = conn.cursor()
    try:
        cur.execute(sql, params or ())
        rows = cur.fetchall()
        cols = [d[0].strip().lower() for d in cur.description]  # normalize names
    finally:
        # Fixed: original never closed the cursor (resource leak).
        cur.close()
    return pd.DataFrame(rows, columns=cols)
# ================== QUERY ==================
# Capitation rows for insurer code 211, September 2025.
sql = """
SELECT id, cisfak, poj, datkapod, datkapdo, kapdetail
FROM fak
WHERE EXTRACT(YEAR FROM datkapod) = 2025
AND EXTRACT(MONTH FROM datkapod) = 9
AND poj = '211'
"""
df = query_df(sql)
# print(df.columns.tolist())
# print(df.head())

# Take the KAPDETAIL text blob from the first returned row.
# NOTE(review): assumes the query returned at least one row — df.loc[0, ...]
# raises KeyError on an empty result; confirm this is acceptable.
kap_text = df.loc[0, 'kapdetail']
# print("\n".join(textwrap.wrap(kap_text, width=100)))

# ========== SOURCE STRING ==========
text = kap_text

# Extract triplets of the form "patientid;age|price;" from the blob.
# patientid is assumed to be exactly 4 digits — TODO confirm against real data.
pattern = r"(\d{4});(\d{1,3})\|([\d.]+);"
matches = re.findall(pattern, text)

# Build a dataframe of the extracted triplets (all values are strings here).
dfdavka = pd.DataFrame(matches, columns=["patientid", "age", "price"])

# Convert the extracted string columns to proper numeric types.
dfdavka["patientid"] = dfdavka["patientid"].astype(int)
dfdavka["age"] = dfdavka["age"].astype(int)
dfdavka["price"] = dfdavka["price"].astype(float)

# ========== LOAD KAR ==========
# Patient registry: internal patient id -> birth number (rodcis).
sql = "SELECT idpac, rodcis FROM kar"
dfkar = query_df(sql)

# ========== MERGE ==========
# Attach rodcis to each capitation row via the internal patient id.
dfmerged = pd.merge(dfdavka, dfkar, left_on="patientid", right_on="idpac", how="left")

# Reorder columns for readability.
dfmerged = dfmerged[["patientid", "rodcis", "age", "price"]]

print(dfmerged.head())
print(dfmerged.info())

# ========== OPTIONAL EXPORT ==========
# outfile = EXPORT_DIR / "kapdetail_merged.xlsx"
# dfmerged.to_excel(outfile, index=False)
# print(f"✅ Exported to {outfile}")

# ========== 1) Load Excel and prepare dfpoj ==========
# Insurer-side overview spreadsheet; presumably its 2nd column holds rodcis
# values — verify against the actual file layout.
xlsx_path = Path(r"u:\Dropbox\!!!Days\Downloads Z230\prehled 09_2025 zpmvcr.xlsx")
df = pd.read_excel(xlsx_path)
print("Columns in Excel:", df.columns.tolist())

# Select 2nd column (index 1) and normalize it to stripped strings.
dfpoj = df.iloc[:, [1]].copy()
dfpoj.columns = ["rodcis"]  # give it a proper name
dfpoj["rodcis"] = dfpoj["rodcis"].astype(str).str.strip()
print(dfpoj.head())

# ========== 2) Compare dfmerged vs dfpoj ==========
# Normalize our rodcis the same way so string comparison is apples-to-apples.
dfmerged["rodcis"] = dfmerged["rodcis"].astype(str).str.strip()

# Rows present in our DB extract but missing from the insurer's spreadsheet.
df_missing = dfmerged[~dfmerged["rodcis"].isin(dfpoj["rodcis"])].copy()
print(f"❌ Počet pacientů v dfmerged, kteří NEJSOU v dfpoj: {len(df_missing)}")
print(df_missing.head())

# ========== 3) Export differences ==========
EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230")
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
xlsx_out = EXPORT_DIR / f"chybějící_pacienti_{timestamp}.xlsx"
df_missing.to_excel(xlsx_out, index=False)
print(f"✅ Výsledek uložen do {xlsx_out}")