From d8d3de7949d2f17ae4dc9f8d8c79ce8e825260f1 Mon Sep 17 00:00:00 2001 From: "vladimir.buzalka" Date: Wed, 22 Oct 2025 16:45:33 +0200 Subject: [PATCH 1/3] z230 --- 40 Fio 02 diagnostika 2.py | 24 ++++++++ 40 Fio 03 excel.py | 117 +++++++++++++++++++++++++++++++++++++ 40 fio 01.py | 100 +++++++++++++++---------------- 40 fio 02 diagnostika.py | 62 ++++++++++---------- 50 Dekurs.py | 2 +- 70 PDF read ZPMVCR.py | 31 ++++++++++ 6 files changed, 255 insertions(+), 81 deletions(-) create mode 100644 40 Fio 02 diagnostika 2.py create mode 100644 40 Fio 03 excel.py create mode 100644 70 PDF read ZPMVCR.py diff --git a/40 Fio 02 diagnostika 2.py b/40 Fio 02 diagnostika 2.py new file mode 100644 index 0000000..c5eb899 --- /dev/null +++ b/40 Fio 02 diagnostika 2.py @@ -0,0 +1,24 @@ +import pymysql +from pymysql.cursors import DictCursor + +conn = pymysql.connect( + host="192.168.1.76", + port=3307, + user="root", + password="Vlado9674+", + database="fio", + charset="utf8mb4", + cursorclass=DictCursor +) + +with conn.cursor() as cur: + cur.execute("SHOW TABLES;") + print("📋 Tables:", [r[f"Tables_in_fio"] for r in cur.fetchall()]) + + cur.execute("SELECT COUNT(*) AS cnt FROM transactions;") + print("🧾 Rows in `transactions`:", cur.fetchone()["cnt"]) + + cur.execute("SHOW COLUMNS FROM transactions;") + print("\n📊 Columns:") + for r in cur.fetchall(): + print(" -", r["Field"]) diff --git a/40 Fio 03 excel.py b/40 Fio 03 excel.py new file mode 100644 index 0000000..4b9652e --- /dev/null +++ b/40 Fio 03 excel.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +Export Fio transactions (from MySQL → Excel) +-------------------------------------------- +- Reads only cislo_uctu = '2800046620' +- For OZP (protiucet=2070101041) includes only positive objem +- Each sheet = insurance company (filtered by protiucet) +- First sheet = summary with total amounts and transaction counts +""" + +import pandas as pd +import pymysql +from pathlib import Path +from datetime import datetime + +# ======== CONFIG ======== +MYSQL_CONFIG = { + "host": "192.168.1.76", + "port": 3307, + "user": "root", + "password": "Vlado9674+", + "database": "fio", + "charset": "utf8mb4", +} + +REPORTOVAT = { + "VZP": "1114007221", + "VOZP": "2010009091", + "ČPZP": "2054108761", + "OZP": "2070101041", + "ZPŠ": "2090309181", + "ZPMV": "2112108031", +} + +EXPORT_PATH = Path(r"u:\Dropbox\!!!Days\Downloads Z230") / f"Fio_report_{datetime.now():%Y-%m-%d_%H-%M-%S}.xlsx" + + +# ======== LOAD DATA ======== +def load_data(): + print("🔄 Načítám data z MySQL (účet 2800046620, pro OZP jen kladné objemy)...") + conn = pymysql.connect(**MYSQL_CONFIG) + + sql = """ + SELECT * + FROM transactions + WHERE cislo_uctu = '2800046620' + AND ( + protiucet <> '2070101041' + OR (protiucet = '2070101041' AND objem > 0) + ); + """ + df = pd.read_sql(sql, conn) + conn.close() + + df.columns = df.columns.str.strip() + print(f"✅ Načteno {len(df)} řádků, {len(df.columns)} sloupců.") + return df + + +# ======== EXPORT TO EXCEL ======== +def export_to_excel(df): + summary_rows = [] # to collect summary per insurer + + with pd.ExcelWriter(EXPORT_PATH, engine="openpyxl") as writer: + # --- INDIVIDUAL SHEETS --- + for name, acc in REPORTOVAT.items(): + filtered = df[df["protiucet"].astype(str) == acc] + if filtered.empty: + print(f"⚠️ {name}: žádné transakce (účet {acc})") + summary_rows.append({ + "Pojišťovna": name, + "Číslo účtu": acc, + "Počet transakcí": 0, + "Součet objemu": 0.0 + }) + continue + + # safe numeric conversion + filtered = 
filtered.copy() + filtered["objem_num"] = ( + filtered["objem"] + .astype(str) + .str.replace("\u00A0", "", regex=False) + .str.replace(",", ".", regex=False) + .astype(float) + ) + + # --- summary data --- + total_sum = filtered["objem_num"].sum() + total_count = len(filtered) + + summary_rows.append({ + "Pojišťovna": name, + "Číslo účtu": acc, + "Počet transakcí": total_count, + "Součet objemu": round(total_sum, 2) + }) + + # --- write detailed sheet --- + filtered.to_excel(writer, index=False, sheet_name=name) + print(f"✅ {name}: {len(filtered)} řádků exportováno, součet {total_sum:,.2f} Kč") + + # --- SUMMARY SHEET --- + summary_df = pd.DataFrame(summary_rows) + summary_df["Součet objemu"] = summary_df["Součet objemu"].map("{:,.2f} Kč".format) + summary_df.to_excel(writer, index=False, sheet_name="Přehled") + print("🧾 Přidán přehledový list s celkovými součty.") + + print(f"\n📊 Hotovo! Soubor uložen jako:\n{EXPORT_PATH}") + + +# ======== MAIN ======== +if __name__ == "__main__": + df = load_data() + export_to_excel(df) diff --git a/40 fio 01.py b/40 fio 01.py index 1b219ce..81c60e8 100644 --- a/40 fio 01.py +++ b/40 fio 01.py @@ -2,11 +2,11 @@ # -*- coding: utf-8 -*- """ -Import Fio banka CSV export (UTF-8, ; separated, quoted) -into MySQL database `fio.transactions`. - -Unique key = (Číslo účtu, ID operace, ID pokynu) -Duplicates are skipped silently. +Fio CSV import → MySQL (dev version) +------------------------------------ +- Always drops & recreates `transactions` table +- Uses real CSV headers as seen in "Vyhledane pohyby (3).csv" +- Unique key = (Číslo účtu, ID operace, ID pokynu) """ import csv @@ -14,6 +14,7 @@ from pathlib import Path from datetime import datetime import pymysql from pymysql.cursors import DictCursor +import re # ======== CONFIG ======== CSV_PATH = Path(r"u:\Dropbox\!!!Days\Downloads Z230\Vyhledane pohyby (3).csv") @@ -33,14 +34,12 @@ MYSQL_CONFIG = { # ======== HELPERS ======== def clean(s: str): - """Trim and normalize text values.""" if not s: return None return s.strip() or None def parse_date(raw: str): - """Convert dd.mm.yyyy → date""" raw = (raw or "").strip() if not raw: return None @@ -51,10 +50,15 @@ def parse_date(raw: str): def parse_float(raw: str): - """Convert comma/space separated numbers to float""" - raw = (raw or "").replace(" ", "").replace(",", ".") + if raw is None: + return None + s = str(raw).strip() + for ch in (" ", "\u00A0", "\u202F", "\u2007"): + s = s.replace(ch, "") + s = s.replace(",", ".") + s = re.sub(r"[^0-9.+-]", "", s) try: - return float(raw) + return float(s) except ValueError: return None @@ -64,38 +68,43 @@ def get_mysql_connection(): return pymysql.connect(**MYSQL_CONFIG) -def ensure_table_exists(conn): - """Create table if it doesn’t exist, with unique key on (cislo_uctu, id_operace, id_pokynu).""" +def recreate_table(conn): + """Drop and recreate table with schema matching CSV structure.""" sql = f""" - CREATE TABLE IF NOT EXISTS `{TABLE_NAME}` ( + DROP TABLE IF EXISTS `{TABLE_NAME}`; + CREATE TABLE `{TABLE_NAME}` ( id INT AUTO_INCREMENT PRIMARY KEY, datum DATE, - castka DECIMAL(14,2), - akce VARCHAR(100), + objem DECIMAL(14,2), + mena CHAR(3), cislo_uctu VARCHAR(40), - id_operace VARCHAR(50), - id_pokynu VARCHAR(50), protiucet VARCHAR(40), - nazev_protiuctu VARCHAR(200), kod_banky VARCHAR(20), ks VARCHAR(20), vs VARCHAR(20), ss VARCHAR(20), zprava_pro_prijemce VARCHAR(500), poznamka VARCHAR(500), - reference_platce VARCHAR(200), - typ VARCHAR(100), - upresneni VARCHAR(500), - zadal VARCHAR(200), - zdrojovy_ucet 
VARCHAR(50), + id_operace VARCHAR(50), + id_pokynu VARCHAR(50), + ks_1 VARCHAR(20), nazev_banky VARCHAR(100), + nazev_protiuctu VARCHAR(200), + ss_1 VARCHAR(20), + typ VARCHAR(100), + upresneni_objem VARCHAR(100), + upresneni_mena VARCHAR(20), + vs_1 VARCHAR(20), + zadal VARCHAR(200), imported_at DATETIME DEFAULT CURRENT_TIMESTAMP, UNIQUE KEY uniq_tx (cislo_uctu, id_operace, id_pokynu) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; """ with conn.cursor() as cur: - cur.execute(sql) - print(f"✅ Tabulka `{TABLE_NAME}` zkontrolována nebo vytvořena.") + for stmt in sql.strip().split(";"): + if stmt.strip(): + cur.execute(stmt) + print(f"✅ Tabulka `{TABLE_NAME}` znovu vytvořena podle CSV struktury.") # ======== IMPORT ======== @@ -104,35 +113,37 @@ def import_fio_csv(): reader = csv.DictReader(f, delimiter=";", quotechar='"') rows = list(reader) - total_rows = len(rows) - print(f"📄 Načteno {total_rows} řádků ze souboru {CSV_PATH.name}") + total = len(rows) + print(f"📄 Načteno {total} řádků ze souboru {CSV_PATH.name}") with get_mysql_connection() as conn: - ensure_table_exists(conn) + recreate_table(conn) inserted, skipped = 0, 0 for i, row in enumerate(rows, start=1): data = { "datum": parse_date(row.get("Datum")), - "castka": parse_float(row.get("Částka")), - "akce": clean(row.get("Akce")), + "objem": parse_float(row.get("Objem")), + "mena": clean(row.get("Měna")), "cislo_uctu": clean(row.get("Číslo účtu")), - "id_operace": clean(row.get("ID operace")), - "id_pokynu": clean(row.get("ID pokynu")), "protiucet": clean(row.get("Protiúčet")), - "nazev_protiuctu": clean(row.get("Název protiúčtu")), "kod_banky": clean(row.get("Kód banky")), "ks": clean(row.get("KS")), "vs": clean(row.get("VS")), "ss": clean(row.get("SS")), "zprava_pro_prijemce": clean(row.get("Zpráva pro příjemce")), "poznamka": clean(row.get("Poznámka")), - "reference_platce": clean(row.get("Reference plátce")), - "typ": clean(row.get("Typ")), - "upresneni": clean(row.get("Upřesnění")), - "zadal": clean(row.get("Zadal")), - "zdrojovy_ucet": clean(row.get("Zdrojový účet")), + "id_operace": clean(row.get("ID operace")), + "id_pokynu": clean(row.get("ID pokynu")), + "ks_1": clean(row.get("KS.1")), "nazev_banky": clean(row.get("Název banky")), + "nazev_protiuctu": clean(row.get("Název protiúčtu")), + "ss_1": clean(row.get("SS.1")), + "typ": clean(row.get("Typ")), + "upresneni_objem": clean(row.get("Upřesnění - objem")), + "upresneni_mena": clean(row.get("Upřesnění - měna")), + "vs_1": clean(row.get("VS.1")), + "zadal": clean(row.get("Zadal")), } cols = ", ".join(data.keys()) @@ -146,21 +157,12 @@ def import_fio_csv(): else: skipped += 1 - # --- progress output --- - if i % 500 == 0 or i == total_rows: - print(f" {i}/{total_rows} zpracováno... ({inserted} vloženo, {skipped} duplicit)") - - # summary - with conn.cursor() as cur: - cur.execute(f"SELECT COUNT(*) AS cnt FROM `{TABLE_NAME}`") - total_db = cur.fetchone()["cnt"] + if i % 500 == 0 or i == total: + print(f" {i}/{total} zpracováno... 
({inserted} vloženo, {skipped} duplicit)") print(f"\n✅ Import dokončen: {inserted} nových, {skipped} duplicit přeskočeno.") - print(f"📊 Celkem v databázi: {total_db} záznamů.") # ======== MAIN ======== if __name__ == "__main__": - if not CSV_PATH.exists(): - raise SystemExit(f"❌ Soubor {CSV_PATH} nenalezen.") import_fio_csv() diff --git a/40 fio 02 diagnostika.py b/40 fio 02 diagnostika.py index bdbc79a..2fe5f64 100644 --- a/40 fio 02 diagnostika.py +++ b/40 fio 02 diagnostika.py @@ -2,46 +2,46 @@ # -*- coding: utf-8 -*- """ -Diagnostický test: načti Fio CSV a ověř parsování datumu. -Nenačítá se do MySQL – pouze vypíše výsledek. +Quick, verified dump of all Fio transactions from MySQL → Excel. +Column names are exactly as in DB. """ -import csv -from datetime import datetime +import pandas as pd +import pymysql +from pymysql.cursors import DictCursor from pathlib import Path +from datetime import datetime -# ✅ Tvoje cesta k souboru -CSV_PATH = Path(r"u:\Dropbox\!!!Days\Downloads Z230\Vyhledane pohyby (1).csv") +# ======== CONFIG ======== +MYSQL_CONFIG = { + "host": "192.168.1.76", + "port": 3307, + "user": "root", + "password": "Vlado9674+", + "database": "fio", + "charset": "utf8mb4", +} -def parse_czech_date(s: str): - """Očistí řetězec a zkusí dd.mm.yyyy.""" - if not s: - return None - s = s.strip().replace("\u00A0", "").replace("\ufeff", "") - try: - return datetime.strptime(s, "%d.%m.%Y").date() - except Exception: - return None +EXPORT_PATH = Path(r"u:\Dropbox\!!!Days\Downloads Z230") / f"Fio_ALL_{datetime.now():%Y-%m-%d_%H-%M-%S}.xlsx" +# ======== MAIN ======== +def dump_all_transactions(): + with pymysql.connect(**MYSQL_CONFIG) as conn: + sql = """ + SELECT + * + FROM transactions + ORDER BY datum DESC; + """ + df = pd.read_sql(sql, conn) -def main(): - with open(CSV_PATH, "r", encoding="utf-8-sig", newline="") as f: - reader = csv.DictReader(f, delimiter=";", quotechar='"') - rows = list(reader) + print(f"✅ Načteno {len(df)} transakcí z MySQL.") - print(f"Načteno {len(rows)} řádků.\n") - print("Ukázka prvních 10 řádků s hodnotou Datum:\n") - - for i, row in enumerate(rows[:10], start=1): - raw = row.get("Datum") - parsed = parse_czech_date(raw) - print(f"{i:02d}. raw={repr(raw)} -> parsed={parsed}") - - input("\n🔸 Stiskni Enter pro pokračování nebo ukončení... 
") + # Save to Excel + df.to_excel(EXPORT_PATH, index=False) + print(f"📊 Excel export hotov:\n{EXPORT_PATH}") if __name__ == "__main__": - if not CSV_PATH.exists(): - raise SystemExit(f"❌ Soubor {CSV_PATH} nenalezen.") - main() + dump_all_transactions() diff --git a/50 Dekurs.py b/50 Dekurs.py index b8cf489..f95261a 100644 --- a/50 Dekurs.py +++ b/50 Dekurs.py @@ -26,7 +26,7 @@ FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB" EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230") # calculate last 2 months dynamically -DATE_FROM = (datetime.now() - timedelta(days=365)).strftime("%Y-%m-%d") +DATE_FROM = (datetime.now() - timedelta(days=10)).strftime("%Y-%m-%d") timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") xlsx_path = EXPORT_DIR / f"{timestamp}_Dekurz (poslední rok).xlsx" diff --git a/70 PDF read ZPMVCR.py b/70 PDF read ZPMVCR.py new file mode 100644 index 0000000..aae9912 --- /dev/null +++ b/70 PDF read ZPMVCR.py @@ -0,0 +1,31 @@ + +import pdfplumber +import pandas as pd +from pathlib import Path + +pdf_path = Path(r"u:\Dropbox\!!!Days\Downloads Z230\prehled 09_2025 zpmvcr.pdf") +xlsx_path = pdf_path.with_suffix(".xlsx") + +all_tables = [] + +with pdfplumber.open(pdf_path) as pdf: + for i, page in enumerate(pdf.pages, start=1): + tables = page.extract_tables() + if not tables: + continue + table = tables[0] + # Convert to DataFrame, first row = header + df = pd.DataFrame(table[1:], columns=table[0]) + df["page"] = i + all_tables.append(df) + +if not all_tables: + print("❌ No tables found.") +else: + df_all = pd.concat(all_tables, ignore_index=True) + print("✅ Combined shape:", df_all.shape) + print(df_all.head()) + + # Save to Excel + df_all.to_excel(xlsx_path, index=False) + print(f"💾 Saved to: {xlsx_path}") From 45ecfe96e89c2daa7dcd2badab130038fe3233e7 Mon Sep 17 00:00:00 2001 From: "vladimir.buzalka" Date: Thu, 23 Oct 2025 08:13:20 +0200 Subject: [PATCH 2/3] z230 --- 80 CtiKapitaceZTabulkyFAK.py | 111 +++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 80 CtiKapitaceZTabulkyFAK.py diff --git a/80 CtiKapitaceZTabulkyFAK.py b/80 CtiKapitaceZTabulkyFAK.py new file mode 100644 index 0000000..6aced92 --- /dev/null +++ b/80 CtiKapitaceZTabulkyFAK.py @@ -0,0 +1,111 @@ +import re +import pandas as pd +import firebirdsql as fb +from datetime import datetime, timedelta +from pathlib import Path +from striprtf.striprtf import rtf_to_text +from openpyxl.styles import Font, Alignment, PatternFill, Border, Side +from openpyxl.utils import get_column_letter +import textwrap + +# ================== CONFIGURATION ================== +FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB" +EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230") + +# ================== FIREBIRD CONNECTION ================== +conn = fb.connect( + host="192.168.1.4", + port=3050, + database=FDB_PATH, + user="SYSDBA", + password="masterkey", + charset="WIN1250", +) + +def query_df(sql, params=None): + cur = conn.cursor() + cur.execute(sql, params or ()) + rows = cur.fetchall() + cols = [d[0].strip().lower() for d in cur.description] # 👈 normalize + return pd.DataFrame(rows, columns=cols) +# ================== QUERY ================== +sql = """ +SELECT id, cisfak, poj, datkapod, datkapdo, kapdetail +FROM fak +WHERE EXTRACT(YEAR FROM datkapod) = 2025 + AND EXTRACT(MONTH FROM datkapod) = 9 + AND poj = '211' +""" +df = query_df(sql) + +# print(df.columns.tolist()) +# print(df.head()) + +# Display the first KAPDETAIL text, wrapped nicely +kap_text = df.loc[0, 'kapdetail'] +# 
print("\n".join(textwrap.wrap(kap_text, width=100))) + +# ========== SOURCE STRING ========== +text = kap_text + +# Extract triplets: patientid;age|price; +pattern = r"(\d{4});(\d{1,3})\|([\d.]+);" +matches = re.findall(pattern, text) + +# Create dataframe +dfdavka = pd.DataFrame(matches, columns=["patientid", "age", "price"]) + +# ✅ Fix small typo: use dfdavka (not df) for conversions +dfdavka["patientid"] = dfdavka["patientid"].astype(int) +dfdavka["age"] = dfdavka["age"].astype(int) +dfdavka["price"] = dfdavka["price"].astype(float) + +# ========== LOAD KAR ========== +sql = "SELECT idpac, rodcis FROM kar" +dfkar = query_df(sql) + +# ========== MERGE ========== +dfmerged = pd.merge(dfdavka, dfkar, left_on="patientid", right_on="idpac", how="left") + +# Optional: reorder columns +dfmerged = dfmerged[["patientid", "rodcis", "age", "price"]] + +print(dfmerged.head()) +print(dfmerged.info()) + +# ========== OPTIONAL EXPORT ========== +# outfile = EXPORT_DIR / "kapdetail_merged.xlsx" +# dfmerged.to_excel(outfile, index=False) +# print(f"✅ Exported to {outfile}") + + + +# ========== 1️⃣ Load Excel and prepare dfpoj ========== +xlsx_path = Path(r"u:\Dropbox\!!!Days\Downloads Z230\prehled 09_2025 zpmvcr.xlsx") +df = pd.read_excel(xlsx_path) + +print("Columns in Excel:", df.columns.tolist()) + +# Select 2nd column (index 1) +dfpoj = df.iloc[:, [1]].copy() +dfpoj.columns = ["rodcis"] # give proper name +dfpoj["rodcis"] = dfpoj["rodcis"].astype(str).str.strip() + +print(dfpoj.head()) + +# ========== 2️⃣ Compare dfmerged vs dfpoj ========== +dfmerged["rodcis"] = dfmerged["rodcis"].astype(str).str.strip() + +# Find those in dfmerged but not in dfpoj +df_missing = dfmerged[~dfmerged["rodcis"].isin(dfpoj["rodcis"])].copy() + +print(f"❌ Počet pacientů v dfmerged, kteří NEJSOU v dfpoj: {len(df_missing)}") +print(df_missing.head()) + +# ========== 3️⃣ (Optional) Export differences ========== +EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230") +timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") +xlsx_out = EXPORT_DIR / f"chybějící_pacienti_{timestamp}.xlsx" + +df_missing.to_excel(xlsx_out, index=False) +print(f"✅ Výsledek uložen do {xlsx_out}") \ No newline at end of file From c94d3b9e241789a7ebeaba4fb152a888320dadd4 Mon Sep 17 00:00:00 2001 From: Vladimir Buzalka Date: Fri, 24 Oct 2025 18:55:29 +0200 Subject: [PATCH 3/3] notebook --- 50 Dekurs.py | 33 ++++++++++++++++++--------------- 55 Dekurz show decoded.py | 8 ++------ 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/50 Dekurs.py b/50 Dekurs.py index f95261a..731251b 100644 --- a/50 Dekurs.py +++ b/50 Dekurs.py @@ -20,26 +20,20 @@ from pathlib import Path from striprtf.striprtf import rtf_to_text from openpyxl.styles import Font, Alignment, PatternFill, Border, Side from openpyxl.utils import get_column_letter +from Functions import get_medicus_connection # ================== CONFIGURATION ================== FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB" EXPORT_DIR = Path(r"u:\Dropbox\!!!Days\Downloads Z230") -# calculate last 2 months dynamically +# calculate last 2 months dynamically (now set to 10 days for testing) DATE_FROM = (datetime.now() - timedelta(days=10)).strftime("%Y-%m-%d") timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") xlsx_path = EXPORT_DIR / f"{timestamp}_Dekurz (poslední rok).xlsx" # ================== FIREBIRD CONNECTION ================== -conn = fb.connect( - host="192.168.1.4", - port=3050, - database=FDB_PATH, - user="SYSDBA", - password="masterkey", - charset="WIN1250", -) +conn = 
get_medicus_connection()
 
 def query_df(sql, params=None):
     cur = conn.cursor()
@@ -85,21 +79,29 @@ def safe_rtf_to_text(x):
 
 df["DEKURS"] = df["DEKURS"].apply(safe_rtf_to_text)
 
-df.replace({r'(\r\n|\r|\n)': r'\r\n'}, regex=True, inplace=True)
-df.replace({r'[\ud800-\udfff\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]+': ''},
-           regex=True, inplace=True)
-df.replace({r'(\r\n){2,}': r'\r\n', r'(\r\n)+$': ''},
-           regex=True, inplace=True)
+# --- Normalize and clean newlines ---
+df["DEKURS"] = (
+    df["DEKURS"]
+    .replace(r"(\r\n|\r|\n)+", "\n", regex=True)   # unify newlines
+    .replace(r"\n{2,}", "\n", regex=True)          # collapse multiple blank lines
+    .str.strip()                                   # trim leading/trailing blanks
+)
 
+# --- Remove invalid control characters ---
+df.replace({r"[\ud800-\udfff\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]+": ""}, regex=True, inplace=True)
+
+# --- Merge patient name ---
 df["PACIENT"] = df["PRIJMENI"].fillna("") + ", " + df["JMENO"].fillna("")
 df.drop(columns=["PRIJMENI", "JMENO"], inplace=True)
 
+# --- Rename and format columns ---
 df.rename(columns={"ZKRATKA": "LEKAR", "VYKONY_DNE": "VYKONY DNE"}, inplace=True)
 df["DATUM"] = pd.to_datetime(df["DATUM"], errors="coerce").dt.date
 df.drop(columns=[c for c in df.columns if "ASCII" in c.upper()], inplace=True, errors="ignore")
 
 desired_order = ["DATUM", "RODCIS", "PACIENT", "LEKAR", "VYKONY DNE", "DEKURS"]
 df = df[[c for c in desired_order if c in df.columns]]
+
 # ================== CLEANUP OLD FILES ==================
 for old_file in EXPORT_DIR.glob("*Dekurz (poslední rok)*.xlsx"):
     try:
@@ -107,11 +109,12 @@ for old_file in EXPORT_DIR.glob("*Dekurz (poslední rok)*.xlsx"):
         print(f"🧹 Deleted old file: {old_file.name}")
     except Exception as e:
         print(f"⚠️ Could not delete {old_file.name}: {e}")
+
 # ================== EXPORT TO EXCEL ==================
 with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
     df.to_excel(writer, index=False, sheet_name="Dekurz")
     ws = writer.sheets["Dekurz"]
-
+    ws.freeze_panes = "F2"  # freeze the header row and columns A:E
     # ----- Bright yellow header -----
     header_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
     for cell in ws[1]:
diff --git a/55 Dekurz show decoded.py b/55 Dekurz show decoded.py
index 2e49255..af8da2b 100644
--- a/55 Dekurz show decoded.py
+++ b/55 Dekurz show decoded.py
@@ -7,14 +7,10 @@ Show decoded physician notes (RTF → plain text) directly in console.
 """
 
 import fdb
 from striprtf.striprtf import rtf_to_text
+from Functions import get_medicus_connection
 
 # ===== connection =====
-con = fdb.connect(
-    dsn='localhost:z:\\Medicus 3\\data\\medicus.fdb',
-    user='sysdba',
-    password='masterkey',
-    charset='WIN1250'
-)
+con = get_medicus_connection()
 cur = con.cursor()
 # ===== pick a few recent records =====
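
Note: both `50 Dekurs.py` and `55 Dekurz show decoded.py` now import `get_medicus_connection` from a `Functions` module that is not included in this patch series. Below is a minimal sketch of what that helper presumably contains, assuming it only centralizes the connection parameters visible in the inline calls it replaces; the `firebirdsql` driver is taken from `80 CtiKapitaceZTabulkyFAK.py`, which connects with identical parameters (`55 Dekurz show decoded.py` previously used the `fdb` driver, which exposes the same `connect`/`cursor` API). The module layout and docstring are assumptions, not part of the patch.

# Functions.py - hypothetical sketch, not included in this patch series.
# Assumes the helper merely wraps the fb.connect(...) parameters that the
# scripts previously inlined (host/port/database/user/password/charset
# copied from the removed lines above).
import firebirdsql as fb

FDB_PATH = r"z:\Medicus 3\data\MEDICUS.FDB"

def get_medicus_connection():
    """Return a connection to the Medicus Firebird database."""
    return fb.connect(
        host="192.168.1.4",
        port=3050,
        database=FDB_PATH,
        user="SYSDBA",
        password="masterkey",
        charset="WIN1250",
    )

With a helper like this in place, `conn = get_medicus_connection()` in `50 Dekurs.py` behaves exactly like the removed inline `fb.connect(...)` block, and the connection settings live in one module instead of being duplicated across scripts.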