#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Import a Fio banka CSV export into the MySQL table ``fio.transactions``.

The export is expected to be UTF-8 encoded, ``;``-separated and quoted.

- Dates are always in ``dd.mm.yyyy`` format.
- A diagnostic fingerprint is stored in the indexed ``uniq_fp`` column.
- Duplicates are NOT blocked yet; they are only reported to ease debugging.
- All rows of one run are inserted in a single transaction, so a failed run
  leaves no partially imported data behind.
"""

import csv
import os
from datetime import date, datetime
from decimal import Decimal, InvalidOperation
from pathlib import Path
from typing import Optional

import pymysql
from pymysql.cursors import DictCursor

# ======== CONFIG ========
CSV_PATH = Path(r"u:\Dropbox\!!!Days\Downloads Z230\Vyhledane pohyby (1).csv")
TABLE_NAME = "transactions"

MYSQL_CONFIG = {
    "host": "192.168.1.76",
    "port": 3307,
    "user": "root",
    # NOTE(review): a credential is committed in source. It can be overridden
    # with the FIO_MYSQL_PASSWORD environment variable; the literal default is
    # kept only for backward compatibility and should be removed and rotated.
    "password": os.environ.get("FIO_MYSQL_PASSWORD", "Vlado9674+"),
    "database": "fio",
    "charset": "utf8mb4",
    "cursorclass": DictCursor,
    "autocommit": True,
}

# Column order used for both the INSERT column list and the parameter tuples.
_COLUMNS = (
    "datum",
    "objem",
    "mena",
    "cislo_uctu",
    "protiucet",
    "kod_banky",
    "ks",
    "vs",
    "ss",
    "zprava",
    "poznamka",
    "uniq_fp",
)


# ---------- helpers ----------
def clean(s: Optional[str]) -> Optional[str]:
    """Normalize text for consistent comparison (trim + lowercase).

    Returns ``None`` for ``None``, an empty string, or a string that is empty
    after stripping. Note that the lowercased value is what gets stored.
    """
    if not s:
        return None
    s = s.strip().lower()
    return s or None


def build_fingerprint(data: dict) -> str:
    """Build the diagnostic fingerprint from the normalized fields.

    The fingerprint is the ``|``-joined sequence of: ISO date, amount with two
    decimals, account number, counter-account, bank code, VS, and the first
    100 characters of the message and of the note. Missing values contribute
    an empty string.

    Raises:
        KeyError: if ``data`` has no ``"datum"`` or ``"objem"`` key.
    """
    parts = [
        data["datum"].strftime("%Y-%m-%d") if data["datum"] else "",
        f"{data['objem']:.2f}" if data["objem"] is not None else "",
        data.get("cislo_uctu") or "",
        data.get("protiucet") or "",
        data.get("kod_banky") or "",
        data.get("vs") or "",
        (data.get("zprava") or "")[:100],
        (data.get("poznamka") or "")[:100],
    ]
    return "|".join(parts)


def _parse_date(raw: str) -> Optional[date]:
    """Parse a ``dd.mm.yyyy`` string; return ``None`` if empty or invalid."""
    if not raw:
        return None
    try:
        return datetime.strptime(raw, "%d.%m.%Y").date()
    except ValueError:
        return None


def _parse_amount(raw: str) -> Optional[Decimal]:
    """Parse an amount with a decimal comma; return ``None`` if unusable.

    All whitespace is removed first (``str.split`` also splits on non-breaking
    spaces, which a plain ``replace(" ", "")`` would miss). Non-finite values
    (NaN, Infinity) are rejected because they cannot be stored in a DECIMAL
    column.
    """
    normalized = "".join(raw.split()).replace(",", ".")
    if not normalized:
        return None
    try:
        amount = Decimal(normalized)
    except InvalidOperation:
        return None
    return amount if amount.is_finite() else None


def _row_to_record(row: dict, line_no: int) -> dict:
    """Convert one CSV row into a dict keyed by the table's column names.

    An unparseable date or amount is stored as NULL, but a warning naming the
    CSV line is printed so the loss is visible instead of silent.
    """
    raw_date = (row.get("Datum") or "").strip()
    datum = _parse_date(raw_date)
    if raw_date and datum is None:
        print(f"⚠️ Řádek {line_no}: neplatné datum {raw_date!r}, ukládám NULL.")

    raw_amount = (row.get("Objem") or "").strip()
    objem = _parse_amount(raw_amount)
    if raw_amount and objem is None:
        print(f"⚠️ Řádek {line_no}: neplatná částka {raw_amount!r}, ukládám NULL.")

    record = {
        "datum": datum,
        "objem": objem,
        "mena": clean(row.get("Měna")),
        "cislo_uctu": clean(row.get("Číslo účtu")),
        "protiucet": clean(row.get("Protiúčet")),
        "kod_banky": clean(row.get("Kód banky")),
        "ks": clean(row.get("KS")),
        "vs": clean(row.get("VS")),
        "ss": clean(row.get("SS")),
        "zprava": clean(row.get("Zpráva pro příjemce")),
        "poznamka": clean(row.get("Poznámka")),
    }
    # Fingerprint used for debugging duplicates.
    record["uniq_fp"] = build_fingerprint(record)
    return record


def _load_records(csv_path: Path) -> list:
    """Read the CSV export and return one normalized record per data row."""
    records = []
    # utf-8-sig transparently drops a leading BOM if the export has one.
    with open(csv_path, "r", encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f, delimiter=";", quotechar='"')
        for row in reader:
            # line_num is the number of physical lines read so far, which
            # points at the (last) source line of the current record.
            records.append(_row_to_record(row, reader.line_num))
    return records


# ======== DB SETUP ========
def get_mysql_connection():
    """Open a new PyMySQL connection configured by ``MYSQL_CONFIG``."""
    return pymysql.connect(**MYSQL_CONFIG)


def ensure_table_exists(conn):
    """Create the table if needed and make sure ``uniq_fp`` and its index exist.

    Args:
        conn: an open PyMySQL connection to the target database.
    """
    sql_create = f"""
        CREATE TABLE IF NOT EXISTS `{TABLE_NAME}` (
            id INT AUTO_INCREMENT PRIMARY KEY,
            datum DATE,
            objem DECIMAL(12,2),
            mena CHAR(3),
            cislo_uctu VARCHAR(40),
            protiucet VARCHAR(40),
            kod_banky VARCHAR(20),
            ks VARCHAR(20),
            vs VARCHAR(20),
            ss VARCHAR(20),
            zprava VARCHAR(500),
            poznamka VARCHAR(500),
            uniq_fp VARCHAR(512),
            imported_at DATETIME DEFAULT CURRENT_TIMESTAMP
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
    """
    with conn.cursor() as cur:
        cur.execute(sql_create)

        # Add the column only if it is missing (table created by an older
        # version of this script).
        cur.execute(f"SHOW COLUMNS FROM `{TABLE_NAME}` LIKE 'uniq_fp'")
        if not cur.fetchone():
            cur.execute(
                f"ALTER TABLE `{TABLE_NAME}` ADD COLUMN uniq_fp VARCHAR(512) NULL"
            )
            print("🆕 Sloupec uniq_fp přidán.")

        # Create the index if it does not exist yet.
        cur.execute(
            f"SHOW INDEX FROM `{TABLE_NAME}` WHERE Key_name = 'idx_uniq_fp'"
        )
        if not cur.fetchone():
            cur.execute(f"CREATE INDEX idx_uniq_fp ON `{TABLE_NAME}` (uniq_fp)")
            print("🆕 Index idx_uniq_fp vytvořen.")

    print("✅ Tabulka a index zkontrolovány.")


def _report_duplicates(conn) -> None:
    """Print the ten most frequent duplicate fingerprints, if any."""
    with conn.cursor() as cur:
        cur.execute(f"""
            SELECT uniq_fp, COUNT(*) AS c
            FROM `{TABLE_NAME}`
            GROUP BY uniq_fp
            HAVING c > 1
            ORDER BY c DESC
            LIMIT 10;
        """)
        dups = cur.fetchall()

    if dups:
        print("\n⚠️ TOP 10 duplicitních fingerprintů (uniq_fp, count):")
        for r in dups:
            print(f" {r['uniq_fp']} -> {r['c']}")
        print(f"\n💡 Tip: SELECT * FROM `{TABLE_NAME}` WHERE uniq_fp = '...';")
    else:
        print("✅ Žádné duplicity podle uniq_fp nenalezeny.")


# ======== IMPORT ========
def import_fio_csv(csv_path: Optional[Path] = None):
    """Import the CSV export into MySQL and print a duplicate report.

    Args:
        csv_path: file to import; defaults to the module-level ``CSV_PATH``.

    Raises:
        OSError: if the CSV file cannot be opened.
        pymysql.MySQLError: on database errors; the inserts of this run are
            rolled back before the error propagates.
    """
    records = _load_records(CSV_PATH if csv_path is None else csv_path)

    # Deliberately a plain INSERT (no IGNORE) so duplicate rows stay visible.
    placeholders = ", ".join(["%s"] * len(_COLUMNS))
    sql = (
        f"INSERT INTO `{TABLE_NAME}` ({', '.join(_COLUMNS)}) "
        f"VALUES ({placeholders})"
    )
    params = [tuple(record[col] for col in _COLUMNS) for record in records]

    with get_mysql_connection() as conn:
        # Schema setup runs before the transaction is opened.
        ensure_table_exists(conn)

        inserted = 0
        if params:
            # The connection is in autocommit mode, so open an explicit
            # transaction: either every row of this run is stored, or none.
            conn.begin()
            try:
                with conn.cursor() as cur:
                    cur.executemany(sql, params)
                conn.commit()
            except Exception:
                conn.rollback()
                raise
            inserted = len(params)

        print(f"✅ Import hotový: vloženo {inserted} řádků.")

        # Quick duplicate report based on the fingerprint.
        _report_duplicates(conn)


# ======== MAIN ========
if __name__ == "__main__":
    if not CSV_PATH.exists():
        raise SystemExit(f"❌ Soubor {CSV_PATH} nenalezen.")
    import_fio_csv()