Files
reporty/40 fio 01.py
2025-10-20 15:44:37 +02:00

173 lines
5.8 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Import a Fio banka CSV export (UTF-8, semicolon-separated, quoted)
into the MySQL table `fio.transactions`.
- Dates are always in dd.mm.yyyy format.
- Adds a diagnostic fingerprint to the (indexed) `uniq_fp` column.
- Does NOT block duplicates yet; it only makes them easy to list while debugging.
"""
import csv
from pathlib import Path
from datetime import datetime
import pymysql
from pymysql.cursors import DictCursor
# ======== CONFIG ========
# Fio banka CSV export that this script imports.
CSV_PATH = Path(r"u:\Dropbox\!!!Days\Downloads Z230\Vyhledane pohyby (1).csv")

# Target table inside the database selected by MYSQL_CONFIG["database"].
TABLE_NAME = "transactions"

# Keyword arguments passed verbatim to pymysql.connect().
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or from a config file kept out of version control.
MYSQL_CONFIG = dict(
    host="192.168.1.76",
    port=3307,
    user="root",
    password="Vlado9674+",
    database="fio",
    charset="utf8mb4",
    cursorclass=DictCursor,  # rows come back as dicts keyed by column name
    autocommit=True,  # every statement is committed as soon as it runs
)
# ---------- helpers ----------
def clean(s: str):
    """Return *s* trimmed and lowercased, or ``None`` when nothing is left.

    Falsy input (``None`` or an empty string) and whitespace-only strings
    all yield ``None``, so values that differ only in case or surrounding
    whitespace normalize to the same result.
    """
    normalized = s.strip().lower() if s else ""
    return normalized if normalized else None
def build_fingerprint(data: dict) -> str:
    """Build the diagnostic fingerprint from already-normalized fields.

    The result joins eight parts with ``|``: the date formatted as
    ``%Y-%m-%d``, the amount with two decimals, the account number, the
    counter-account, the bank code, the variable symbol, and the first 100
    characters each of the message and the note. A missing or empty value
    contributes an empty string.

    Raises:
        KeyError: if ``data`` has no ``"datum"`` or ``"objem"`` key.
    """
    when = data["datum"]
    amount = data["objem"]

    fields = [
        when.strftime("%Y-%m-%d") if when else "",
        "" if amount is None else f"{amount:.2f}",
    ]
    # Identifier-like fields are used whole.
    fields.extend(
        data.get(key) or ""
        for key in ("cislo_uctu", "protiucet", "kod_banky", "vs")
    )
    # Free-text fields are capped at 100 characters each.
    fields.extend(
        (data.get(key) or "")[:100]
        for key in ("zprava", "poznamka")
    )
    return "|".join(fields)
# ======== DB SETUP ========
def get_mysql_connection():
    """Open and return a new PyMySQL connection built from ``MYSQL_CONFIG``."""
    connection = pymysql.connect(**MYSQL_CONFIG)
    return connection
def ensure_table_exists(conn):
    """Make sure the target table, its ``uniq_fp`` column and its index exist.

    Runs ``CREATE TABLE IF NOT EXISTS`` for ``TABLE_NAME``, then adds the
    ``uniq_fp`` column and the ``idx_uniq_fp`` index if either is missing.
    Progress is printed to stdout.

    Args:
        conn: open database connection whose ``cursor()`` works as a
            context manager.
    """
    column_defs = (
        "id INT AUTO_INCREMENT PRIMARY KEY",
        "datum DATE",
        "objem DECIMAL(12,2)",
        "mena CHAR(3)",
        "cislo_uctu VARCHAR(40)",
        "protiucet VARCHAR(40)",
        "kod_banky VARCHAR(20)",
        "ks VARCHAR(20)",
        "vs VARCHAR(20)",
        "ss VARCHAR(20)",
        "zprava VARCHAR(500)",
        "poznamka VARCHAR(500)",
        "uniq_fp VARCHAR(512)",
        "imported_at DATETIME DEFAULT CURRENT_TIMESTAMP",
    )
    create_sql = (
        f"\nCREATE TABLE IF NOT EXISTS `{TABLE_NAME}` (\n"
        + ",\n".join(column_defs)
        + "\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;\n"
    )

    with conn.cursor() as cur:
        cur.execute(create_sql)

        # CREATE ... IF NOT EXISTS leaves an already existing table untouched,
        # so add the column only when it is missing.
        cur.execute(f"SHOW COLUMNS FROM `{TABLE_NAME}` LIKE 'uniq_fp'")
        column_present = bool(cur.fetchone())
        if not column_present:
            cur.execute(f"ALTER TABLE `{TABLE_NAME}` ADD COLUMN uniq_fp VARCHAR(512) NULL")
            print("🆕 Sloupec uniq_fp přidán.")

        # Create the index only when it does not exist yet.
        cur.execute(f"SHOW INDEX FROM `{TABLE_NAME}` WHERE Key_name = 'idx_uniq_fp'")
        index_present = bool(cur.fetchone())
        if not index_present:
            cur.execute(f"CREATE INDEX idx_uniq_fp ON `{TABLE_NAME}` (uniq_fp)")
            print("🆕 Index idx_uniq_fp vytvořen.")

    print("✅ Tabulka a index zkontrolovány.")
# ======== IMPORT ========
# (table column, CSV header) pairs for the text fields normalized by clean().
_TEXT_FIELDS = (
    ("mena", "Měna"),
    ("cislo_uctu", "Číslo účtu"),
    ("protiucet", "Protiúčet"),
    ("kod_banky", "Kód banky"),
    ("ks", "KS"),
    ("vs", "VS"),
    ("ss", "SS"),
    ("zprava", "Zpráva pro příjemce"),
    ("poznamka", "Poznámka"),
)

# Column order shared by the INSERT statement and the per-row value lists.
_INSERT_COLUMNS = (
    "datum",
    "objem",
    *(column for column, _header in _TEXT_FIELDS),
    "uniq_fp",
)


def _parse_date(raw):
    """Return the ``dd.mm.yyyy`` string *raw* as a date, or None if empty/invalid."""
    text = (raw or "").strip()
    if not text:
        return None
    try:
        return datetime.strptime(text, "%d.%m.%Y").date()
    except ValueError:
        # Deliberate best effort: the row is still imported, with a NULL date.
        return None


def _parse_amount(raw):
    """Return the decimal-comma amount *raw* as a float, or None if empty/invalid.

    All whitespace is removed before parsing. ``str.split()`` without a
    separator also splits on non-breaking and narrow no-break spaces, so an
    amount such as ``"1\u00a0234,56"`` parses instead of silently becoming NULL.
    """
    compact = "".join((raw or "").split()).replace(",", ".")
    try:
        return float(compact)
    except ValueError:
        # Deliberate best effort: the row is still imported, with a NULL amount.
        return None


def _row_to_record(row):
    """Convert one CSV row (dict keyed by header) into a dict of column values."""
    record = {
        "datum": _parse_date(row.get("Datum")),
        "objem": _parse_amount(row.get("Objem")),
    }
    for column, header in _TEXT_FIELDS:
        record[column] = clean(row.get(header))
    # The fingerprint is built last because it reads the normalized fields.
    record["uniq_fp"] = build_fingerprint(record)
    return record


def _report_duplicates(conn):
    """Print up to ten fingerprints that occur more than once in the table."""
    with conn.cursor() as cur:
        cur.execute(f"""
            SELECT uniq_fp, COUNT(*) AS c
            FROM `{TABLE_NAME}`
            GROUP BY uniq_fp
            HAVING c > 1
            ORDER BY c DESC
            LIMIT 10;
        """)
        dups = cur.fetchall()

    if not dups:
        print("✅ Žádné duplicity podle uniq_fp nenalezeny.")
        return

    print("\n⚠️ TOP 10 duplicitních fingerprintů (uniq_fp, count):")
    for r in dups:
        print(f" {r['uniq_fp']} -> {r['c']}")
    # Use TABLE_NAME so the hint stays correct if the table is renamed.
    print(f"\n💡 Tip: SELECT * FROM `{TABLE_NAME}` WHERE uniq_fp = '...';")


def import_fio_csv():
    """Import every row of ``CSV_PATH`` into ``TABLE_NAME`` and report duplicates.

    The CSV is read completely (UTF-8 with optional BOM, ``;``-separated,
    ``"``-quoted) before the database connection is opened. Each row is
    normalized, given a diagnostic fingerprint and inserted with a plain
    ``INSERT`` -- intentionally without ``IGNORE``, so duplicate rows are
    stored and show up in the report. Rows whose date or amount cannot be
    parsed are still inserted, with NULL in that column.

    With ``autocommit`` enabled in ``MYSQL_CONFIG`` every row is committed on
    its own, so a failure part-way through leaves the earlier rows in place.

    The duplicate report covers the whole table, not only this import.
    """
    with open(CSV_PATH, "r", encoding="utf-8-sig", newline="") as f:
        rows = list(csv.DictReader(f, delimiter=";", quotechar='"'))

    # The statement is identical for every row, so build it once.
    placeholders = ", ".join(["%s"] * len(_INSERT_COLUMNS))
    insert_sql = (
        f"INSERT INTO `{TABLE_NAME}` ({', '.join(_INSERT_COLUMNS)}) "
        f"VALUES ({placeholders})"
    )

    with get_mysql_connection() as conn:
        ensure_table_exists(conn)

        inserted = 0
        # One cursor serves all inserts instead of a new cursor per row.
        with conn.cursor() as cur:
            for row in rows:
                record = _row_to_record(row)
                cur.execute(insert_sql, [record[col] for col in _INSERT_COLUMNS])
                inserted += 1
        print(f"✅ Import hotový: vloženo {inserted} řádků.")

        _report_duplicates(conn)
# ======== MAIN ========
if __name__ == "__main__":
    # Exit with a readable message instead of a traceback when the
    # export file is not where CSV_PATH points.
    if CSV_PATH.exists():
        import_fio_csv()
    else:
        raise SystemExit(f"❌ Soubor {CSV_PATH} nenalezen.")