173 lines
5.8 KiB
Python
173 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
Import a Fio banka CSV export (UTF-8, semicolon-separated, quoted)
into the MySQL table `fio.transactions`.

- The date is always in dd.mm.yyyy format.
- Adds a diagnostic fingerprint to the (indexed) `uniq_fp` column.
- Does NOT block duplicates yet; it only makes them easy to list for debugging.
"""
|
|
|
|
import csv
import os
from datetime import datetime
from pathlib import Path

import pymysql
from pymysql.cursors import DictCursor
|
|
|
|
# ======== CONFIG ========
|
|
# Every setting below can be overridden through an environment variable; the
# literal values are kept only as backward-compatible defaults.
# NOTE(security): a root password committed to source control is a liability.
# Prefer setting FIO_MYSQL_USER / FIO_MYSQL_PASSWORD in the environment and
# removing the literal fallback once all callers do so.

# CSV export to import (override with FIO_CSV_PATH).
CSV_PATH = Path(
    os.environ.get(
        "FIO_CSV_PATH",
        r"u:\Dropbox\!!!Days\Downloads Z230\Vyhledane pohyby (1).csv",
    )
)

# Name of the target table inside the configured database.
TABLE_NAME = "transactions"

# Keyword arguments passed verbatim to pymysql.connect().
MYSQL_CONFIG = {
    "host": os.environ.get("FIO_MYSQL_HOST", "192.168.1.76"),
    # The environment only carries strings, so the port is converted explicitly.
    "port": int(os.environ.get("FIO_MYSQL_PORT", "3307")),
    "user": os.environ.get("FIO_MYSQL_USER", "root"),
    "password": os.environ.get("FIO_MYSQL_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("FIO_MYSQL_DATABASE", "fio"),
    "charset": "utf8mb4",
    "cursorclass": DictCursor,
    # Every statement is committed immediately; there is no surrounding transaction.
    "autocommit": True,
}
|
|
|
|
# ---------- helpers ----------
|
|
def clean(s: str):
    """Return *s* trimmed and lower-cased, or None when nothing remains.

    Falsy input (None, empty string) and whitespace-only strings all map to
    None so that such values compare equal after normalization.
    """
    normalized = s.strip().lower() if s else ""
    return normalized if normalized else None
|
|
|
|
def build_fingerprint(data: dict) -> str:
    """Build a diagnostic fingerprint from the normalized fields of one row.

    The result is a ``|``-joined string of: date (``YYYY-MM-DD``), amount with
    two decimals, account number, counter-account, bank code, variable symbol,
    and the first 100 characters of the message and of the note. Missing or
    empty values contribute an empty segment.

    ``data`` must contain the keys ``"datum"`` and ``"objem"`` (a KeyError is
    raised otherwise); all other keys are optional.
    """
    day = data["datum"]
    amount = data["objem"]

    day_part = day.strftime("%Y-%m-%d") if day else ""
    amount_part = "" if amount is None else f"{amount:.2f}"

    # Identifier-like fields are used in full ...
    id_parts = [
        data.get(key) or ""
        for key in ("cislo_uctu", "protiucet", "kod_banky", "vs")
    ]
    # ... while free-text fields are capped at 100 characters each.
    text_parts = [
        (data.get(key) or "")[:100]
        for key in ("zprava", "poznamka")
    ]

    return "|".join([day_part, amount_part, *id_parts, *text_parts])
|
|
|
|
# ======== DB SETUP ========
|
|
def get_mysql_connection():
    """Open and return a new PyMySQL connection configured by MYSQL_CONFIG."""
    connection = pymysql.connect(**MYSQL_CONFIG)
    return connection
|
|
|
|
def ensure_table_exists(conn):
    """Make sure the target table, its ``uniq_fp`` column and its index exist.

    Runs three steps on a cursor obtained from ``conn``:

    1. ``CREATE TABLE IF NOT EXISTS`` for the table named by TABLE_NAME.
    2. Adds the ``uniq_fp`` column when ``SHOW COLUMNS`` does not report it
       (covers tables created before the column was part of the schema).
    3. Creates the ``idx_uniq_fp`` index when ``SHOW INDEX`` does not report it.

    Progress messages are printed to stdout.
    """
    quoted_table = f"`{TABLE_NAME}`"
    create_stmt = f"""
        CREATE TABLE IF NOT EXISTS {quoted_table} (
            id INT AUTO_INCREMENT PRIMARY KEY,
            datum DATE,
            objem DECIMAL(12,2),
            mena CHAR(3),
            cislo_uctu VARCHAR(40),
            protiucet VARCHAR(40),
            kod_banky VARCHAR(20),
            ks VARCHAR(20),
            vs VARCHAR(20),
            ss VARCHAR(20),
            zprava VARCHAR(500),
            poznamka VARCHAR(500),
            uniq_fp VARCHAR(512),
            imported_at DATETIME DEFAULT CURRENT_TIMESTAMP
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
    """

    with conn.cursor() as cursor:
        cursor.execute(create_stmt)

        # Add the column only when it is missing.
        cursor.execute(f"SHOW COLUMNS FROM {quoted_table} LIKE 'uniq_fp'")
        column_row = cursor.fetchone()
        if not column_row:
            cursor.execute(
                f"ALTER TABLE {quoted_table} ADD COLUMN uniq_fp VARCHAR(512) NULL"
            )
            print("🆕 Sloupec uniq_fp přidán.")

        # Create the index only when it does not exist yet.
        cursor.execute(
            f"SHOW INDEX FROM {quoted_table} WHERE Key_name = 'idx_uniq_fp'"
        )
        index_row = cursor.fetchone()
        if not index_row:
            cursor.execute(f"CREATE INDEX idx_uniq_fp ON {quoted_table} (uniq_fp)")
            print("🆕 Index idx_uniq_fp vytvořen.")

    print("✅ Tabulka a index zkontrolovány.")
|
|
|
|
# ======== IMPORT ========
|
|
def _parse_fio_date(raw):
    """Parse a ``dd.mm.yyyy`` string into a ``date``; None when empty or malformed."""
    text = (raw or "").strip()
    if not text:
        return None
    try:
        return datetime.strptime(text, "%d.%m.%Y").date()
    except ValueError:
        return None


def _parse_fio_amount(raw):
    """Parse an amount such as ``1 234,56`` into a float; None when unparseable."""
    # str.split() without arguments splits on *any* Unicode whitespace, so this
    # also drops non-breaking / narrow spaces used as thousands separators,
    # which a plain replace(" ", "") would leave in place and make float() fail.
    compact = "".join((raw or "").split()).replace(",", ".")
    try:
        return float(compact)
    except ValueError:
        return None


def import_fio_csv():
    """Import the CSV file at CSV_PATH into the MySQL table TABLE_NAME.

    Steps:

    1. Read the whole CSV (UTF-8 with optional BOM, ``;``-separated, quoted)
       into memory so the file is closed before the database is touched.
    2. Open a connection, make sure the table/column/index exist.
    3. Insert every row, with text fields normalized by ``clean`` and a
       diagnostic ``uniq_fp`` fingerprint from ``build_fingerprint``.
       Duplicates are intentionally NOT filtered out.
    4. Print the number of inserted rows, a warning when some rows had a
       missing/unparseable date or amount (those are stored as NULL), and
       the ten most frequent duplicated fingerprints.

    Returns None; all results are reported on stdout.
    """
    with open(CSV_PATH, "r", encoding="utf-8-sig", newline="") as f:
        rows = list(csv.DictReader(f, delimiter=";", quotechar='"'))

    # The column list is fixed, so the INSERT statement is built once instead
    # of once per row.
    columns = (
        "datum",
        "objem",
        "mena",
        "cislo_uctu",
        "protiucet",
        "kod_banky",
        "ks",
        "vs",
        "ss",
        "zprava",
        "poznamka",
        "uniq_fp",
    )
    placeholders = ", ".join(["%s"] * len(columns))
    insert_sql = (
        f"INSERT INTO `{TABLE_NAME}` ({', '.join(columns)}) VALUES ({placeholders})"
    )

    with get_mysql_connection() as conn:
        ensure_table_exists(conn)
        inserted = 0
        missing_dates = 0
        missing_amounts = 0

        # One cursor is reused for all inserts.
        with conn.cursor() as cur:
            for row in rows:
                # --- parse typed values and normalize the text fields ---
                data = {
                    "datum": _parse_fio_date(row.get("Datum")),
                    "objem": _parse_fio_amount(row.get("Objem")),
                    "mena": clean(row.get("Měna")),
                    "cislo_uctu": clean(row.get("Číslo účtu")),
                    "protiucet": clean(row.get("Protiúčet")),
                    "kod_banky": clean(row.get("Kód banky")),
                    "ks": clean(row.get("KS")),
                    "vs": clean(row.get("VS")),
                    "ss": clean(row.get("SS")),
                    "zprava": clean(row.get("Zpráva pro příjemce")),
                    "poznamka": clean(row.get("Poznámka")),
                }

                # Rows are still imported with NULLs, but no longer silently.
                if data["datum"] is None:
                    missing_dates += 1
                if data["objem"] is None:
                    missing_amounts += 1

                # --- fingerprint used to debug duplicates ---
                data["uniq_fp"] = build_fingerprint(data)

                # --- INSERT (deliberately without IGNORE so that duplicate
                #     rows really show up) ---
                cur.execute(insert_sql, [data[column] for column in columns])
                inserted += 1

        print(f"✅ Import hotový: vloženo {inserted} řádků.")
        if missing_dates or missing_amounts:
            print(
                f"⚠️ Řádky s chybějícím/neplatným datem: {missing_dates}, "
                f"s chybějící/neplatnou částkou: {missing_amounts}."
            )

        # ----- quick duplicate report by fingerprint -----
        with conn.cursor() as cur:
            cur.execute(f"""
                SELECT uniq_fp, COUNT(*) AS c
                FROM `{TABLE_NAME}`
                GROUP BY uniq_fp
                HAVING c > 1
                ORDER BY c DESC
                LIMIT 10;
            """)
            dups = cur.fetchall()

        if dups:
            print("\n⚠️ TOP 10 duplicitních fingerprintů (uniq_fp, count):")
            for r in dups:
                print(f"  {r['uniq_fp']}  ->  {r['c']}")
            # Use TABLE_NAME so the hint stays correct if the table is renamed.
            print(f"\n💡 Tip: SELECT * FROM `{TABLE_NAME}` WHERE uniq_fp = '...';")
        else:
            print("✅ Žádné duplicity podle uniq_fp nenalezeny.")
|
|
|
|
# ======== MAIN ========
|
|
if __name__ == "__main__":
    # Stop with a readable message (instead of a traceback from open()) when
    # the configured CSV export is not present.
    if CSV_PATH.exists():
        import_fio_csv()
    else:
        raise SystemExit(f"❌ Soubor {CSV_PATH} nenalezen.")
|