This commit is contained in:
2026-05-05 14:11:50 +02:00
parent 10eba225e7
commit 5f26ff0cc5
17 changed files with 2373 additions and 0 deletions
+172
View File
@@ -0,0 +1,172 @@
"""
Import Covance kit inventory CSV do MySQL tabulky covance_kit_inventory.
Strategie: versioning — každý import = nový import_id.
Metadata řádky na konci CSV (začínají "title" / "Search" / ...) jsou ignorovány.
"""
import os
import glob
import datetime
import numpy as np
import pandas as pd
import mysql.connector
import db_config
# Study/protocol identifier: used to build the CSV file-name pattern and stored
# with every imported row.
STUDY = "42847922MDD3003"

# Absolute path of the "SourceData" folder that sits next to this script.
SOURCE_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "SourceData",
)
# ── type converters ──────────────────────────────────────────────────────────
def _py(val):
    """Return *val* as a plain Python object.

    NumPy scalars (instances of ``np.generic``) are unwrapped with ``.item()``;
    every other value is returned unchanged.
    """
    return val.item() if isinstance(val, np.generic) else val
def to_str(val):
    """Convert *val* to a stripped string, or ``None`` when it is empty/missing.

    ``None``, float NaN and any value whose text form is (case-insensitively)
    ``"nan"``, ``"nat"``, ``"none"`` or the empty string all map to ``None``.
    """
    raw = _py(val)
    if raw is None:
        return None
    # NaN is the only float that is not equal to itself.
    if isinstance(raw, float) and raw != raw:
        return None
    text = str(raw).strip()
    if text.lower() in ("nan", "nat", "none", ""):
        return None
    return text
def to_int(val):
    """Convert *val* to ``int``, or return ``None`` when that is not possible.

    Values that are already integers are returned exactly. Everything else is
    parsed through ``float`` and truncated toward zero (so ``"12.7"`` gives
    ``12``). ``None``, NaN, infinities, non-numeric text and values too large
    to represent all yield ``None`` instead of raising.
    """
    val = _py(val)
    # Keep real integers exact: going through float would lose precision above
    # 2**53 and overflow for very large values.
    if isinstance(val, int):
        return int(val)
    try:
        number = float(val)
        # NaN is the only float that is not equal to itself.
        return None if (number != number) else int(number)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) or float() of an oversized number.
        return None
def to_date(val):
    """Convert *val* to a ``datetime.date``, or ``None`` when missing/unparseable.

    Accepts ``pd.Timestamp``, ``datetime.datetime`` and ``datetime.date``
    objects directly. Any other value is converted to text and tried against
    the formats ``"%b %d, %Y"``, ``"%d-%b-%Y"``, ``"%Y-%m-%d"`` and
    ``"%d-%m-%Y"``, in that order; the first one that parses wins.
    """
    raw = _py(val)
    if raw is None or (isinstance(raw, float) and raw != raw):
        return None

    # pd.isna also recognises NaT / pd.NA; for inputs it cannot reduce to a
    # single truth value the error is ignored and parsing continues below.
    try:
        if pd.isna(raw):
            return None
    except (TypeError, ValueError):
        pass

    # pd.Timestamp and datetime.datetime both expose .date(); missing values
    # (NaT) were already rejected above.
    if isinstance(raw, (pd.Timestamp, datetime.datetime)):
        return raw.date()
    if isinstance(raw, datetime.date):
        return raw

    text = str(raw).strip()
    if text.lower() in ("", "nat", "nan", "none"):
        return None

    for fmt in ("%b %d, %Y", "%d-%b-%Y", "%Y-%m-%d", "%d-%m-%Y"):
        try:
            parsed = datetime.datetime.strptime(text, fmt)
        except ValueError:
            continue
        return parsed.date()
    return None
# ── DB helpers ───────────────────────────────────────────────────────────────
def get_conn():
    """Open and return a new MySQL connection configured from ``db_config``."""
    settings = {
        "host": db_config.DB_HOST,
        "port": db_config.DB_PORT,
        "user": db_config.DB_USER,
        "password": db_config.DB_PASSWORD,
        "database": db_config.DB_NAME,
    }
    return mysql.connector.connect(**settings)
def insert_import(cursor, study, source_file):
    """Insert one row into ``iwrs_import`` and return its generated id.

    The row records *study*, the current local time, *source_file* and the
    fixed report type ``"covance_kit_inventory"``. The return value is
    ``cursor.lastrowid`` after the insert.
    """
    statement = (
        "INSERT INTO iwrs_import (study, imported_at, source_file, report_type) VALUES (%s, %s, %s, %s)"
    )
    imported_at = datetime.datetime.now()
    params = (study, imported_at, source_file, "covance_kit_inventory")
    cursor.execute(statement, params)
    return cursor.lastrowid
# ── parser ───────────────────────────────────────────────────────────────────
def find_csv(study):
    """Return the path of the kit-inventory CSV for *study* inside ``SOURCE_DIR``.

    When several files match the name pattern, the one whose path sorts last
    lexicographically is returned.

    Raises:
        FileNotFoundError: if no file matches the pattern.
    """
    pattern = os.path.join(SOURCE_DIR, f"Protocol {study} sponsor-study-*kit-inventory*.csv")
    matches = glob.glob(pattern)
    if matches:
        # The greatest string is the same element a sorted list would end with.
        return max(matches)
    raise FileNotFoundError(f"Nenalezen CSV soubor: {pattern}")
def parse_site(site_str):
    """Split a site label into ``(code, name)``.

    Example: ``'CZ10004 - Dr. Erik Herman'`` → ``('CZ10004', 'Dr. Erik Herman')``.

    The label is split on the first ``" - "``. A label without that separator
    yields ``(code, None)``; an empty or ``None`` label yields ``(None, None)``.
    """
    if not site_str:
        return None, None
    head, separator, tail = site_str.partition(" - ")
    name = tail.strip() if separator else None
    return head.strip(), name
def parse_csv(path):
    """Read the kit-inventory CSV at *path* and return a list of row dicts.

    All columns are read as text. Rows whose "Project No." is not numeric
    (the metadata lines at the end of the export) are dropped. Each remaining
    row becomes a dict with converted values, keyed by the target column name.
    """
    frame = pd.read_csv(path, dtype=str)

    # Keep only real data rows: "Project No." must parse as a number.
    project_numbers = pd.to_numeric(frame["Project No."], errors="coerce")
    frame = frame[project_numbers.notna()].copy()

    records = []
    for _, line in frame.iterrows():
        # "Site" holds both the site code and the investigator name.
        code, investigator = parse_site(to_str(line.get("Site")))
        record = {
            "project_no": to_str(line.get("Project No.")),
            "region": to_str(line.get("Region")),
            "country": to_str(line.get("Country")),
            "site_code": code,
            "investigator_name": investigator,
            "kit_type": to_str(line.get("Kit Type")),
            "description": to_str(line.get("Description")),
            "accession": to_str(line.get("Accession")),
            "shipped_date": to_date(line.get("Shipped Date")),
            "expiration_date": to_date(line.get("Expiration Date")),
            "days_to_expiration": to_int(line.get("Days to Expiration")),
        }
        records.append(record)
    return records
# ── insert ───────────────────────────────────────────────────────────────────
def insert_kits(cursor, import_id, rows):
    """Insert every dict in *rows* into ``covance_kit_inventory``.

    Each row is written with the given *import_id* and the module-level
    ``STUDY``; one ``cursor.execute`` call is issued per row. Nothing is
    committed here.
    """
    statement = """INSERT INTO covance_kit_inventory
(import_id, study, project_no, region, country, site_code, investigator_name,
kit_type, description, accession, shipped_date, expiration_date, days_to_expiration)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
    # Row keys, in the same order as the columns after (import_id, study).
    kit_fields = (
        "project_no", "region", "country",
        "site_code", "investigator_name",
        "kit_type", "description", "accession",
        "shipped_date", "expiration_date", "days_to_expiration",
    )
    for kit in rows:
        params = (import_id, STUDY, *[kit[field] for field in kit_fields])
        cursor.execute(statement, params)
# ── main ─────────────────────────────────────────────────────────────────────
def main():
    """Run one import: locate the CSV, parse it and store it under a new import id.

    The import header row and all kit rows are written in a single
    transaction. If anything fails after the connection is opened, the
    transaction is rolled back and the original exception is re-raised; the
    cursor and connection are closed in every case.
    """
    csv_path = find_csv(STUDY)
    source_name = os.path.basename(csv_path)
    print(f"Soubor: {source_name}")
    rows = parse_csv(csv_path)
    print(f"Načteno řádků: {len(rows)}")

    conn = get_conn()
    try:
        cursor = conn.cursor()
        try:
            import_id = insert_import(cursor, STUDY, source_name)
            print(f"import_id = {import_id}")
            insert_kits(cursor, import_id, rows)
            conn.commit()
        except Exception:
            # Do not leave a half-written import behind.
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()
    print(f"Hotovo — {len(rows)} kitů importováno.")
# Script entry point: the import runs as soon as this file is executed.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so merely
# importing this module also triggers a full import run.
main()