z230
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,158 @@
|
|||||||
|
"""
|
||||||
|
import_CZ_contacts.py
|
||||||
|
Importuje kontakty středisek Czechia z PANORAMA Dashboard xlsx do MySQL tabulky CTMS_contacts.
|
||||||
|
- Filtruje pouze řádky Country Name == 'Czechia'
|
||||||
|
- file_date bere z document properties xlsx (dcterms:created)
|
||||||
|
- Před importem smaže stávající záznamy se stejným file_date + country_name == 'Czechia'
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
import xml.etree.ElementTree as ET
import zipfile
from datetime import date, datetime, timezone
from pathlib import Path

import mysql.connector
import pandas as pd
|
||||||
|
|
||||||
|
# ── Configuration ──────────────────────────────────────────────────────────────
# Deployment-specific values can be overridden through environment variables;
# the literals are the historical fallbacks, so behaviour is unchanged when no
# variable is set.
SOURCE_FILE = Path(
    os.environ.get(
        "CTMS_SOURCE_FILE",
        r"U:\PythonProject\Janssen\CTMS\PanoramaContacts\SourceData\PANORAMA Dashboard (33).xlsx",
    )
)

# NOTE(review): the fallback user/password below are committed to version
# control. Supply CTMS_DB_USER / CTMS_DB_PASSWORD from the environment, rotate
# the password and then drop the literal fallbacks.
DB_CONFIG = {
    "host": os.environ.get("CTMS_DB_HOST", "192.168.1.76"),
    "port": int(os.environ.get("CTMS_DB_PORT", "3306")),
    "user": os.environ.get("CTMS_DB_USER", "root"),
    "password": os.environ.get("CTMS_DB_PASSWORD", "Vlado9674+"),
    "database": os.environ.get("CTMS_DB_NAME", "studie"),
    "charset": "utf8mb4",
}

TABLE = "CTMS_contacts"  # target table of the import
COUNTRY = "Czechia"  # only rows with this "Country Name" are imported
SHEET = "Site Contacts"  # worksheet that holds the contact list
HEADER_ROW = 5  # 0-based index, i.e. row 6 in Excel
|
||||||
|
|
||||||
|
|
||||||
|
# ── Pomocné funkce ─────────────────────────────────────────────────────────────
|
||||||
|
def get_file_created_date(xlsx_path: Path) -> date:
    """Return the workbook creation date stored in ``docProps/core.xml``.

    The value is read from the ``dcterms:created`` element of the xlsx
    package and converted to a UTC calendar date.

    Args:
        xlsx_path: Path to the ``.xlsx`` file (a zip archive).

    Returns:
        The creation date expressed in UTC.

    Raises:
        KeyError: If the archive has no ``docProps/core.xml`` member.
        ValueError: If ``dcterms:created`` is missing, empty, or not an
            ISO-8601 timestamp.
    """
    with zipfile.ZipFile(xlsx_path) as archive:
        with archive.open("docProps/core.xml") as core_props:
            root = ET.parse(core_props).getroot()

    created_el = root.find("{http://purl.org/dc/terms/}created")
    created_text = ""
    if created_el is not None and created_el.text:
        created_text = created_el.text.strip()
    if not created_text:
        raise ValueError(
            f"{xlsx_path}: docProps/core.xml has no dcterms:created value"
        )

    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11 on,
    # so it is normalised to an explicit UTC offset first.
    created = datetime.fromisoformat(created_text.replace("Z", "+00:00"))

    # A timestamp without an offset is taken as UTC. Calling astimezone() on a
    # naive value would interpret it in the host's local zone and could shift
    # the resulting date by a day depending on where the script runs.
    if created.tzinfo is None:
        created = created.replace(tzinfo=timezone.utc)
    return created.astimezone(timezone.utc).date()
|
||||||
|
|
||||||
|
|
||||||
|
def clean_value(val):
    """Normalise one DataFrame cell so it can be bound as a SQL parameter.

    Args:
        val: A single cell value as produced by pandas.

    Returns:
        ``None`` for missing values (``None``, NaN, ``pd.NaT``, ``pd.NA``),
        a ``datetime.date`` for a ``pd.Timestamp``, and the value unchanged
        otherwise.
    """
    if val is None:
        return None

    # Missing values are detected with pd.isna() instead of probing private
    # attributes, which real Timestamps may carry as well. Only scalars are
    # tested because pd.isna() returns an array for list-like input.
    if pd.api.types.is_scalar(val) and pd.isna(val):
        return None

    # The target columns are DATE, so the time-of-day part is dropped.
    if isinstance(val, pd.Timestamp):
        return val.date()

    return val
|
||||||
|
|
||||||
|
|
||||||
|
# ── Hlavní logika ──────────────────────────────────────────────────────────────
|
||||||
|
def main():
    """Import the Czechia site contacts from the PANORAMA workbook into MySQL.

    Steps:
        1. Read the workbook creation date from its document properties.
        2. Load the contact sheet and keep only rows for ``COUNTRY``.
        3. Map the Excel column names to the database column names.
        4. Delete rows already stored for the same ``file_date`` and country,
           insert the fresh rows, and commit both together.

    If the delete or the insert fails, the transaction is rolled back and the
    exception is re-raised. The cursor and connection are always closed.
    """
    print(f"Soubor : {SOURCE_FILE}")

    # 1) Creation date from the document properties
    file_date = get_file_created_date(SOURCE_FILE)
    print(f"file_date (z docProps): {file_date}")

    # 2) Load the data
    print("Načítám Excel…")
    df = pd.read_excel(SOURCE_FILE, sheet_name=SHEET, header=HEADER_ROW)

    # 3) Keep only the rows of the configured country
    df_cz = df[df["Country Name"] == COUNTRY].copy()
    print(f"Řádků CZ: {len(df_cz)}")

    # 4) Excel column name → database column name
    col_map = {
        "Sector": "sector",
        "TA": "ta",
        "Protocol ID": "protocol_id",
        "GTL-GTM/CTM": "gtl_ctm",
        "Country Name": "country_name",
        "LTM Name": "ltm_name",
        "Site ID": "site_id",
        "SM Name": "sm_name",
        "PI Full Name": "pi_full_name",
        "Institution Name": "institution_name",
        "Contact Identifier": "contact_identifier",
        "Title": "contact_title",
        "Last Name": "last_name",
        "First Name": "first_name",
        "Contact Role": "contact_role",
        "Contact Type": "contact_type",
        "Pr St Cont Primary Indicator": "primary_indicator",
        "SUA Reporting Indicator": "sua_reporting_indicator",
        "Financial Disclosure Indicator": "financial_disclosure_indicator",
        "Contact Phone Number": "phone",
        "Alternative Phone Number": "phone_alt",
        "Mobile Phone Number": "phone_mobile",
        "Contact Fax Number": "fax",
        "Contact Email Address": "email",
        "SUA Reporting Email Address": "email_sua",
        "Contact Start Date": "contact_start_date",
        "Contact End Date": "contact_end_date",
        "Degree/qualification": "degree_qualification",
        "Job Title": "job_title",
        "Contact Address Line 1": "address_line1",
        "Contact Address Line 2": "address_line2",
        "Contact Address Line 3": "address_line3",
        "Contact City": "city",
        "Contact Addr State/Province": "state_province",
        "Contact Zip/Postal Code": "zip_postal_code",
    }

    # Columns absent from the sheet are still stored as NULL (row.get below),
    # but the operator is told, so a renamed Excel header does not go unnoticed.
    missing_cols = [src for src in col_map if src not in df_cz.columns]
    if missing_cols:
        print(f"Varování – v listu chybí sloupce (uloží se NULL): {missing_cols}")

    df_cz = df_cz.rename(columns=col_map)
    db_cols = list(col_map.values())

    # All parameter rows are built before the database is touched, so a data
    # problem surfaces before anything is deleted.
    rows = [
        tuple([file_date] + [clean_value(row.get(col)) for col in db_cols])
        for _, row in df_cz.iterrows()
    ]

    placeholders = ", ".join(["%s"] * (len(db_cols) + 1))  # +1 for file_date
    insert_cols = "file_date, " + ", ".join(db_cols)
    sql_insert = f"INSERT INTO {TABLE} ({insert_cols}) VALUES ({placeholders})"

    # 5) Connect to the database
    print("Připojuji se k MySQL…")
    conn = mysql.connector.connect(**DB_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            # 6) Remove rows of the same file_date + country so that running
            #    the import again for the same export does not duplicate data.
            cursor.execute(
                f"DELETE FROM {TABLE} WHERE file_date = %s AND country_name = %s",
                (file_date, COUNTRY),
            )
            deleted = cursor.rowcount
            print(f"Smazáno starých záznamů: {deleted}")

            # 7) Insert all rows in one batch
            if rows:
                cursor.executemany(sql_insert, rows)

            conn.commit()
        except Exception:
            # Leave the table as it was; the error is still reported.
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()

    inserted = len(rows)
    print(f"Importováno záznamů : {inserted}")
    print("Hotovo OK")


if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,83 @@
|
|||||||
|
-- ============================================================
-- Database    : studie
-- Table       : CTMS_contacts
-- Description : Site contacts from the PANORAMA system (CTMS),
--               study 42847922MDD3003 (Neuroscience)
-- Created     : 2026-05-07
-- ============================================================

USE studie;

CREATE TABLE IF NOT EXISTS CTMS_contacts (

    -- Internal keys
    id                              INT UNSIGNED NOT NULL AUTO_INCREMENT,
    file_date                       DATE NOT NULL
        COMMENT 'Datum vytvoření zdrojového souboru (PANORAMA export)',
    imported_at                     DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
        COMMENT 'Datum a čas importu záznamu do DB',

    -- Study / organisation
    sector                          VARCHAR(20)  COMMENT 'Pharma / ...',
    ta                              VARCHAR(30)  COMMENT 'Therapeutic Area',
    protocol_id                     VARCHAR(20)  COMMENT 'Protocol ID (např. 42847922MDD3003)',
    gtl_ctm                         VARCHAR(50)  COMMENT 'GTL-GTM/CTM jméno',

    -- Site
    country_name                    VARCHAR(60)  COMMENT 'Název země',
    ltm_name                        VARCHAR(50)  COMMENT 'LTM Name',
    site_id                         VARCHAR(15)  COMMENT 'Identifikátor střediska (např. S10-CZ10008)',
    sm_name                         VARCHAR(60)  COMMENT 'Site Manager Name',
    pi_full_name                    VARCHAR(80)  COMMENT 'Principal Investigator – celé jméno',
    institution_name                VARCHAR(100) COMMENT 'Název instituce / kliniky',

    -- Contact person
    contact_identifier              INT UNSIGNED COMMENT 'PANORAMA interní ID kontaktu',
    contact_title                   VARCHAR(25)  COMMENT 'Titul (Mr, Ms, Dr, ...)',
    last_name                       VARCHAR(50)  COMMENT 'Příjmení',
    first_name                      VARCHAR(40)  COMMENT 'Jméno',
    contact_role                    VARCHAR(50)  COMMENT 'Role kontaktu (Study Coordinator, PI, ...)',
    contact_type                    VARCHAR(30)  COMMENT 'Typ kontaktu (Study-Site Staff, ...)',

    -- Indicators
    primary_indicator               ENUM('Yes','No') COMMENT 'Pr St Cont Primary Indicator',
    sua_reporting_indicator         ENUM('Yes','No') COMMENT 'SUA Reporting Indicator',
    financial_disclosure_indicator  ENUM('Yes','No') COMMENT 'Financial Disclosure Indicator',

    -- Contact details
    phone                           VARCHAR(40)  COMMENT 'Hlavní telefonní číslo',
    phone_alt                       VARCHAR(40)  COMMENT 'Alternativní telefonní číslo',
    phone_mobile                    VARCHAR(40)  COMMENT 'Mobilní číslo',
    fax                             VARCHAR(40)  COMMENT 'Faxové číslo',
    email                           VARCHAR(100) COMMENT 'Hlavní e-mailová adresa',
    email_sua                       VARCHAR(100) COMMENT 'SUA Reporting e-mail',

    -- Validity dates
    contact_start_date              DATE         COMMENT 'Datum začátku platnosti kontaktu',
    contact_end_date                DATE         COMMENT 'Datum konce platnosti kontaktu',

    -- Qualification
    degree_qualification            VARCHAR(30)  COMMENT 'Titul / kvalifikace',
    job_title                       VARCHAR(40)  COMMENT 'Pracovní pozice',

    -- Address
    address_line1                   VARCHAR(100) COMMENT 'Adresní řádek 1',
    address_line2                   VARCHAR(60)  COMMENT 'Adresní řádek 2',
    address_line3                   VARCHAR(100) COMMENT 'Adresní řádek 3',
    city                            VARCHAR(50)  COMMENT 'Město',
    state_province                  VARCHAR(40)  COMMENT 'Stát / provincie',
    zip_postal_code                 VARCHAR(20)  COMMENT 'PSČ',

    -- Keys
    PRIMARY KEY (id),

    -- Secondary indexes on the most frequently queried columns
    KEY idx_file_date          (file_date),
    KEY idx_country            (country_name),
    KEY idx_site_id            (site_id),
    KEY idx_protocol           (protocol_id),
    KEY idx_contact_role       (contact_role),
    KEY idx_email              (email),
    KEY idx_contact_identifier (contact_identifier)

)
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_unicode_ci
COMMENT = 'CTMS contacts – Site Contacts, studie 42847922MDD3003';
|
||||||
Reference in New Issue
Block a user