Files
2026-04-27 16:32:03 +02:00

48 lines
1.4 KiB
Python

import pandas as pd
CSV_FILE = "filename.csv"

# Columns of the export that hold dates; each is converted to datetime on load.
date_cols = [
    "Original Due Date",
    "Due Date",
    "Window Start Date",
    "Cutoff Date",
    "Completed Date",
]


def prepare_visits(df):
    """Parse the date columns and derive ``Country``; return the same frame.

    The frame is modified in place. Values that cannot be parsed as dates
    become ``NaT`` because of ``errors="coerce"``. ``Country`` holds the
    capital letters found between ``DD5-`` and the following digits of
    ``Study Site Number``; rows that do not match get a missing value.
    """
    # NOTE(review): no explicit format/dayfirst is given, so pandas infers the
    # date layout -- confirm this matches how the export writes its dates.
    for col in date_cols:
        df[col] = pd.to_datetime(df[col], errors="coerce")
    # expand=False returns a Series for the single capture group, which is
    # what a column assignment expects.
    df["Country"] = df["Study Site Number"].str.extract(
        r"DD5-([A-Z]+)\d+", expand=False
    )
    return df


def load_visits(path=CSV_FILE):
    """Read the semicolon-separated export at ``path`` and prepare it."""
    return prepare_visits(pd.read_csv(path, sep=";", encoding="utf-8-sig"))


def _count_lines(df, column, width):
    """Yield a heading and one line per distinct value of ``column``.

    Values are left-aligned in ``width`` characters; counts are
    right-aligned with a thousands separator.
    """
    yield f"\n{column}:"
    for value, count in df[column].value_counts().items():
        yield f" {value:<{width}} {count:>6,}"


def overview_lines(df):
    """Yield the text chunks of the overview report, in print order.

    ``df`` must already have gone through ``prepare_visits``. Chunks are
    produced lazily, so everything before a failing column is still emitted.
    """
    rule = "=" * 60
    yield rule
    yield "CTMS VISITS EXPORT — přehled dat"
    yield rule

    yield f"\nCelkem řádků : {len(df):,}"
    yield f"Celkem sloupců: {len(df.columns)}"
    yield "\nSloupce:\n " + "\n ".join(df.columns.tolist())

    yield f"\nSites celkem : {df['Study Site Number'].nunique()}"
    yield f"Zemí celkem : {df['Country'].nunique()}"
    yield f"Země : {', '.join(sorted(df['Country'].dropna().unique()))}"

    for column, width in (("Status", 20), ("Category", 25), ("Sub Category", 20)):
        yield from _count_lines(df, column, width)

    yield f"\nReference kódy: {sorted(df['Reference'].dropna().unique().tolist())}"

    yield "\nRozsah dat:"
    for col in ("Due Date", "Completed Date"):
        vals = df[col].dropna()
        # A column with no valid dates has no range to report.
        if not vals.empty:
            # The separator keeps the earliest and latest date readable;
            # without it the two ISO dates run together.
            yield f" {col:<20} {vals.min().date()} – {vals.max().date()}"

    yield "\nNáhled (5 řádků):"
    yield df.head(5).to_string()


def print_overview(df):
    """Print the overview report for a prepared visits frame to stdout."""
    for chunk in overview_lines(df):
        print(chunk)


if __name__ == "__main__":
    # ``df`` stays a module-level name so interactive runs can keep using it.
    df = load_visits(CSV_FILE)
    print_overview(df)