171 lines
6.4 KiB
Python
171 lines
6.4 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
sipiq_download_v1.0.py
|
||
======================
|
||
Verze: 1.0
|
||
Datum: 2026-06-19
|
||
Autor: Claude Code (pro MUDr. Vladimíra Buzalku)
|
||
|
||
Popis
|
||
-----
|
||
Automatické stažení SIPIQ survey reportu z Qualtrics přes oficiální
|
||
Export Responses API (3 kroky: start → poll → download ZIP; stažený ZIP se poté rozbalí na CSV).
|
||
Nahrazuje ruční proklikávání Homepage → Results → Data & Analysis →
|
||
Export & Import → Export Data → CSV → Download.
|
||
|
||
Stažené CSV se uloží s TIMESTAMPOVANÝM názvem rovnou do importní složky
|
||
U:\\PythonProject\\Janssen\\Feasibility\\77242113UCO2001\\ImportSIPIQcompled
|
||
odkud ho bez úprav sebere `sipiq_import_v1.2.py` (delta import → Mongo).
|
||
|
||
Mapování UI → API:
|
||
"CSV" -> format=csv
|
||
"Export labels" -> useLabels=True (default; --values přepne na hodnoty)
|
||
"Download all" -> default API chování (všechna pole)
|
||
komprese (ZIP) -> API zapnuto automaticky
|
||
|
||
Konfigurace (root .env, NEVERZOVAT):
|
||
QUALTRICS_API_TOKEN – API token (Account Settings → Qualtrics IDs → API)
|
||
QUALTRICS_DATACENTER – default janssenfeasibility.co1
|
||
QUALTRICS_SURVEY_ID – default SV_9AdeNaNyohp5fNQ
|
||
|
||
Použití
|
||
-------
|
||
python sipiq_download_v1.0.py # CSV s labely do ImportSIPIQcompled
|
||
python sipiq_download_v1.0.py --values # CSV s hodnotami místo labelů
|
||
python sipiq_download_v1.0.py --out "<složka>" # jiná cílová složka
|
||
python sipiq_download_v1.0.py --format tsv # jiný formát
|
||
|
||
Navazující import (samostatně):
|
||
python sipiq_import_v1.2.py --apply
|
||
|
||
Závislosti: requests, python-dotenv (.venv).
|
||
"""
|
||
import argparse
|
||
import io
|
||
import logging
|
||
import os
|
||
import sys
|
||
import time
|
||
import zipfile
|
||
from datetime import datetime
|
||
|
||
try:
|
||
import requests
|
||
from dotenv import load_dotenv
|
||
except ImportError:
|
||
print("CHYBA: chybí requests nebo python-dotenv v aktuálním pythonu.", file=sys.stderr)
|
||
raise
|
||
|
||
# --- configuration loaded from the root .env ---------------------------------
_HERE = os.path.dirname(os.path.abspath(__file__))
_ROOT = os.path.dirname(_HERE)  # parent of the script folder, e.g. U:\PythonProject\Janssen
load_dotenv(os.path.join(_ROOT, ".env"))


def _env(name: str, default: str = "") -> str:
    """Return environment variable *name* stripped of whitespace.

    Falls back to *default* when the variable is unset OR blank, so an empty
    ``NAME=`` line in ``.env`` cannot silently replace a built-in default
    (which would, for example, produce the host ``.qualtrics.com``).
    """
    return os.environ.get(name, "").strip() or default


API_TOKEN = _env("QUALTRICS_API_TOKEN")  # empty string when not configured
DATACENTER = _env("QUALTRICS_DATACENTER", "janssenfeasibility.co1")
SURVEY_ID = _env("QUALTRICS_SURVEY_ID", "SV_9AdeNaNyohp5fNQ")

BASE_URL = f"https://{DATACENTER}.qualtrics.com/API/v3"
HEADERS = {"X-API-TOKEN": API_TOKEN, "Content-Type": "application/json"}

# Default target: <script folder>/77242113UCO2001/ImportSIPIQcompled
DEFAULT_OUT = os.path.join(_HERE, "77242113UCO2001", "ImportSIPIQcompled")

log = logging.getLogger("sipiq_download")
|
||
|
||
|
||
def start_export(fmt: str = "csv", use_labels: bool = True) -> str:
    """Kick off a response-export job for the configured survey.

    Args:
        fmt: Export format, sent to the API as ``format``.
        use_labels: Sent to the API as ``useLabels``.

    Returns:
        The ``progressId`` found in the ``result`` object of the JSON reply.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    response = requests.post(
        f"{BASE_URL}/surveys/{SURVEY_ID}/export-responses",
        json={"format": fmt, "useLabels": use_labels},
        headers=HEADERS,
        timeout=30,
    )
    response.raise_for_status()
    reply = response.json()
    return reply["result"]["progressId"]
|
||
|
||
|
||
def wait_for_export(progress_id: str, timeout_s: int = 300, poll_interval_s: float = 2.0) -> str:
    """Poll the export job until it finishes and return its file id.

    Args:
        progress_id: Job identifier returned when the export was started.
        timeout_s: Maximum number of seconds to keep polling.
        poll_interval_s: Pause between two status requests, in seconds.

    Returns:
        The ``fileId`` reported once the job status is ``"complete"``.

    Raises:
        RuntimeError: If the job reports status ``"failed"``.
        TimeoutError: If the job is not complete before the deadline.
        requests.HTTPError: If a status request returns an error status code.
    """
    url = f"{BASE_URL}/surveys/{SURVEY_ID}/export-responses/{progress_id}"
    # Use the monotonic clock so wall-clock adjustments (NTP sync, DST, manual
    # changes) can neither cut the wait short nor extend it indefinitely.
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        r = requests.get(url, headers=HEADERS, timeout=30)
        r.raise_for_status()
        result = r.json()["result"]
        status = result.get("status")
        if status == "complete":
            return result["fileId"]
        if status == "failed":
            raise RuntimeError("Qualtrics export selhal (status=failed).")
        # Any other status is treated as "still running".
        log.info("Export běží… %s%%", result.get("percentComplete", "?"))
        time.sleep(poll_interval_s)
    raise TimeoutError("Export nedoběhl v časovém limitu.")
|
||
|
||
|
||
def download_file(file_id: str, out_dir: str, stamp: str) -> list[str]:
    """Download the export ZIP and extract its files into *out_dir*.

    Every extracted file is written flat into *out_dir* as
    ``"<stamp> sipiq-<original base name>"``; directory components inside the
    archive are discarded. Files from runs with a different *stamp* therefore
    get different names, but an existing file with the exact same target name
    is overwritten.

    Args:
        file_id: Identifier of the finished export file.
        out_dir: Destination folder; created if it does not exist.
        stamp: Prefix (a timestamp in this script) put in front of each name.

    Returns:
        Paths of the files written, in archive order.

    Raises:
        requests.HTTPError: If the download returns an error status code.
        zipfile.BadZipFile: If the response body is not a valid ZIP archive.
    """
    import shutil  # local import: only this function needs it

    os.makedirs(out_dir, exist_ok=True)
    url = f"{BASE_URL}/surveys/{SURVEY_ID}/export-responses/{file_id}/file"
    r = requests.get(url, headers=HEADERS, timeout=180)
    r.raise_for_status()

    written = []
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        for info in z.infolist():
            # basename() drops any directory part, so an entry can never be
            # written outside out_dir; it is empty for directory entries
            # (names ending in "/"), which are skipped.
            safe = os.path.basename(info.filename)
            if not safe:
                continue
            target = os.path.join(out_dir, f"{stamp} sipiq-{safe}")
            # Stream in chunks instead of loading the whole decompressed
            # member into memory at once.
            with z.open(info) as src, open(target, "wb") as dst:
                shutil.copyfileobj(src, dst)
            written.append(target)
    return written
|
||
|
||
|
||
def run_export(fmt: str, use_labels: bool, out_dir: str) -> list[str]:
    """Run the complete export pipeline: start the job, wait, download.

    Args:
        fmt: Export format passed on to ``start_export``.
        use_labels: Label/value switch passed on to ``start_export``.
        out_dir: Folder that receives the extracted files.

    Returns:
        Paths of the downloaded files as returned by ``download_file``.

    Raises:
        SystemExit: If no API token is configured.
    """
    # Guard clause: without a token every request would be rejected anyway.
    if not API_TOKEN:
        missing_token_msg = (
            "CHYBA: QUALTRICS_API_TOKEN není nastaven v .env.\n"
            "Vygeneruj token: Qualtrics → Account Settings → Qualtrics IDs → "
            "API → Generate Token a vlož do U:\\PythonProject\\Janssen\\.env"
        )
        raise SystemExit(missing_token_msg)

    # The timestamp is taken before the job starts; it prefixes every file name.
    run_stamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    log.info("Survey %s @ %s → %s", SURVEY_ID, DATACENTER, out_dir)

    progress_id = start_export(fmt, use_labels)
    log.info("Export spuštěn, progressId=%s", progress_id)

    file_id = wait_for_export(progress_id)
    log.info("Export hotov, fileId=%s", file_id)

    saved_paths = download_file(file_id, out_dir, run_stamp)
    log.info("Staženo %d souborů.", len(saved_paths))
    return saved_paths
|
||
|
||
|
||
def main():
    """Command-line entry point: parse arguments, run the export, list the files.

    Options:
        --format  Export format (csv, tsv, json, spss); default csv.
        --values  Export values instead of labels (useLabels=False).
        --out     Destination folder; defaults to ``DEFAULT_OUT``.

    Raises:
        SystemExit: With status 1 when the export fails (HTTP error, network
            error, failed job or timeout); the cause is logged first.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    p = argparse.ArgumentParser(description="SIPIQ Qualtrics CSV export → ImportSIPIQcompled")
    p.add_argument("--format", default="csv", choices=["csv", "tsv", "json", "spss"])
    p.add_argument("--values", action="store_true",
                   help="Export hodnot místo labelů (useLabels=False)")
    p.add_argument("--out", default=DEFAULT_OUT, help="Cílová složka (default ImportSIPIQcompled)")
    args = p.parse_args()

    try:
        files = run_export(fmt=args.format, use_labels=not args.values, out_dir=args.out)
    except requests.HTTPError as e:
        # HTTP-level failure: show status and the start of the response body.
        resp = e.response
        log.error("HTTP %s: %s", resp.status_code if resp is not None else "?",
                  resp.text[:500] if resp is not None else e)
        raise SystemExit(1)
    except (requests.RequestException, RuntimeError, TimeoutError) as e:
        # Network errors plus the failed-job / timeout errors raised while
        # polling: report them the same way instead of a raw traceback.
        log.error("Export selhal (%s): %s", type(e).__name__, e)
        raise SystemExit(1)

    print("Hotovo. Stažené soubory:")
    for f in files:
        print("  -", f)
    if not files:
        print("  (žádné CSV v exportu)")


if __name__ == "__main__":
    main()
|