Files
janssen/Feasibility/TRASH/sipiq_download_v1.0.py
T
2026-06-19 14:28:54 +02:00

171 lines
6.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
sipiq_download_v1.0.py
======================
Verze: 1.0
Datum: 2026-06-19
Autor: Claude Code (pro MUDr. Vladimíra Buzalku)
Popis
-----
Automatické stažení SIPIQ survey reportu z Qualtrics přes oficiální
Export Responses API (3 kroky: start → poll → download ZIP → rozbalit CSV).
Nahrazuje ruční proklikávání Homepage → Results → Data & Analysis →
Export & Import → Export Data → CSV → Download.
Stažené CSV se uloží s TIMESTAMPOVANÝM názvem rovnou do importní složky
U:\\PythonProject\\Janssen\\Feasibility\\77242113UCO2001\\ImportSIPIQcompled
odkud ho bez úprav sebere `sipiq_import_v1.2.py` (delta import → Mongo).
Mapování UI → API:
"CSV" -> format=csv
"Export labels" -> useLabels=True (default; --values přepne na hodnoty)
"Download all" -> default API chování (všechna pole)
komprese (ZIP) -> API zapnuto automaticky
Konfigurace (root .env, NEVERZOVAT):
QUALTRICS_API_TOKEN API token (Account Settings → Qualtrics IDs → API)
QUALTRICS_DATACENTER default janssenfeasibility.co1
QUALTRICS_SURVEY_ID default SV_9AdeNaNyohp5fNQ
Použití
-------
python sipiq_download_v1.0.py # CSV s labely do ImportSIPIQcompled
python sipiq_download_v1.0.py --values # CSV s hodnotami místo labelů
python sipiq_download_v1.0.py --out "<složka>" # jiná cílová složka
python sipiq_download_v1.0.py --format tsv # jiný formát
Navazující import (samostatně):
python sipiq_import_v1.2.py --apply
Závislosti: requests, python-dotenv (.venv).
"""
import argparse
import io
import logging
import os
import sys
import time
import zipfile
from datetime import datetime
try:
import requests
from dotenv import load_dotenv
except ImportError:
print("CHYBA: chybí requests nebo python-dotenv v aktuálním pythonu.", file=sys.stderr)
raise
# --- configuration read from the root .env file ------------------------------
# Directory that contains this script.
_HERE = os.path.dirname(os.path.abspath(__file__))
_ROOT = os.path.dirname(_HERE) # parent of the script directory (author's note: U:\PythonProject\Janssen)
# Read <_ROOT>/.env (python-dotenv) before the os.environ lookups below.
load_dotenv(os.path.join(_ROOT, ".env"))
# API token; falls back to "" when unset, which run_export() treats as "not configured".
API_TOKEN = os.environ.get("QUALTRICS_API_TOKEN", "").strip()
# Datacenter host prefix and survey id, each with a built-in default.
DATACENTER = os.environ.get("QUALTRICS_DATACENTER", "janssenfeasibility.co1").strip()
SURVEY_ID = os.environ.get("QUALTRICS_SURVEY_ID", "SV_9AdeNaNyohp5fNQ").strip()
# Base of every API URL built in this module.
BASE_URL = f"https://{DATACENTER}.qualtrics.com/API/v3"
# Headers sent with every request; built once at import time from API_TOKEN.
HEADERS = {"X-API-TOKEN": API_TOKEN, "Content-Type": "application/json"}
# Default output folder, resolved relative to the script directory.
DEFAULT_OUT = os.path.join(_HERE, "77242113UCO2001", "ImportSIPIQcompled")
# Module-level logger; handlers/level are configured in main().
log = logging.getLogger("sipiq_download")
def start_export(fmt: str = "csv", use_labels: bool = True) -> str:
    """Start a response-export job for the configured survey.

    Args:
        fmt: Value sent as the ``format`` field of the request body.
        use_labels: Value sent as the ``useLabels`` field of the request body.

    Returns:
        The ``progressId`` found under ``result`` in the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    response = requests.post(
        f"{BASE_URL}/surveys/{SURVEY_ID}/export-responses",
        json={"format": fmt, "useLabels": use_labels},
        headers=HEADERS,
        timeout=30,
    )
    response.raise_for_status()
    body = response.json()
    return body["result"]["progressId"]
def wait_for_export(
    progress_id: str, timeout_s: int = 300, poll_interval_s: float = 2.0
) -> str:
    """Poll the export job until it completes and return its file id.

    Args:
        progress_id: Identifier returned when the export job was started.
        timeout_s: Maximum number of seconds to keep polling.
        poll_interval_s: Pause between two polls, in seconds (default 2 s).

    Returns:
        The ``fileId`` reported once the job status is ``"complete"``.

    Raises:
        RuntimeError: If the job reports status ``"failed"``.
        TimeoutError: If the job does not complete within ``timeout_s``.
        requests.HTTPError: If a status request returns an error status.
    """
    url = f"{BASE_URL}/surveys/{SURVEY_ID}/export-responses/{progress_id}"
    # Use the monotonic clock so wall-clock adjustments (NTP sync, DST,
    # manual changes) can neither cut the wait short nor extend it.
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        r = requests.get(url, headers=HEADERS, timeout=30)
        r.raise_for_status()
        result = r.json()["result"]
        status = result.get("status")
        if status == "complete":
            return result["fileId"]
        if status == "failed":
            raise RuntimeError("Qualtrics export selhal (status=failed).")
        # Any other status is treated as "still running".
        log.info("Export běží… %s%%", result.get("percentComplete", "?"))
        time.sleep(poll_interval_s)
    raise TimeoutError("Export nedoběhl v časovém limitu.")
def download_file(file_id: str, out_dir: str, stamp: str) -> list[str]:
    """Download the export ZIP and extract its files into ``out_dir``.

    Every file member of the archive (not only CSV) is written as
    ``"<stamp> sipiq-<member basename>"``. Directory components of the member
    name are dropped, so nothing is written outside ``out_dir``. Targets are
    opened in ``"wb"`` mode, so a file with the same name would be replaced;
    the ``stamp`` prefix is what keeps names distinct between runs.

    Args:
        file_id: Identifier of the finished export file.
        out_dir: Destination directory; created if it does not exist.
        stamp: Prefix placed in front of each extracted file name.

    Returns:
        Paths of the files that were written, in archive order.

    Raises:
        requests.HTTPError: If the download request returns an error status.
        zipfile.BadZipFile: If the response body is not a valid ZIP archive.
    """
    import shutil  # local import: used only here for streaming extraction

    os.makedirs(out_dir, exist_ok=True)
    url = f"{BASE_URL}/surveys/{SURVEY_ID}/export-responses/{file_id}/file"
    r = requests.get(url, headers=HEADERS, timeout=180)
    r.raise_for_status()
    written = []
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        for name in z.namelist():
            if name.endswith("/"):
                continue  # directory entry, nothing to extract
            safe = os.path.basename(name)
            if not safe:
                continue  # no usable file name left after stripping the path
            target = os.path.join(out_dir, f"{stamp} sipiq-{safe}")
            with z.open(name) as src, open(target, "wb") as dst:
                # Stream in chunks instead of holding the whole decompressed
                # member in memory at once.
                shutil.copyfileobj(src, dst)
            written.append(target)
    return written
def run_export(fmt: str, use_labels: bool, out_dir: str) -> list[str]:
    """Run the whole export: start the job, wait for it, download the result.

    Args:
        fmt: Export format passed to ``start_export``.
        use_labels: Passed to ``start_export`` (labels vs. raw values).
        out_dir: Directory the extracted files are written to.

    Returns:
        Paths of the downloaded files, as returned by ``download_file``.

    Raises:
        SystemExit: If ``API_TOKEN`` is empty (token not configured).
    """
    if not API_TOKEN:
        raise SystemExit(
            "CHYBA: QUALTRICS_API_TOKEN není nastaven v .env.\n"
            "Vygeneruj token: Qualtrics → Account Settings → Qualtrics IDs → "
            "API → Generate Token a vlož do U:\\PythonProject\\Janssen\\.env"
        )
    # One timestamp per run, shared by all files extracted from the archive.
    stamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    # Fixed: the datacenter and the output directory were logged back to back
    # with no separator ("%s%s"), producing an unreadable fused string.
    log.info("Survey %s @ %s → %s", SURVEY_ID, DATACENTER, out_dir)
    pid = start_export(fmt, use_labels)
    log.info("Export spuštěn, progressId=%s", pid)
    fid = wait_for_export(pid)
    log.info("Export hotov, fileId=%s", fid)
    files = download_file(fid, out_dir, stamp)
    log.info("Staženo %d souborů.", len(files))
    return files
def main():
    """Command-line entry point: parse arguments, run the export, list files.

    Exits with status 1 (after logging the cause) when the export fails:
    on an HTTP error response, on any other ``requests`` failure such as a
    connection error or request timeout, on a failed or timed-out export job,
    or when the downloaded archive is not a valid ZIP.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    p = argparse.ArgumentParser(description="SIPIQ Qualtrics CSV export → ImportSIPIQcompled")
    p.add_argument("--format", default="csv", choices=["csv", "tsv", "json", "spss"])
    p.add_argument("--values", action="store_true",
                   help="Export hodnot místo labelů (useLabels=False)")
    p.add_argument("--out", default=DEFAULT_OUT, help="Cílová složka (default ImportSIPIQcompled)")
    args = p.parse_args()
    try:
        files = run_export(fmt=args.format, use_labels=not args.values, out_dir=args.out)
    except requests.HTTPError as e:
        # Must precede RequestException (its base class) so that the status
        # code and the start of the response body are logged.
        resp = e.response
        log.error("HTTP %s: %s", resp.status_code if resp is not None else "?",
                  resp.text[:500] if resp is not None else e)
        raise SystemExit(1)
    except (requests.RequestException, RuntimeError, TimeoutError,
            zipfile.BadZipFile) as e:
        # Network failures, a failed/timed-out export job or a corrupt archive
        # end the run cleanly instead of with a raw traceback.
        log.error("Export selhal: %s", e)
        raise SystemExit(1)
    print("Hotovo. Stažené soubory:")
    for f in files:
        print(" -", f)
    if not files:
        print(" (žádné CSV v exportu)")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()