z230

2026-06-02 17:20:20 +02:00
parent ec187e673a
commit b433ef0446
58 changed files with 9247 additions and 0 deletions
@@ -0,0 +1,449 @@
+"""
+download_attachments_v1.0.py
+Nazev:  download_attachments_v1.0.py
+Verze:  1.0
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Stahuje skutecne prilohy (is_inline=False) vsech emailu z MongoDB kolekce
+    ordinace@buzalkova.cz primo pres Microsoft Graph API a uklada je do
+    adresare /mnt/Emails/ordinace@buzalkova.cz/Attachments/.
+
+    Deduplikace podle SHA256 hashe obsahu:
+        - stejny hash = soubor uz existuje -> preskoci
+        - prvni vyskyt souboru: ulozi pod puvodnim nazvem
+        - kolize nazvu (stejny nazev, jiny hash): faktura_2.pdf, faktura_3.pdf ...
+
+    Po ulozeni aktualizuje MongoDB:
+        - v email dokumentu: kazda priloha dostane file_hash + local_path
+        - kolekce emaily.attachments_index: _id=hash, filename, local_path, size_bytes,
+          mime_type, first_seen_at, ref_count (pocet emailu ktery ji obsahuje)
+
+    Bezpecne prerusit a opakovat:
+        - zpravy kde jsou vsechny prilohy uz stazene (maji file_hash) se preskoci
+        - --force-recheck znovu overi i uz stazene (pro pripad zmen na disku)
+
+    POZOR: Skript ze schranky pouze CTE — zadny zapis do schranky!
+
+Spousteni:
+    python download_attachments_v1.0.py               # stahni vse co chybi
+    python download_attachments_v1.0.py --limit 50    # test na prvnich 50 emailech
+    python download_attachments_v1.0.py --force-recheck  # overi i uz stazene
+
+Docker (po pridani mountu /mnt/user/Emails -> /mnt/Emails):
+    docker exec -it python-runner python /scripts/download_attachments_v1.0.py
+
+Zavislosti:
+    msal, requests, pymongo
+    Python 3.10+
+
+Struktura na disku:
+    /mnt/Emails/
+    └── ordinace@buzalkova.cz/
+        └── Attachments/
+            ├── faktura_2026.pdf
+            ├── vysledky_lab.pdf
+            ├── vysledky_lab_2.pdf   <- kolize nazvu, jiny obsah
+            └── ...
+
+Kolekce emaily.attachments_index:
+    _id          SHA256 hash (hex)
+    filename     nazev souboru na disku (prvni vyskytu)
+    local_path   relativni cesta od Attachments/ (zatim = filename)
+    size_bytes   velikost souboru
+    mime_type    MIME typ
+    first_seen_at  datetime UTC
+    ref_count    v kolika emailech se tato priloha vyskytuje
+
+Aktualizace v email dokumentu (kolekce ordinace@buzalkova.cz):
+    attachments[i].file_hash    SHA256 hash
+    attachments[i].local_path   cesta relativni od Attachments/
+
+Historie verzi:
+    1.0  2026-06-02  Inicialni verze
+"""
+
+import sys
+import hashlib
+import logging
+import argparse
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional
+
+import msal
+import requests
+from pymongo import MongoClient, UpdateOne
+
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── KONFIGURACE ──────────────────────────────────────────────────────────────
+# Identifiers of the app registration used by get_token() to authenticate
+# against Microsoft Graph.
+GRAPH_TENANT_ID     = "7d269944-37a4-43a1-8140-c7517dc426e9"
+GRAPH_CLIENT_ID     = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
+# NOTE(review): the client secret assigned on the next line is committed in
+# plain text. It should be rotated and loaded from the environment or a
+# secret store instead of living in version control.
+GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
+# Mailbox whose messages are read through Graph (used in the /users/... URLs).
+GRAPH_MAILBOX       = "ordinace@buzalkova.cz"
+# Base URL of the Microsoft Graph v1.0 REST endpoint.
+GRAPH_URL           = "https://graph.microsoft.com/v1.0"
+
+# MongoDB connection; emails are read from MONGO_COL_EMAILS and the
+# deduplicated attachment index is kept in MONGO_COL_INDEX.
+MONGO_URI           = "mongodb://192.168.1.76:27017"
+MONGO_DB            = "emaily"
+MONGO_COL_EMAILS    = "ordinace@buzalkova.cz"
+MONGO_COL_INDEX     = "attachments_index"
+
+# Directory that receives the downloaded attachment files.
+ATTACHMENTS_DIR     = Path("/mnt/Emails/ordinace@buzalkova.cz/Attachments")
+# NOTE(review): the log file is named "parse_emails_errors.log" although this
+# script downloads attachments — presumably shared with another script; confirm.
+LOG_FILE            = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION      = "1.0"
+# Number of queued email updates that triggers a bulk write to MongoDB.
+BATCH_SIZE          = 50
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only ERROR-level records are written to LOG_FILE; regular progress output
+# of the script goes to stdout via print().
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+# Cached Graph bearer token; set by get_token() and reused by the
+# graph_get_* helpers until a request is answered with 401.
+_graph_token: Optional[str] = None
+
+
+# ─── Graph API ────────────────────────────────────────────────────────────────
+
+def get_token() -> str:
+    """Acquire a Microsoft Graph app-only access token and cache it.
+
+    Builds an MSAL confidential client from the module-level tenant id,
+    client id and client secret, requests a token for the Graph ``.default``
+    scope and stores it in the module-level ``_graph_token``.
+
+    Returns:
+        The access token string.
+
+    Raises:
+        RuntimeError: If the MSAL response contains no ``access_token``.
+    """
+    global _graph_token
+    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
+    client_app = msal.ConfidentialClientApplication(
+        GRAPH_CLIENT_ID,
+        authority=authority_url,
+        client_credential=GRAPH_CLIENT_SECRET,
+    )
+    response = client_app.acquire_token_for_client(
+        scopes=["https://graph.microsoft.com/.default"],
+    )
+    if "access_token" in response:
+        _graph_token = response["access_token"]
+        return _graph_token
+    raise RuntimeError(f"Graph auth failed: {response}")
+
+
+def graph_get_bytes(url: str) -> bytes:
+    """Download the raw body of a Graph resource (used for attachment content).
+
+    The cached bearer token is used; when Graph answers 401 a fresh token is
+    acquired and the request is repeated once.
+
+    Args:
+        url: Absolute Graph URL to fetch.
+
+    Returns:
+        The complete response body.
+
+    Raises:
+        requests.HTTPError: For any non-401 error status.
+        RuntimeError: If both attempts were answered with 401.
+    """
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for _ in range(2):
+        # No stream=True: the whole body is needed anyway, and a streamed
+        # response that is never read (the 401 path) would keep its
+        # connection checked out of the pool.
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, timeout=120)
+        if r.status_code == 401:
+            get_token()
+            continue
+        r.raise_for_status()
+        return r.content
+    raise RuntimeError(f"Graph GET bytes failed: {url}")
+
+
+def graph_get_json(url: str, params: dict = None) -> dict:
+    """Fetch a Graph resource and return its decoded JSON body.
+
+    Uses the cached bearer token; a 401 answer triggers one token refresh
+    followed by a single retry.
+
+    Raises:
+        requests.HTTPError: For any non-401 error status.
+        RuntimeError: If both attempts were answered with 401.
+    """
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    attempts_left = 2
+    while attempts_left:
+        attempts_left -= 1
+        auth_header = {"Authorization": f"Bearer {_graph_token}"}
+        response = requests.get(url, headers=auth_header, params=params, timeout=30)
+        if response.status_code != 401:
+            response.raise_for_status()
+            return response.json()
+        # Token rejected: refresh it before the next attempt.
+        get_token()
+    raise RuntimeError(f"Graph GET json failed: {url}")
+
+
+def fetch_attachment_content(graph_message_id: str, attachment_id: str) -> Optional[bytes]:
+    """Download the raw content of one attachment through the Graph API.
+
+    Returns:
+        The attachment bytes, or ``None`` when the download failed (the
+        failure is written to the error log).
+    """
+    message_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{graph_message_id}"
+    value_url = f"{message_url}/attachments/{attachment_id}/$value"
+    try:
+        payload = graph_get_bytes(value_url)
+    except Exception as exc:
+        logging.error("fetch_attachment_content failed [msg=%s att=%s]: %s", graph_message_id, attachment_id, exc)
+        return None
+    return payload
+
+
+def fetch_message_attachments(graph_message_id: str) -> list[dict]:
+    """Load the attachment metadata (including attachment ids) of a message.
+
+    All result pages are collected by following ``@odata.nextLink``, so a
+    message with many attachments is not silently truncated.
+
+    Returns:
+        The list of attachment metadata dicts, or an empty list when any
+        request failed (the failure is written to the error log).
+    """
+    url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{graph_message_id}/attachments"
+    params = {"$select": "id,name,contentType,size,isInline,contentId"}
+    collected: list[dict] = []
+    try:
+        while url:
+            data = graph_get_json(url, params)
+            collected.extend(data.get("value", []))
+            # The next link already carries the full query string.
+            url = data.get("@odata.nextLink")
+            params = None
+    except Exception as e:
+        logging.error("fetch_message_attachments failed [%s]: %s", graph_message_id, e)
+        return []
+    return collected
+
+
+# ─── Dedup + ukládání ─────────────────────────────────────────────────────────
+
+def sha256(data: bytes) -> str:
+    """Return the SHA-256 digest of *data* as a lowercase hex string."""
+    digest = hashlib.sha256()
+    digest.update(data)
+    return digest.hexdigest()
+
+
+def resolve_filename(desired_name: str, att_dir: Path, hash_val: str, index_col) -> str:
+    """Choose the file name under which content with *hash_val* is stored.
+
+    A name is usable when it is not taken by different content. "Taken" is
+    checked both in the index collection and on disk, so a file that exists
+    in *att_dir* without an index entry is never overwritten by different
+    content. When *desired_name* is taken, ``_2``, ``_3`` ... is appended to
+    the stem until a usable name is found.
+
+    Args:
+        desired_name: Preferred (already sanitised) file name.
+        att_dir: Directory holding the attachment files.
+        hash_val: SHA-256 hex digest of the content to be stored.
+        index_col: Index collection; only ``find_one({"filename": ...})`` is used.
+
+    Returns:
+        The file name to use inside *att_dir*.
+    """
+    def usable(name: str) -> bool:
+        entry = index_col.find_one({"filename": name})
+        if entry is not None:
+            # Indexed name: reusable only for the very same content.
+            return entry["_id"] == hash_val
+        path = att_dir / name
+        if not path.exists():
+            return True
+        # Unindexed file on disk (e.g. left by an interrupted run): reuse it
+        # only when it already holds exactly this content.
+        try:
+            return hashlib.sha256(path.read_bytes()).hexdigest() == hash_val
+        except OSError:
+            return False
+
+    if usable(desired_name):
+        return desired_name
+
+    stem   = Path(desired_name).stem
+    suffix = Path(desired_name).suffix
+    n = 2
+    while True:
+        candidate = f"{stem}_{n}{suffix}"
+        if usable(candidate):
+            return candidate
+        n += 1
+
+
+def save_attachment(content: bytes, original_name: str, att_dir: Path, index_col) -> tuple[str, str, bool]:
+    """Store an attachment on disk, deduplicated by the SHA-256 of its content.
+
+    Args:
+        content: Raw attachment bytes.
+        original_name: File name as recorded for the attachment (may be empty).
+        att_dir: Directory holding the attachment files.
+        index_col: Index collection keyed by content hash.
+
+    Returns:
+        ``(hash, local_path, was_new)``. ``was_new`` is True when a new index
+        entry and file were created, False when the hash was already indexed.
+
+    Note:
+        ``ref_count`` is incremented on every dedup hit, including repeated
+        runs over the same email.
+    """
+    hash_val = sha256(content)
+
+    # Dedup hit: the content is already indexed.
+    existing = index_col.find_one({"_id": hash_val})
+    if existing:
+        # Re-create the file when it has disappeared from disk so that the
+        # returned local_path is always valid.
+        known_path = att_dir / existing["local_path"]
+        if not known_path.exists():
+            known_path.write_bytes(content)
+        index_col.update_one({"_id": hash_val}, {"$inc": {"ref_count": 1}})
+        return hash_val, existing["local_path"], False
+
+    # New content: derive a file-system safe name.
+    safe_name = "".join(
+        c if c.isalnum() or c in "._- " else "_" for c in (original_name or "")
+    ).strip()
+    # Names made only of dots/spaces ("", ".", "..") would address a
+    # directory instead of a file, so they get a generated name.
+    if not safe_name.strip(". "):
+        safe_name = f"attachment_{hash_val[:8]}"
+
+    filename  = resolve_filename(safe_name, att_dir, hash_val, index_col)
+    file_path = att_dir / filename
+
+    # Write to a temporary sibling and rename, so an interrupted run never
+    # leaves a truncated file under the final name.
+    tmp_path = file_path.with_name(file_path.name + ".part")
+    tmp_path.write_bytes(content)
+    tmp_path.replace(file_path)
+
+    index_col.insert_one({
+        "_id":          hash_val,
+        "filename":     filename,
+        "local_path":   filename,
+        "size_bytes":   len(content),
+        "mime_type":    "",
+        "first_seen_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        "ref_count":    1,
+    })
+
+    return hash_val, filename, True
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Download all missing real attachments and record them in MongoDB.
+
+    Steps: parse the CLI arguments, create the target directory, authenticate
+    against Graph, connect to MongoDB, select the emails to process (those
+    with a non-inline attachment lacking ``file_hash``, or every email with
+    attachments when ``--force-recheck`` is given), download and store each
+    attachment via ``save_attachment`` and write the enriched ``attachments``
+    array back to the email document in batches of ``BATCH_SIZE``.
+
+    The process exits with status 1 when Graph authentication or the MongoDB
+    ping fails. A failure while saving a single attachment is logged and
+    counted; it does not abort the run.
+    """
+    ap = argparse.ArgumentParser(description=f"download_attachments v{SCRIPT_VERSION}")
+    ap.add_argument("--limit",         type=int, default=0,
+                    help="Zpracovat max N emailu (0 = vse)")
+    ap.add_argument("--force-recheck", action="store_true",
+                    help="Znovu overi i emaily kde prilohy uz maji file_hash")
+    ap.add_argument("--no-indexes",    action="store_true",
+                    help="Nevytvorit indexy na konci")
+    args = ap.parse_args()
+
+    start = datetime.now()
+    print(f"=== download_attachments v{SCRIPT_VERSION} ===")
+    print(f"Start:    {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Schránka: {GRAPH_MAILBOX}")
+    print(f"Cilovy adresar: {ATTACHMENTS_DIR}")
+    print(f"MongoDB:  {MONGO_URI} -> {MONGO_DB}")
+
+    # Target directory
+    ATTACHMENTS_DIR.mkdir(parents=True, exist_ok=True)
+    print(f"  Adresar OK")
+
+    # Graph
+    print("\nPřipojuji se k Graph API...")
+    try:
+        get_token()
+        print("  Graph API OK")
+    except Exception as e:
+        print(f"  CHYBA: {e}")
+        sys.exit(1)
+
+    # MongoDB
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        client.close()
+        sys.exit(1)
+
+    col_emails = client[MONGO_DB][MONGO_COL_EMAILS]
+    col_index  = client[MONGO_DB][MONGO_COL_INDEX]
+
+    # Indexes on the attachment index collection
+    if not args.no_indexes:
+        col_index.create_index("filename")
+        col_index.create_index("mime_type")
+
+    # Query: emails with attachments that have not been processed yet
+    if args.force_recheck:
+        query = {"has_attachments": True}
+    else:
+        query = {
+            "has_attachments": True,
+            "attachments": {
+                "$elemMatch": {
+                    "is_inline": False,
+                    "file_hash":  {"$exists": False},
+                }
+            }
+        }
+
+    total = col_emails.count_documents(query)
+    print(f"\nEmailu ke zpracovani: {total}")
+    if total == 0:
+        print("Neni co stahnout.")
+        client.close()
+        return
+
+    cursor = col_emails.find(query, {"_id": 1, "graph_id": 1, "subject": 1, "attachments": 1})
+    if args.limit:
+        cursor = cursor.limit(args.limit)
+    if args.limit > 0:
+        # Keep the "i/total" progress display consistent with --limit.
+        total = min(total, args.limit)
+
+    ok_count   = 0
+    new_count  = 0
+    skip_count = 0
+    err_count  = 0
+    email_i    = 0
+    batch      = []
+
+    def flush():
+        """Write the queued email updates to MongoDB and empty the queue."""
+        if not batch:
+            return
+        try:
+            col_emails.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    try:
+        for email_doc in cursor:
+            email_i += 1
+            email_id   = email_doc["_id"]
+            graph_id   = email_doc.get("graph_id", "")
+            subject    = (email_doc.get("subject") or "")[:60]
+            att_list   = email_doc.get("attachments") or []
+
+            # Only real (non-inline) attachments are downloaded.
+            if all(a.get("is_inline", False) for a in att_list):
+                continue
+
+            print(f"\n  {email_i:>5}/{total}  {subject}")
+
+            # Attachment ids come from Graph; they are matched by name.
+            graph_atts = fetch_message_attachments(graph_id)
+            graph_att_map = {
+                (a.get("name") or ""): a
+                for a in graph_atts
+                if not a.get("isInline", False)
+            }
+
+            updated_atts = list(att_list)
+            email_ok = True
+            changed  = False
+
+            for i, att in enumerate(updated_atts):
+                if att.get("is_inline", False):
+                    continue
+                att_name = att.get("filename") or ""
+                if not args.force_recheck and att.get("file_hash"):
+                    skip_count += 1
+                    print(f"         SKIP  {att_name}")
+                    continue
+
+                graph_att = graph_att_map.get(att_name)
+
+                # Fallback: substring match. An empty name is excluded because
+                # it would match every Graph attachment.
+                if not graph_att and att_name:
+                    needle = att_name.lower()
+                    graph_att = next(
+                        (ga for gname, ga in graph_att_map.items() if needle in gname.lower()),
+                        None,
+                    )
+
+                if not graph_att:
+                    logging.error("attachment not found in Graph [email=%s att=%s]", email_id, att_name)
+                    print(f"         ERR   {att_name} (nenalezeno v Graph)")
+                    err_count += 1
+                    email_ok = False
+                    continue
+
+                # Download the content
+                content = fetch_attachment_content(graph_id, graph_att["id"])
+                if content is None:
+                    err_count += 1
+                    email_ok = False
+                    print(f"         ERR   {att_name} (stazeni selhalo)")
+                    continue
+
+                # Store with dedup; a disk/database failure for one attachment
+                # must not abort the run and lose the queued updates.
+                try:
+                    hash_val, local_path, was_new = save_attachment(content, att_name, ATTACHMENTS_DIR, col_index)
+                except Exception as e:
+                    logging.error("save_attachment failed [email=%s att=%s]: %s", email_id, att_name, e)
+                    print(f"         ERR   {att_name} (ulozeni selhalo)")
+                    err_count += 1
+                    email_ok = False
+                    continue
+
+                # Record the MIME type of the first occurrence only;
+                # duplicates must not overwrite it.
+                if was_new:
+                    col_index.update_one(
+                        {"_id": hash_val},
+                        {"$set": {"mime_type": att.get("mime_type") or graph_att.get("contentType", "")}},
+                    )
+
+                # Record in the email document
+                updated_atts[i] = {**att, "file_hash": hash_val, "local_path": local_path}
+                changed = True
+
+                if was_new:
+                    new_count += 1
+                    print(f"         NEW   {local_path}  ({len(content):,} B)")
+                else:
+                    skip_count += 1
+                    print(f"         DUP   {att_name} -> {local_path}")
+
+            if email_ok:
+                ok_count += 1
+
+            # Queue the write-back only when an attachment actually changed.
+            if changed:
+                batch.append(UpdateOne(
+                    {"_id": email_id},
+                    {"$set": {"attachments": updated_atts}}
+                ))
+
+            if len(batch) >= BATCH_SIZE:
+                flush()
+
+            if email_i % 100 == 0:
+                print(f"  {'─'*60}")
+                print(f"  Průběh: emaily={email_i}/{total}  nove={new_count}  dup={skip_count}  err={err_count}")
+                print(f"  {'─'*60}")
+    finally:
+        # Persist whatever is queued even when the loop is interrupted.
+        flush()
+
+    elapsed_total = (datetime.now() - start).total_seconds()
+    files_total   = col_index.count_documents({})
+    size_total    = sum(d.get("size_bytes", 0) for d in col_index.find({}, {"size_bytes": 1}))
+
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  emaily={ok_count}  |  nove soubory={new_count}  |  duplikaty={skip_count}  |  err={err_count}")
+    print(f"Souboru v indexu: {files_total}  ({size_total/1024/1024:.1f} MB)")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+# Run the downloader only when the file is executed as a script.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,428 @@
+"""
+download_attachments_v1.1.py
+Nazev:  download_attachments_v1.1.py
+Verze:  1.1
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Stahuje skutecne prilohy (is_inline=False) vsech emailu z MongoDB
+    pres Microsoft Graph API a uklada je do adresare
+    /mnt/Emails/<schránka>/Attachments/.
+
+    Schránka se predava jako povinny parametr --mailbox.
+
+    Deduplikace podle SHA256 hashe obsahu:
+        - stejny hash = soubor uz existuje -> preskoci
+        - prvni vyskyt souboru: ulozi pod puvodnim nazvem
+        - kolize nazvu (stejny nazev, jiny hash): faktura_2.pdf, faktura_3.pdf ...
+
+    Po ulozeni aktualizuje MongoDB:
+        - v email dokumentu: kazda priloha dostane file_hash + local_path
+        - kolekce emaily.attachments_index: _id=hash, filename, local_path, size_bytes,
+          mime_type, mailbox, first_seen_at, ref_count
+
+    Bezpecne prerusit a opakovat — emaily kde vsechny prilohy maji file_hash
+    se preskoci. --force-recheck znovu overi i uz stazene.
+
+    POZOR: Skript ze schranky pouze CTE — zadny zapis do schranky!
+
+Spousteni:
+    python download_attachments_v1.1.py --mailbox ordinace@buzalkova.cz
+    python download_attachments_v1.1.py --mailbox vladimir.buzalka@buzalka.cz --limit 50
+    python download_attachments_v1.1.py --mailbox ordinace@buzalkova.cz --force-recheck
+
+Docker:
+    docker exec -it python-runner python /scripts/download_attachments_v1.1.py \\
+        --mailbox ordinace@buzalkova.cz
+
+Zavislosti:
+    msal, requests, pymongo
+    Python 3.10+
+
+Struktura na disku:
+    /mnt/Emails/
+    └── <mailbox>/
+        └── Attachments/
+            ├── faktura_2026.pdf
+            ├── vysledky_lab.pdf
+            ├── vysledky_lab_2.pdf
+            └── ...
+
+Kolekce emaily.attachments_index:
+    _id            SHA256 hash (hex)
+    filename       nazev souboru na disku
+    local_path     relativni cesta od Attachments/
+    size_bytes     velikost souboru
+    mime_type      MIME typ
+    mailbox        schránka ze ktere pochazi prvni vyskytu
+    first_seen_at  datetime UTC
+    ref_count      v kolika emailech se tato priloha vyskytuje
+
+Historie verzi:
+    1.0  2026-06-02  Inicialni verze
+    1.1  2026-06-02  Schránka jako parametr --mailbox (univerzalni pouziti)
+"""
+
+import sys
+import hashlib
+import logging
+import argparse
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional
+
+import msal
+import requests
+from pymongo import MongoClient, UpdateOne
+
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── KONFIGURACE ──────────────────────────────────────────────────────────────
+# Identifiers of the app registration used by get_token() to authenticate
+# against Microsoft Graph.
+GRAPH_TENANT_ID     = "7d269944-37a4-43a1-8140-c7517dc426e9"
+GRAPH_CLIENT_ID     = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
+# NOTE(review): the client secret assigned on the next line is committed in
+# plain text. It should be rotated and loaded from the environment or a
+# secret store instead of living in version control.
+GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
+# Base URL of the Microsoft Graph v1.0 REST endpoint.
+GRAPH_URL           = "https://graph.microsoft.com/v1.0"
+
+# MongoDB connection; the email collection is named after the mailbox passed
+# on the command line, the attachment index lives in MONGO_COL_INDEX.
+MONGO_URI           = "mongodb://192.168.1.76:27017"
+MONGO_DB            = "emaily"
+MONGO_COL_INDEX     = "attachments_index"
+
+# Root directory; attachments go to <EMAILS_BASE_DIR>/<mailbox>/Attachments.
+EMAILS_BASE_DIR     = Path("/mnt/Emails")
+# NOTE(review): the log file is named "parse_emails_errors.log" although this
+# script downloads attachments — presumably shared with another script; confirm.
+LOG_FILE            = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION      = "1.1"
+# Number of queued email updates that triggers a bulk write to MongoDB.
+BATCH_SIZE          = 50
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only ERROR-level records are written to LOG_FILE; regular progress output
+# of the script goes to stdout via print().
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+# Cached Graph bearer token; set by get_token() and reused by the
+# graph_get_* helpers until a request is answered with 401.
+_graph_token: Optional[str] = None
+
+
+# ─── Graph API ────────────────────────────────────────────────────────────────
+
+def get_token() -> str:
+    """Acquire a Microsoft Graph app-only access token and cache it.
+
+    Builds an MSAL confidential client from the module-level tenant id,
+    client id and client secret, requests a token for the Graph ``.default``
+    scope and stores it in the module-level ``_graph_token``.
+
+    Returns:
+        The access token string.
+
+    Raises:
+        RuntimeError: If the MSAL response contains no ``access_token``.
+    """
+    global _graph_token
+    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
+    client_app = msal.ConfidentialClientApplication(
+        GRAPH_CLIENT_ID,
+        authority=authority_url,
+        client_credential=GRAPH_CLIENT_SECRET,
+    )
+    response = client_app.acquire_token_for_client(
+        scopes=["https://graph.microsoft.com/.default"],
+    )
+    if "access_token" in response:
+        _graph_token = response["access_token"]
+        return _graph_token
+    raise RuntimeError(f"Graph auth failed: {response}")
+
+
+def graph_get_bytes(url: str) -> bytes:
+    """Download the raw body of a Graph resource (used for attachment content).
+
+    The cached bearer token is used; when Graph answers 401 a fresh token is
+    acquired and the request is repeated once.
+
+    Args:
+        url: Absolute Graph URL to fetch.
+
+    Returns:
+        The complete response body.
+
+    Raises:
+        requests.HTTPError: For any non-401 error status.
+        RuntimeError: If both attempts were answered with 401.
+    """
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for _ in range(2):
+        # No stream=True: the whole body is needed anyway, and a streamed
+        # response that is never read (the 401 path) would keep its
+        # connection checked out of the pool.
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, timeout=120)
+        if r.status_code == 401:
+            get_token()
+            continue
+        r.raise_for_status()
+        return r.content
+    raise RuntimeError(f"Graph GET bytes failed: {url}")
+
+
+def graph_get_json(url: str, params: dict = None) -> dict:
+    """Fetch a Graph resource and return its decoded JSON body.
+
+    Uses the cached bearer token; a 401 answer triggers one token refresh
+    followed by a single retry.
+
+    Raises:
+        requests.HTTPError: For any non-401 error status.
+        RuntimeError: If both attempts were answered with 401.
+    """
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    attempts_left = 2
+    while attempts_left:
+        attempts_left -= 1
+        auth_header = {"Authorization": f"Bearer {_graph_token}"}
+        response = requests.get(url, headers=auth_header, params=params, timeout=30)
+        if response.status_code != 401:
+            response.raise_for_status()
+            return response.json()
+        # Token rejected: refresh it before the next attempt.
+        get_token()
+    raise RuntimeError(f"Graph GET json failed: {url}")
+
+
+def fetch_message_attachments(mailbox: str, graph_message_id: str) -> list[dict]:
+    """Load the attachment metadata (including attachment ids) of a message.
+
+    All result pages are collected by following ``@odata.nextLink``, so a
+    message with many attachments is not silently truncated.
+
+    Returns:
+        The list of attachment metadata dicts, or an empty list when any
+        request failed (the failure is written to the error log).
+    """
+    url = f"{GRAPH_URL}/users/{mailbox}/messages/{graph_message_id}/attachments"
+    params = {"$select": "id,name,contentType,size,isInline,contentId"}
+    collected: list[dict] = []
+    try:
+        while url:
+            data = graph_get_json(url, params)
+            collected.extend(data.get("value", []))
+            # The next link already carries the full query string.
+            url = data.get("@odata.nextLink")
+            params = None
+    except Exception as e:
+        logging.error("fetch_message_attachments failed [%s]: %s", graph_message_id, e)
+        return []
+    return collected
+
+
+def fetch_attachment_content(mailbox: str, graph_message_id: str, attachment_id: str) -> Optional[bytes]:
+    """Download the raw content of one attachment through the Graph API.
+
+    Returns:
+        The attachment bytes, or ``None`` when the download failed (the
+        failure is written to the error log).
+    """
+    message_url = f"{GRAPH_URL}/users/{mailbox}/messages/{graph_message_id}"
+    value_url = f"{message_url}/attachments/{attachment_id}/$value"
+    try:
+        payload = graph_get_bytes(value_url)
+    except Exception as exc:
+        logging.error("fetch_attachment_content failed [msg=%s att=%s]: %s", graph_message_id, attachment_id, exc)
+        return None
+    return payload
+
+
+# ─── Dedup + ukládání ─────────────────────────────────────────────────────────
+
+def sha256(data: bytes) -> str:
+    """Return the SHA-256 digest of *data* as a lowercase hex string."""
+    digest = hashlib.sha256()
+    digest.update(data)
+    return digest.hexdigest()
+
+
+def safe_filename(name: str) -> str:
+    """Return *name* reduced to characters that are safe in a file name.
+
+    Every character that is not alphanumeric and not one of ``. _ -`` or a
+    space is replaced by ``_``; surrounding whitespace is stripped. Names
+    that end up empty or consist only of dots and spaces (``"."``, ``".."``)
+    become ``"attachment"``, because they would otherwise address a
+    directory instead of a file.
+    """
+    safe = "".join(c if c.isalnum() or c in "._- " else "_" for c in (name or "")).strip()
+    if not safe.strip(". "):
+        return "attachment"
+    return safe
+
+
+def resolve_filename(desired_name: str, att_dir: Path, hash_val: str, col_index) -> str:
+    """Choose the file name under which content with *hash_val* is stored.
+
+    A name is usable when it is not taken by different content. "Taken" is
+    checked both in the index collection and on disk, so a file that exists
+    in *att_dir* without an index entry is never overwritten by different
+    content. When *desired_name* is taken, ``_2``, ``_3`` ... is appended to
+    the stem until a usable name is found.
+
+    Args:
+        desired_name: Preferred (already sanitised) file name.
+        att_dir: Directory holding the attachment files.
+        hash_val: SHA-256 hex digest of the content to be stored.
+        col_index: Index collection; only ``find_one({"filename": ...})`` is used.
+
+    Returns:
+        The file name to use inside *att_dir*.
+    """
+    def usable(name: str) -> bool:
+        entry = col_index.find_one({"filename": name})
+        if entry is not None:
+            # Indexed name: reusable only for the very same content.
+            return entry["_id"] == hash_val
+        path = att_dir / name
+        if not path.exists():
+            return True
+        # Unindexed file on disk (e.g. left by an interrupted run): reuse it
+        # only when it already holds exactly this content.
+        try:
+            return hashlib.sha256(path.read_bytes()).hexdigest() == hash_val
+        except OSError:
+            return False
+
+    if usable(desired_name):
+        return desired_name
+
+    stem   = Path(desired_name).stem
+    suffix = Path(desired_name).suffix
+    n = 2
+    while True:
+        candidate = f"{stem}_{n}{suffix}"
+        if usable(candidate):
+            return candidate
+        n += 1
+
+
+def save_attachment(
+    content: bytes,
+    original_name: str,
+    mime_type: str,
+    mailbox: str,
+    att_dir: Path,
+    col_index,
+) -> tuple[str, str, bool]:
+    """Store an attachment on disk, deduplicated by the SHA-256 of its content.
+
+    Args:
+        content: Raw attachment bytes.
+        original_name: File name as recorded for the attachment (may be empty).
+        mime_type: MIME type stored in a newly created index entry.
+        mailbox: Mailbox stored in a newly created index entry.
+        att_dir: Directory holding the attachment files of this mailbox.
+        col_index: Index collection keyed by content hash.
+
+    Returns:
+        ``(hash, local_path, was_new)``. ``was_new`` is True when a new index
+        entry and file were created, False when the hash was already indexed.
+
+    Note:
+        ``ref_count`` is incremented on every dedup hit, including repeated
+        runs over the same email.
+    """
+    hash_val = sha256(content)
+
+    existing = col_index.find_one({"_id": hash_val})
+    if existing:
+        # The index is shared by all mailboxes while att_dir is per mailbox,
+        # and files can disappear from disk: make sure the returned
+        # local_path really exists inside this att_dir.
+        known_path = att_dir / existing["local_path"]
+        if not known_path.exists():
+            known_path.write_bytes(content)
+        col_index.update_one({"_id": hash_val}, {"$inc": {"ref_count": 1}})
+        return hash_val, existing["local_path"], False
+
+    filename  = resolve_filename(safe_filename(original_name), att_dir, hash_val, col_index)
+    file_path = att_dir / filename
+
+    # Write to a temporary sibling and rename, so an interrupted run never
+    # leaves a truncated file under the final name.
+    tmp_path = file_path.with_name(file_path.name + ".part")
+    tmp_path.write_bytes(content)
+    tmp_path.replace(file_path)
+
+    col_index.insert_one({
+        "_id":          hash_val,
+        "filename":     filename,
+        "local_path":   filename,
+        "size_bytes":   len(content),
+        "mime_type":    mime_type,
+        "mailbox":      mailbox,
+        "first_seen_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        "ref_count":    1,
+    })
+
+    return hash_val, filename, True
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Download all missing real attachments of one mailbox into its directory.
+
+    Steps: parse the CLI arguments (``--mailbox`` is required), create the
+    target directory, authenticate against Graph, connect to MongoDB, select
+    the emails to process (those with a non-inline attachment lacking
+    ``file_hash``, or every email with attachments when ``--force-recheck``
+    is given), download and store each attachment via ``save_attachment`` and
+    write the enriched ``attachments`` array back to the email document in
+    batches of ``BATCH_SIZE``.
+
+    The process exits with status 1 when Graph authentication or the MongoDB
+    ping fails. A failure while saving a single attachment is logged and
+    counted; it does not abort the run.
+    """
+    ap = argparse.ArgumentParser(description=f"download_attachments v{SCRIPT_VERSION}")
+    ap.add_argument("--mailbox",       required=True,
+                    help="Emailova schranka (napr. ordinace@buzalkova.cz)")
+    ap.add_argument("--limit",         type=int, default=0,
+                    help="Zpracovat max N emailu (0 = vse)")
+    ap.add_argument("--force-recheck", action="store_true",
+                    help="Znovu overi i emaily kde prilohy uz maji file_hash")
+    ap.add_argument("--no-indexes",    action="store_true",
+                    help="Nevytvorit indexy na attachments_index kolekci")
+    args = ap.parse_args()
+
+    mailbox     = args.mailbox
+    att_dir     = EMAILS_BASE_DIR / mailbox / "Attachments"
+    mongo_col   = mailbox
+
+    start = datetime.now()
+    print(f"=== download_attachments v{SCRIPT_VERSION} ===")
+    print(f"Start:    {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Schránka: {mailbox}")
+    print(f"Cilovy adresar: {att_dir}")
+    print(f"MongoDB:  {MONGO_URI} -> {MONGO_DB}.{mongo_col}")
+
+    att_dir.mkdir(parents=True, exist_ok=True)
+    print("  Adresar OK")
+
+    print("\nPřipojuji se k Graph API...")
+    try:
+        get_token()
+        print("  Graph API OK")
+    except Exception as e:
+        print(f"  CHYBA: {e}")
+        sys.exit(1)
+
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        client.close()
+        sys.exit(1)
+
+    col_emails = client[MONGO_DB][mongo_col]
+    col_index  = client[MONGO_DB][MONGO_COL_INDEX]
+
+    if not args.no_indexes:
+        col_index.create_index("filename")
+        col_index.create_index("mime_type")
+        col_index.create_index("mailbox")
+
+    # Query: emails with attachments that have not been processed yet
+    if args.force_recheck:
+        query = {"has_attachments": True}
+    else:
+        query = {
+            "has_attachments": True,
+            "attachments": {
+                "$elemMatch": {
+                    "is_inline": False,
+                    "file_hash": {"$exists": False},
+                }
+            }
+        }
+
+    total = col_emails.count_documents(query)
+    print(f"\nEmailu ke zpracovani: {total}")
+    if total == 0:
+        print("Neni co stahnout.")
+        client.close()
+        return
+
+    cursor = col_emails.find(query, {"_id": 1, "graph_id": 1, "subject": 1, "attachments": 1})
+    if args.limit:
+        cursor = cursor.limit(args.limit)
+    if args.limit > 0:
+        # Keep the "i/total" progress display consistent with --limit.
+        total = min(total, args.limit)
+
+    ok_count   = 0
+    new_count  = 0
+    dup_count  = 0
+    err_count  = 0
+    email_i    = 0
+    batch      = []
+
+    def flush():
+        """Write the queued email updates to MongoDB and empty the queue."""
+        if not batch:
+            return
+        try:
+            col_emails.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    try:
+        for email_doc in cursor:
+            email_i   += 1
+            email_id   = email_doc["_id"]
+            graph_id   = email_doc.get("graph_id", "")
+            subject    = (email_doc.get("subject") or "")[:60]
+            att_list   = email_doc.get("attachments") or []
+
+            # Only real (non-inline) attachments are downloaded.
+            if all(a.get("is_inline", False) for a in att_list):
+                continue
+
+            print(f"\n  {email_i:>5}/{total}  {subject}")
+
+            # Attachment ids come from Graph; they are matched by name.
+            graph_atts    = fetch_message_attachments(mailbox, graph_id)
+            graph_att_map = {
+                (a.get("name") or ""): a
+                for a in graph_atts
+                if not a.get("isInline", False)
+            }
+
+            updated_atts = list(att_list)
+            email_ok     = True
+            changed      = False
+
+            for i, att in enumerate(updated_atts):
+                if att.get("is_inline", False):
+                    continue
+                att_name = att.get("filename") or ""
+                if not args.force_recheck and att.get("file_hash"):
+                    print(f"         SKIP  {att_name}")
+                    continue
+
+                graph_att = graph_att_map.get(att_name)
+
+                # Fallback: substring match. An empty name is excluded because
+                # it would match every Graph attachment.
+                if not graph_att and att_name:
+                    needle = att_name.lower()
+                    graph_att = next(
+                        (ga for gname, ga in graph_att_map.items() if needle in gname.lower()),
+                        None,
+                    )
+
+                if not graph_att:
+                    logging.error("attachment not found in Graph [email=%s att=%s]", email_id, att_name)
+                    print(f"         ERR   {att_name} (nenalezeno v Graph)")
+                    err_count += 1
+                    email_ok = False
+                    continue
+
+                content = fetch_attachment_content(mailbox, graph_id, graph_att["id"])
+                if content is None:
+                    err_count += 1
+                    email_ok = False
+                    print(f"         ERR   {att_name} (stazeni selhalo)")
+                    continue
+
+                mime_type = att.get("mime_type") or graph_att.get("contentType", "")
+                # A disk/database failure for one attachment must not abort
+                # the run and lose the queued updates.
+                try:
+                    hash_val, local_path, was_new = save_attachment(
+                        content, att_name, mime_type, mailbox, att_dir, col_index
+                    )
+                except Exception as e:
+                    logging.error("save_attachment failed [email=%s att=%s]: %s", email_id, att_name, e)
+                    print(f"         ERR   {att_name} (ulozeni selhalo)")
+                    err_count += 1
+                    email_ok = False
+                    continue
+
+                updated_atts[i] = {**att, "file_hash": hash_val, "local_path": local_path}
+                changed = True
+
+                if was_new:
+                    new_count += 1
+                    print(f"         NEW   {local_path}  ({len(content):,} B)")
+                else:
+                    dup_count += 1
+                    print(f"         DUP   {att_name} -> {local_path}")
+
+            if email_ok:
+                ok_count += 1
+
+            # Queue the write-back only when an attachment actually changed.
+            if changed:
+                batch.append(UpdateOne({"_id": email_id}, {"$set": {"attachments": updated_atts}}))
+
+            if len(batch) >= BATCH_SIZE:
+                flush()
+
+            if email_i % 100 == 0:
+                print(f"  {'─'*60}")
+                print(f"  Průběh: emaily={email_i}/{total}  nove={new_count}  dup={dup_count}  err={err_count}")
+                print(f"  {'─'*60}")
+    finally:
+        # Persist whatever is queued even when the loop is interrupted.
+        flush()
+
+    elapsed_total = (datetime.now() - start).total_seconds()
+    files_total   = col_index.count_documents({})
+    size_total    = sum(d.get("size_bytes", 0) for d in col_index.find({}, {"size_bytes": 1}))
+
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  emaily={ok_count}  |  nove={new_count}  |  dup={dup_count}  |  err={err_count}")
+    print(f"Souboru v indexu: {files_total}  ({size_total / 1024 / 1024:.1f} MB)")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+
+# Run the downloader only when the file is executed as a script.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,466 @@
+"""
+download_attachments_v1.2.py
+Nazev:  download_attachments_v1.2.py
+Verze:  1.2
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Stahuje skutecne prilohy (is_inline=False) vsech emailu z MongoDB
+    pres Microsoft Graph API a uklada je do adresare
+    /mnt/Emails/<schránka>/Attachments/.
+
+    Schránka se predava jako povinny parametr --mailbox.
+
+    Deduplikace podle SHA256 hashe obsahu:
+        - stejny hash = soubor uz existuje -> preskoci
+        - prvni vyskyt souboru: ulozi pod puvodnim nazvem
+        - kolize nazvu (stejny nazev, jiny hash): faktura_2.pdf, faktura_3.pdf ...
+
+    Po ulozeni aktualizuje MongoDB:
+        - v email dokumentu: kazda priloha dostane file_hash + local_path
+        - kolekce emaily.attachments_index: _id=hash, filename, path, size_bytes,
+          mime_type, mailbox, first_seen_at, ref_count
+
+    Bezpecne prerusit a opakovat — emaily kde vsechny prilohy maji file_hash
+    se preskoci. --force-recheck znovu overi i uz stazene.
+
+    POZOR: Skript pouze CIST ze schranky — zadny zapis do schranky!
+
+Spousteni:
+    python download_attachments_v1.2.py --mailbox ordinace@buzalkova.cz
+    python download_attachments_v1.2.py --mailbox ordinace@buzalkova.cz --limit 50
+    python download_attachments_v1.2.py --mailbox ordinace@buzalkova.cz --force-recheck
+
+Docker:
+    docker exec -it python-runner python /scripts/download_attachments_v1.2.py \\
+        --mailbox ordinace@buzalkova.cz
+
+Zavislosti:
+    msal, requests, pymongo
+    Python 3.10+
+
+Historie verzi:
+    1.0  2026-06-02  Inicialni verze
+    1.1  2026-06-02  Schránka jako parametr --mailbox
+    1.2  2026-06-02  Oprava: Graph attachment mapa vcetne inline (fix ERR pri
+                     inline obrazcich ulozených jako is_inline=False v MongoDB);
+                     normalizace nazvu pro robustni porovnani; preskoceni S/MIME
+                     (.p7m/.p7s); pokud Graph oznaci jako inline -> SKIP ne ERR
+"""
+
+import sys
+import re
+import hashlib
+import logging
+import argparse
+import unicodedata
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional
+
+import msal
+import requests
+from pymongo import MongoClient, UpdateOne
+
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── CONFIGURATION ────────────────────────────────────────────────────────────
+# Microsoft Graph app registration used by get_token() (client-credentials flow).
+# NOTE(review): the client secret is stored in plain text in the source file —
+# consider moving it to an environment variable / secret store and rotating it.
+GRAPH_TENANT_ID     = "7d269944-37a4-43a1-8140-c7517dc426e9"
+GRAPH_CLIENT_ID     = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
+GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
+GRAPH_URL           = "https://graph.microsoft.com/v1.0"
+
+# MongoDB connection; the e-mail collection itself is named after the mailbox
+# (see main), only the attachment index collection name is fixed here.
+MONGO_URI           = "mongodb://192.168.1.76:27017"
+MONGO_DB            = "emaily"
+MONGO_COL_INDEX     = "attachments_index"
+
+# Attachments are written to EMAILS_BASE_DIR / <mailbox> / "Attachments".
+EMAILS_BASE_DIR     = Path("/mnt/Emails")
+# NOTE(review): same log file name as the parse_emails script — presumably a
+# deliberately shared error log; confirm.
+LOG_FILE            = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION      = "1.2"
+# Number of per-e-mail UpdateOne operations collected before a bulk_write.
+BATCH_SIZE          = 50
+
+# Attachment types that are skipped (S/MIME signatures, certificates)
+SKIP_EXTENSIONS = {".p7m", ".p7s", ".p7c", ".p7b"}
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only ERROR-level records are written to the log file.
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+# Cached Graph access token; filled in by get_token().
+_graph_token: Optional[str] = None
+
+
+# ─── Graph API ────────────────────────────────────────────────────────────────
+
+def get_token() -> str:
+    """Acquire an app-only Graph access token and cache it in ``_graph_token``.
+
+    An MSAL confidential client is built from the module-level tenant, client
+    id and client secret, and a token is requested for the Graph ``.default``
+    scope.
+
+    Returns:
+        The access token string (also stored in the module-level cache).
+
+    Raises:
+        RuntimeError: if the MSAL result does not contain ``access_token``.
+    """
+    global _graph_token
+    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
+    msal_app = msal.ConfidentialClientApplication(
+        GRAPH_CLIENT_ID,
+        authority=authority_url,
+        client_credential=GRAPH_CLIENT_SECRET,
+    )
+    auth_result = msal_app.acquire_token_for_client(
+        scopes=["https://graph.microsoft.com/.default"]
+    )
+    if "access_token" in auth_result:
+        _graph_token = auth_result["access_token"]
+        return _graph_token
+    raise RuntimeError(f"Graph auth failed: {auth_result}")
+
+
+def graph_get_bytes(url: str) -> bytes:
+    """GET ``url`` with the cached Graph bearer token and return the raw body.
+
+    A token is acquired first if none is cached. On HTTP 401 the token is
+    refreshed via get_token() and the request is repeated once.
+
+    Args:
+        url: Absolute Graph URL to download.
+
+    Returns:
+        The complete response body as bytes.
+
+    Raises:
+        requests.HTTPError: for any non-401 error status (raise_for_status).
+        RuntimeError: if both attempts were answered with 401.
+    """
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for _ in range(2):
+        # With stream=True the connection stays checked out until the body is
+        # consumed or the response is closed. The with-block releases it on
+        # every path, including the 401 retry and raise_for_status() errors.
+        with requests.get(
+            url,
+            headers={"Authorization": f"Bearer {_graph_token}"},
+            timeout=120,
+            stream=True,
+        ) as r:
+            if r.status_code == 401:
+                get_token()
+                continue
+            r.raise_for_status()
+            return r.content
+    raise RuntimeError(f"Graph GET bytes failed: {url}")
+
+
+def graph_get_json(url: str, params: dict = None) -> dict:
+    """GET ``url`` with the cached Graph bearer token and return the JSON body.
+
+    A token is acquired first if none is cached. A 401 answer triggers one
+    token refresh followed by one more attempt.
+
+    Args:
+        url: Absolute Graph URL.
+        params: Optional query parameters passed to ``requests.get``.
+
+    Returns:
+        The decoded JSON document.
+
+    Raises:
+        requests.HTTPError: for any non-401 error status.
+        RuntimeError: if both attempts were answered with 401.
+    """
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    attempts_left = 2
+    while attempts_left:
+        attempts_left -= 1
+        response = requests.get(
+            url,
+            headers={"Authorization": f"Bearer {_graph_token}"},
+            params=params,
+            timeout=30,
+        )
+        if response.status_code != 401:
+            response.raise_for_status()
+            return response.json()
+        # Token rejected: refresh it and let the loop retry.
+        get_token()
+    raise RuntimeError(f"Graph GET json failed: {url}")
+
+
+def fetch_message_attachments(mailbox: str, graph_message_id: str) -> list[dict]:
+    """Load the metadata of ALL attachments of a message (inline ones included).
+
+    Filtering (inline / real attachment) is left to the caller. Result pages
+    are followed through ``@odata.nextLink`` so attachments beyond the first
+    page are not silently dropped.
+
+    Args:
+        mailbox: Mailbox (user) whose message is queried.
+        graph_message_id: Graph id of the message.
+
+    Returns:
+        List of attachment dicts with the selected fields, or an empty list
+        when any request fails (the error is written to the log).
+    """
+    url = f"{GRAPH_URL}/users/{mailbox}/messages/{graph_message_id}/attachments"
+    params = {"$select": "id,name,contentType,size,isInline,contentId"}
+    collected: list[dict] = []
+    try:
+        while url:
+            data = graph_get_json(url, params)
+            collected.extend(data.get("value", []))
+            url = data.get("@odata.nextLink")
+            # The nextLink URL already carries the query options.
+            params = None
+        return collected
+    except Exception as e:
+        # A partial list could lead to wrong matches, so report nothing at all.
+        logging.error("fetch_message_attachments failed [%s]: %s", graph_message_id, e)
+        return []
+
+
+def fetch_attachment_content(mailbox: str, graph_message_id: str, attachment_id: str) -> Optional[bytes]:
+    """Download the raw content of one attachment via its ``/$value`` endpoint.
+
+    Returns:
+        The attachment bytes, or None when the download raised; the failure is
+        written to the error log together with message and attachment id.
+    """
+    value_url = (
+        f"{GRAPH_URL}/users/{mailbox}/messages/{graph_message_id}"
+        f"/attachments/{attachment_id}/$value"
+    )
+    try:
+        payload = graph_get_bytes(value_url)
+    except Exception as e:
+        logging.error("fetch_attachment_content failed [msg=%s att=%s]: %s",
+                      graph_message_id, attachment_id, e)
+        payload = None
+    return payload
+
+
+# ─── Pomocné funkce ───────────────────────────────────────────────────────────
+
+def normalize_name(name: str) -> str:
+    """Build a comparison key for an attachment name.
+
+    The name is lower-cased and stripped, NFKD-decomposed so that combining
+    marks (diacritics) can be dropped, and every character that is not a word
+    character, dot or hyphen is replaced by an underscore.
+    """
+    decomposed = unicodedata.normalize("NFKD", name.lower().strip())
+    base_chars = [ch for ch in decomposed if not unicodedata.combining(ch)]
+    return re.sub(r"[^\w.\-]", "_", "".join(base_chars))
+
+
+def find_graph_att(att_name: str, att_size: int, graph_atts: list[dict]) -> Optional[dict]:
+    """
+    Find the Graph attachment that corresponds to a stored attachment record.
+
+    Candidates are searched in tiers, most specific first:
+      1. exact name match
+      2. normalized name match (see normalize_name)
+      3. normalized Graph name ends with the last 20 characters of the
+         normalized wanted name
+
+    The first tier that yields candidates decides. A single candidate is
+    returned as is. When several attachments share the name, the one whose
+    Graph ``size`` is closest to ``att_size`` is preferred, provided it is
+    within 10 %; otherwise the first candidate in Graph order is returned.
+    Without this, two different files with the same name would both resolve
+    to the first one.
+
+    Args:
+        att_name: File name stored with the e-mail document.
+        att_size: Stored size in bytes; 0/None disables size disambiguation.
+        graph_atts: Attachment dicts as returned by Graph. A missing or null
+            ``name`` is treated as an empty name instead of raising.
+
+    Returns:
+        The matching Graph attachment dict, or None when nothing matches.
+    """
+    att_name = att_name or ""
+
+    def pick(candidates: list[dict]) -> dict:
+        # Size only matters when the name alone is ambiguous.
+        if len(candidates) > 1 and att_size:
+            sized = [ga for ga in candidates if ga.get("size")]
+            if sized:
+                closest = min(sized, key=lambda ga: abs(ga["size"] - att_size))
+                if abs(closest["size"] - att_size) / max(closest["size"], att_size) < 0.1:
+                    return closest
+        return candidates[0]
+
+    named = [(ga, ga.get("name") or "") for ga in graph_atts]
+
+    # 1. Exact match
+    exact = [ga for ga, name in named if name == att_name]
+    if exact:
+        return pick(exact)
+
+    norm_want = normalize_name(att_name)
+    normalized = [(ga, normalize_name(name)) for ga, name in named]
+
+    # 2. Normalized match
+    same_norm = [ga for ga, norm in normalized if norm == norm_want]
+    if same_norm:
+        return pick(same_norm)
+
+    # 3. Partial suffix match (last 20 characters of the normalized name)
+    tail = norm_want[-20:]
+    if tail:
+        by_tail = [ga for ga, norm in normalized if norm.endswith(tail)]
+        if by_tail:
+            return pick(by_tail)
+
+    return None
+
+
+def sha256(data: bytes) -> str:
+    """Return the hexadecimal SHA-256 digest of ``data``."""
+    hasher = hashlib.sha256()
+    hasher.update(data)
+    return hasher.hexdigest()
+
+
+def safe_filename(name: str) -> str:
+    """Turn an attachment name into a file name that can be written to disk.
+
+    Every character that is not alphanumeric or one of ``._- ()`` becomes an
+    underscore and surrounding whitespace is removed.
+
+    Names that end up empty or consist only of dots and spaces (``"."``,
+    ``".."``, ...) are replaced by ``"attachment"``: ``.`` and ``..`` resolve
+    to directories, so writing the file would fail.
+
+    Names longer than 255 UTF-8 bytes are shortened, keeping a short trailing
+    extension, so that they fit the usual per-component file name limit.
+
+    Args:
+        name: Attachment name as received (untrusted input).
+
+    Returns:
+        A non-empty file name without path separators.
+    """
+    max_bytes = 255
+    safe = "".join(c if c.isalnum() or c in "._- ()" else "_" for c in name).strip()
+    if not safe.strip(". "):
+        return "attachment"
+    if len(safe.encode("utf-8")) > max_bytes:
+        dot = safe.rfind(".")
+        # Treat only a short trailing ".xyz" as an extension worth keeping.
+        if dot > 0 and len(safe) - dot <= 16:
+            stem, suffix = safe[:dot], safe[dot:]
+        else:
+            stem, suffix = safe, ""
+        budget = max_bytes - len(suffix.encode("utf-8"))
+        # errors="ignore" drops a multi-byte character cut in half by the slice.
+        stem = stem.encode("utf-8")[:budget].decode("utf-8", errors="ignore")
+        safe = stem + suffix
+    return safe
+
+
+def resolve_filename(desired_name: str, att_dir: Path, hash_val: str, col_index) -> str:
+    """Choose the file name under which content with ``hash_val`` is stored.
+
+    ``desired_name`` is used when the index has no entry with that file name,
+    or when the entry belongs to the same hash. Otherwise ``<stem>_<n><suffix>``
+    is tried for n = 2, 3, ... until a candidate is found that either
+      - is indexed for the same hash, or
+      - is not indexed and does not exist in ``att_dir``.
+
+    Args:
+        desired_name: Preferred (already sanitized) file name.
+        att_dir: Directory the attachment will be written to.
+        hash_val: SHA-256 hex digest of the content.
+        col_index: Index collection queried by ``filename``.
+
+    Returns:
+        The file name to use.
+    """
+    taken_by = col_index.find_one({"filename": desired_name})
+    if not taken_by or taken_by["_id"] == hash_val:
+        return desired_name
+
+    name_parts = Path(desired_name)
+    stem, suffix = name_parts.stem, name_parts.suffix
+    counter = 1
+    while True:
+        counter += 1
+        candidate = f"{stem}_{counter}{suffix}"
+        indexed = col_index.find_one({"filename": candidate})
+        if indexed:
+            if indexed["_id"] == hash_val:
+                return candidate
+            # Name belongs to different content: try the next number.
+            continue
+        if not (att_dir / candidate).exists():
+            return candidate
+
+
+def save_attachment(
+    content: bytes,
+    original_name: str,
+    mime_type: str,
+    mailbox: str,
+    att_dir: Path,
+    col_index,
+) -> tuple[str, str, bool]:
+    """Store attachment content on disk, de-duplicated by SHA-256 hash.
+
+    If the hash is already present in the index, only its ``ref_count`` is
+    incremented and the stored ``local_path`` is returned. Otherwise the
+    content is written into ``att_dir`` under a collision-free name and a new
+    index document is inserted.
+
+    Returns:
+        ``(hash, local_path, was_new)`` where ``was_new`` is True only when a
+        file was written by this call.
+    """
+    digest = sha256(content)
+
+    known = col_index.find_one({"_id": digest})
+    if known:
+        col_index.update_one({"_id": digest}, {"$inc": {"ref_count": 1}})
+        return digest, known["local_path"], False
+
+    stored_name = resolve_filename(
+        safe_filename(original_name), att_dir, digest, col_index
+    )
+    # File first, index entry second.
+    (att_dir / stored_name).write_bytes(content)
+
+    index_record = {
+        "_id":           digest,
+        "filename":      stored_name,
+        "local_path":    stored_name,
+        "size_bytes":    len(content),
+        "mime_type":     mime_type,
+        "mailbox":       mailbox,
+        "first_seen_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        "ref_count":     1,
+    }
+    col_index.insert_one(index_record)
+
+    return digest, stored_name, True
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Command-line entry point: download missing attachments for one mailbox.
+
+    Steps: parse arguments, create the target directory, verify the Graph and
+    MongoDB connections, select the e-mails to process, then for every
+    non-inline attachment look it up in Graph, download it, store it via
+    save_attachment() and write file_hash / local_path back to the e-mail
+    document in batches. A summary is printed at the end.
+
+    The process exits with status 1 when Graph authentication or the MongoDB
+    ping fails.
+    """
+    ap = argparse.ArgumentParser(description=f"download_attachments v{SCRIPT_VERSION}")
+    ap.add_argument("--mailbox",       required=True,
+                    help="Emailova schranka (napr. ordinace@buzalkova.cz)")
+    ap.add_argument("--limit",         type=int, default=0,
+                    help="Zpracovat max N emailu (0 = vse)")
+    ap.add_argument("--force-recheck", action="store_true",
+                    help="Znovu overi i emaily kde prilohy uz maji file_hash")
+    ap.add_argument("--no-indexes",    action="store_true",
+                    help="Nevytvorit indexy na attachments_index kolekci")
+    args = ap.parse_args()
+
+    mailbox   = args.mailbox
+    att_dir   = EMAILS_BASE_DIR / mailbox / "Attachments"
+    # The e-mail collection is named after the mailbox.
+    mongo_col = mailbox
+
+    start = datetime.now()
+    print(f"=== download_attachments v{SCRIPT_VERSION} ===")
+    print(f"Start:    {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Schránka: {mailbox}")
+    print(f"Cilovy adresar: {att_dir}")
+    print(f"MongoDB:  {MONGO_URI} -> {MONGO_DB}.{mongo_col}")
+
+    att_dir.mkdir(parents=True, exist_ok=True)
+    print("  Adresar OK")
+
+    # Fail fast when Graph authentication does not work.
+    print("\nPřipojuji se k Graph API...")
+    try:
+        get_token()
+        print("  Graph API OK")
+    except Exception as e:
+        print(f"  CHYBA: {e}")
+        sys.exit(1)
+
+    # Fail fast when MongoDB is unreachable (5 s server selection timeout).
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        sys.exit(1)
+
+    col_emails = client[MONGO_DB][mongo_col]
+    col_index  = client[MONGO_DB][MONGO_COL_INDEX]
+
+    if not args.no_indexes:
+        col_index.create_index("filename")
+        col_index.create_index("mime_type")
+        col_index.create_index("mailbox")
+
+    if args.force_recheck:
+        # Re-check every e-mail flagged as having attachments.
+        query = {"has_attachments": True}
+    else:
+        # Only e-mails with at least one non-inline attachment that has no
+        # file_hash yet.
+        query = {
+            "has_attachments": True,
+            "attachments": {
+                "$elemMatch": {
+                    "is_inline": False,
+                    "file_hash": {"$exists": False},
+                }
+            }
+        }
+
+    # NOTE: counted before --limit is applied, so with --limit the progress
+    # denominator can be larger than the number of e-mails processed.
+    total = col_emails.count_documents(query)
+    print(f"\nEmailu ke zpracovani: {total}")
+    if total == 0:
+        print("Neni co stahnout.")
+        client.close()
+        return
+
+    cursor = col_emails.find(query, {"_id": 1, "graph_id": 1, "subject": 1, "attachments": 1})
+    if args.limit:
+        cursor = cursor.limit(args.limit)
+
+    # Counters: ok_count = e-mails without any error, the rest per attachment.
+    ok_count  = 0
+    new_count = 0
+    dup_count = 0
+    skip_count = 0
+    err_count = 0
+    email_i   = 0
+    batch     = []
+
+    def flush():
+        """Write pending e-mail updates; failures are logged and printed, then the batch is cleared."""
+        if not batch:
+            return
+        try:
+            col_emails.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    for email_doc in cursor:
+        email_i  += 1
+        email_id  = email_doc["_id"]
+        graph_id  = email_doc.get("graph_id", "")
+        subject   = (email_doc.get("subject") or "")[:60]
+        att_list  = email_doc.get("attachments") or []
+
+        # Nothing to do for e-mails that only carry inline attachments.
+        real_atts = [a for a in att_list if not a.get("is_inline", False)]
+        if not real_atts:
+            continue
+
+        print(f"\n  {email_i:>5}/{total}  {subject}")
+
+        # Load ALL attachments from Graph (inline included — needed for matching)
+        graph_atts = fetch_message_attachments(mailbox, graph_id)
+
+        # Shallow copy; changed entries are replaced by new dicts below.
+        updated_atts = list(att_list)
+        email_ok     = True
+
+        for i, att in enumerate(updated_atts):
+            if att.get("is_inline", False):
+                continue
+            # Already processed in an earlier run (unless re-check is forced).
+            if not args.force_recheck and att.get("file_hash"):
+                continue
+
+            att_name = att.get("filename", "")
+            att_size = att.get("size_bytes", 0)
+
+            # Skip S/MIME signatures; the sentinel hash "skip" gives the entry
+            # a file_hash so the default query no longer selects it.
+            if Path(att_name).suffix.lower() in SKIP_EXTENSIONS:
+                updated_atts[i] = {**att, "file_hash": "skip", "local_path": ""}
+                skip_count += 1
+                print(f"         SKIP  {att_name} (S/MIME)")
+                continue
+
+            # Find the attachment in the Graph list
+            graph_att = find_graph_att(att_name, att_size, graph_atts)
+
+            if not graph_att:
+                logging.error("attachment not found [email=%s att=%s]", email_id, att_name)
+                print(f"         ERR   {att_name} (nenalezeno)")
+                err_count += 1
+                email_ok = False
+                continue
+
+            # If Graph reports the attachment as inline — skip it, no download;
+            # the stored is_inline flag is corrected at the same time.
+            if graph_att.get("isInline", False):
+                updated_atts[i] = {**att, "is_inline": True, "file_hash": "skip", "local_path": ""}
+                skip_count += 1
+                print(f"         SKIP  {att_name} (inline obrazek)")
+                continue
+
+            content = fetch_attachment_content(mailbox, graph_id, graph_att["id"])
+            if content is None:
+                # Entry keeps no file_hash, so a later run selects it again.
+                err_count += 1
+                email_ok = False
+                print(f"         ERR   {att_name} (stazeni selhalo)")
+                continue
+
+            # Stored MIME type wins; Graph contentType is the fallback.
+            mime_type = att.get("mime_type") or graph_att.get("contentType", "")
+            hash_val, local_path, was_new = save_attachment(
+                content, att_name, mime_type, mailbox, att_dir, col_index
+            )
+
+            updated_atts[i] = {**att, "file_hash": hash_val, "local_path": local_path}
+
+            if was_new:
+                new_count += 1
+                print(f"         NEW   {local_path}  ({len(content):,} B)")
+            else:
+                dup_count += 1
+                print(f"         DUP   {att_name} -> {local_path}")
+
+        if email_ok:
+            ok_count += 1
+
+        # The whole attachments array is rewritten for the e-mail.
+        batch.append(UpdateOne({"_id": email_id}, {"$set": {"attachments": updated_atts}}))
+
+        if len(batch) >= BATCH_SIZE:
+            flush()
+
+        # Progress report every 100 e-mails.
+        if email_i % 100 == 0:
+            # NOTE(review): elapsed is computed but not used in the output below.
+            elapsed = (datetime.now() - start).total_seconds()
+            print(f"  {'─'*60}")
+            print(f"  Průběh: emaily={email_i}/{total}  nove={new_count}  dup={dup_count}  skip={skip_count}  err={err_count}")
+            print(f"  {'─'*60}")
+
+    # Write whatever is left in the last, partially filled batch.
+    flush()
+
+    # Totals over the whole index collection, not just this run.
+    elapsed_total = (datetime.now() - start).total_seconds()
+    files_total   = col_index.count_documents({})
+    size_total    = sum(d.get("size_bytes", 0) for d in col_index.find({}, {"size_bytes": 1}))
+
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  emaily={ok_count}  |  nove={new_count}  |  dup={dup_count}  |  skip={skip_count}  |  err={err_count}")
+    print(f"Souboru v indexu: {files_total}  ({size_total / 1024 / 1024:.1f} MB)")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+
+# Run main() only when executed as a script.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,560 @@
+"""
+parse_emails_graph_v1.0.py
+Nazev:  parse_emails_graph_v1.0.py
+Verze:  1.0
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Cte vsechny emaily ze schranky ordinace@buzalkova.cz primo pres
+    Microsoft Graph API a importuje je jako dokumenty do MongoDB.
+    Z kazde zpravy extrahuje vsechny dostupne vlastnosti:
+
+        - predmet, odesilatel, prijemci (To/CC/BCC s typy)
+        - cas doruceni, odeslani, vytvoreni, modifikace (UTC)
+        - telo HTML (max 2 MB) + textovy preview
+        - prilohy (metadata: jmeno, velikost, MIME typ, inline flag)
+        - internet headers (SPF, DKIM, Received, X-*, ...)
+        - MAPI-ekvivalenty: dulezitost, priznak, konverzacni vlakno,
+          kategorie, In-Reply-To, References, ...
+        - navic: isRead, isDraft, folder_path, inferenceClassification
+
+    Prochazi VSECHNY slozky schranky rekurzivne (Inbox, Sent, Deleted,
+    archivni slozky, ...).
+
+    DB:       emaily
+    Kolekce:  ordinace@buzalkova.cz
+    _id:      Internet Message-ID (nebo "graphid:<id>" jako fallback)
+
+    Bezpecne prerusit a opakovat:
+        - upsert podle _id — duplicity se automaticky prepisi
+        - --skip-existing nacte seznam hotovych _id z MongoDB a preskoci je
+
+    POZOR: Skript pouze CIST ze schranky — zadny zapis do schranky!
+
+Spousteni:
+    python parse_emails_graph_v1.0.py                    # kompletni import
+    python parse_emails_graph_v1.0.py --limit 50         # test na prvnich 50
+    python parse_emails_graph_v1.0.py --skip-existing    # pokracovani po preruseni
+    python parse_emails_graph_v1.0.py --folder Inbox     # jen jedna slozka
+    python parse_emails_graph_v1.0.py --no-indexes       # bez indexu na konci
+
+Zavislosti:
+    msal, requests, pymongo, python-dateutil
+    Python 3.10+
+
+Struktura dokumentu v MongoDB:
+    _id                     Internet Message-ID (nebo graphid: fallback)
+    graph_id                Graph API message ID (pro pripadne dalsi operace)
+    subject                 predmet zpravy
+    normalized_subject      predmet bez RE:/FW:/AW: prefixu
+    importance              0=nizka 1=normalni 2=vysoka
+    flag_status             0=bez priznaku 1=oznaceno 2=dokonceno
+    is_read                 bool — aktualni stav precteni ve schrance
+    is_draft                bool
+    has_attachments         bool
+    attachment_count        int
+    inference_classification focused / other (Outlook AI trideni)
+    categories              [str]
+    conversation_id         Graph conversationId
+    conversation_index      base64 conversationIndex
+    conversation_topic      tema vlakna (z internet headers Thread-Topic)
+    in_reply_to             Message-ID predchozi zpravy
+    internet_references     [Message-ID] — cela historie vlakna
+    received_at             datetime UTC
+    sent_at                 datetime UTC
+    created_at              datetime UTC — cas vytvoreni zaznamu v M365
+    modified_at             datetime UTC — cas posledni modifikace
+    folder_id               Graph parentFolderId
+    folder_path             cela cesta slozky (napr. Inbox/Subfolder)
+    sender.email            emailova adresa odesilatele
+    sender.name             zobrazovane jmeno odesilatele
+    to                      retezec To (joined)
+    cc                      retezec CC
+    bcc                     retezec BCC
+    recipients              [{type, email, name}] — to/cc/bcc s typy
+    body_html               HTML telo (max 2 MB)
+    body_preview            textovy nahled (max 255 znaku z Graph)
+    attachments             [{filename, size_bytes, mime_type,
+                              content_id, is_inline}]
+    headers                 dict internet headers (lowercase_s_podtrzitky)
+    parsed_at               datetime UTC — cas parsovani
+
+Indexy:
+    received_at, sent_at, sender.email, graph_id (unique),
+    conversation_id, folder_path, has_attachments, categories,
+    importance, flag_status, is_read,
+    text_search (subject + body_preview + to + cc)
+
+Historie verzi:
+    1.0  2026-06-02  Inicialni verze — Graph API jako zdroj
+"""
+
+import sys
+import re
+import logging
+import argparse
+import base64
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional
+
+import msal
+import requests
+from dateutil import parser as dtparser
+from pymongo import MongoClient, UpdateOne, ASCENDING, TEXT
+
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── CONFIGURATION ────────────────────────────────────────────────────────────
+# Microsoft Graph app registration used by get_token() (client-credentials flow).
+# NOTE(review): the client secret is stored in plain text in the source file —
+# consider moving it to an environment variable / secret store and rotating it.
+GRAPH_TENANT_ID     = "7d269944-37a4-43a1-8140-c7517dc426e9"
+GRAPH_CLIENT_ID     = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
+GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
+GRAPH_MAILBOX       = "ordinace@buzalkova.cz"
+GRAPH_URL           = "https://graph.microsoft.com/v1.0"
+
+MONGO_URI      = "mongodb://192.168.1.76:27017"
+MONGO_DB       = "emaily"
+MONGO_COL      = "ordinace@buzalkova.cz"
+BATCH_SIZE     = 100
+# Messages requested per Graph page ($top) in iter_folder_messages().
+PAGE_SIZE      = 50
+LOG_FILE       = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION = "1.0"
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only ERROR-level records are written to the log file.
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+# Graph string values -> integer codes stored in MongoDB.
+IMPORTANCE_MAP  = {"low": 0, "normal": 1, "high": 2}
+FLAG_STATUS_MAP = {"notFlagged": 0, "flagged": 1, "complete": 2}
+# One leading reply/forward prefix followed by colons and/or whitespace;
+# normalize_subject() applies it repeatedly.
+RE_SUBJECT      = re.compile(r"^(RE|FW|AW|SV|VS|TR|WG|odpov[eě]d[ťt]|fwd?)[:\s]+", re.IGNORECASE)
+
+# Message properties requested from Graph via $select.
+MSG_SELECT = (
+    "id,internetMessageId,subject,bodyPreview,body,"
+    "importance,isRead,isDraft,hasAttachments,"
+    "receivedDateTime,sentDateTime,createdDateTime,lastModifiedDateTime,"
+    "sender,from,toRecipients,ccRecipients,bccRecipients,replyTo,"
+    "conversationId,conversationIndex,parentFolderId,"
+    "categories,flag,inferenceClassification,internetMessageHeaders"
+)
+
+
+# ─── Graph API helpers ────────────────────────────────────────────────────────
+
+# Cached Graph access token; filled in by get_token().
+_graph_token: Optional[str] = None
+
+
+def get_token() -> str:
+    """Acquire an app-only Graph access token and cache it in ``_graph_token``.
+
+    Uses an MSAL confidential client configured from the module-level tenant,
+    client id and client secret, requesting the Graph ``.default`` scope.
+
+    Returns:
+        The access token string (also stored in the module-level cache).
+
+    Raises:
+        RuntimeError: if the MSAL result does not contain ``access_token``.
+    """
+    global _graph_token
+    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
+    confidential_client = msal.ConfidentialClientApplication(
+        GRAPH_CLIENT_ID,
+        authority=authority_url,
+        client_credential=GRAPH_CLIENT_SECRET,
+    )
+    token_response = confidential_client.acquire_token_for_client(
+        scopes=["https://graph.microsoft.com/.default"]
+    )
+    if "access_token" in token_response:
+        _graph_token = token_response["access_token"]
+        return _graph_token
+    raise RuntimeError(f"Graph auth failed: {token_response}")
+
+
+def graph_get(url: str, params: dict = None) -> dict:
+    """GET ``url`` with the cached Graph bearer token and return the JSON body.
+
+    A token is acquired first if none is cached. A 401 answer triggers one
+    token refresh followed by one more attempt.
+
+    Args:
+        url: Absolute Graph URL.
+        params: Optional query parameters passed to ``requests.get``.
+
+    Returns:
+        The decoded JSON document.
+
+    Raises:
+        requests.HTTPError: for any non-401 error status.
+        RuntimeError: if both attempts were answered with 401.
+    """
+    global _graph_token
+    if not _graph_token:
+        get_token()
+
+    def send():
+        # The header is rebuilt per call so a refreshed token is picked up.
+        return requests.get(
+            url,
+            headers={"Authorization": f"Bearer {_graph_token}"},
+            params=params,
+            timeout=30,
+        )
+
+    tries = 0
+    while tries < 2:
+        tries += 1
+        reply = send()
+        if reply.status_code != 401:
+            reply.raise_for_status()
+            return reply.json()
+        get_token()
+    raise RuntimeError(f"Graph GET failed after retry: {url}")
+
+
+def get_all_folders(parent_id: str = None, parent_path: str = "") -> list[dict]:
+    """Recursively list the mail folders of the configured mailbox.
+
+    Args:
+        parent_id: Folder whose children are listed; None lists the
+            mailbox-level ``mailFolders``.
+        parent_path: Path of the parent folder, used as prefix for the
+            returned paths.
+
+    Returns:
+        A list of ``{"id": ..., "path": ...}`` dicts in which each folder is
+        directly followed by its descendants.
+    """
+    folders_root = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders"
+    next_url = (
+        folders_root if parent_id is None
+        else f"{folders_root}/{parent_id}/childFolders"
+    )
+    query = {"$top": 100, "$select": "id,displayName,childFolderCount"}
+
+    found = []
+    while next_url:
+        page = graph_get(next_url, query)
+        for folder in page.get("value", []):
+            full_path = f"{parent_path}/{folder['displayName']}".lstrip("/")
+            found.append({"id": folder["id"], "path": full_path})
+            if folder.get("childFolderCount", 0) > 0:
+                found += get_all_folders(folder["id"], full_path)
+        next_url = page.get("@odata.nextLink")
+        # The nextLink URL is requested without separate query parameters.
+        query = None
+    return found
+
+
+def iter_folder_messages(folder_id: str):
+    """Yield the messages of one folder, fetching them page by page.
+
+    Each page requests PAGE_SIZE messages with the MSG_SELECT fields and the
+    attachments expanded; following pages are taken from ``@odata.nextLink``.
+    """
+    next_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
+    query = {"$top": PAGE_SIZE, "$select": MSG_SELECT, "$expand": "attachments"}
+    while next_url:
+        page = graph_get(next_url, query)
+        yield from page.get("value", [])
+        next_url = page.get("@odata.nextLink")
+        # The nextLink URL is requested without separate query parameters.
+        query = None
+
+
+# ─── Pomocné funkce ───────────────────────────────────────────────────────────
+
+def parse_date(raw) -> Optional[datetime]:
+    """Convert a Graph date value to a naive datetime expressed in UTC.
+
+    Args:
+        raw: None, a datetime, or anything whose ``str()`` dateutil can parse.
+
+    Returns:
+        None for None or unparsable input. Timezone-aware values are converted
+        to UTC and returned without tzinfo; naive values are returned unchanged.
+    """
+    def to_naive_utc(moment: datetime) -> datetime:
+        if moment.tzinfo:
+            return moment.astimezone(timezone.utc).replace(tzinfo=None)
+        return moment
+
+    if raw is None:
+        return None
+    if isinstance(raw, datetime):
+        return to_naive_utc(raw)
+    try:
+        return to_naive_utc(dtparser.parse(str(raw)))
+    except Exception:
+        return None
+
+
+def normalize_subject(subject: str) -> str:
+    """Strip all leading reply/forward prefixes (RE_SUBJECT) from a subject.
+
+    The prefix pattern is applied repeatedly, so stacked prefixes such as
+    ``"RE: FW: RE: ..."`` are removed completely. Surrounding whitespace is
+    stripped as well.
+    """
+    remainder = subject.strip()
+    while (prefix := RE_SUBJECT.match(remainder)):
+        remainder = remainder[prefix.end():].strip()
+    return remainder
+
+
+def parse_headers(raw_headers: list) -> dict:
+    """Convert a list of ``{"name", "value"}`` headers into a dict.
+
+    Keys are the header names lower-cased with ``-`` replaced by ``_``. A
+    header that occurs once maps to its value; a header that occurs several
+    times maps to the list of its values in order of appearance.
+    """
+    merged = {}
+    for header in raw_headers:
+        key = header["name"].lower().replace("-", "_")
+        value = header["value"]
+        if key not in merged:
+            merged[key] = value
+            continue
+        previous = merged[key]
+        if isinstance(previous, list):
+            previous.append(value)
+        else:
+            # Second occurrence: promote the single value to a list.
+            merged[key] = [previous, value]
+    return merged
+
+
+def format_recipients(lst: list) -> str:
+    """Format Graph recipients as ``"Name <address>; Name <address>"``.
+
+    A missing or null ``name`` / ``address`` is rendered as an empty string
+    (so a nameless recipient becomes ``"<address>"`` rather than
+    ``"None <address>"``), and a recipient without ``emailAddress`` no longer
+    raises.
+
+    Args:
+        lst: List of Graph recipient dicts; None is treated as empty.
+
+    Returns:
+        The recipients joined by ``"; "``, or ``""`` for no recipients.
+    """
+    formatted = []
+    for r in lst or []:
+        ea = (r or {}).get("emailAddress") or {}
+        name = ea.get("name") or ""
+        address = ea.get("address") or ""
+        formatted.append(f"{name} <{address}>".strip())
+    return "; ".join(formatted)
+
+
+# ─── Hlavní extrakce ─────────────────────────────────────────────────────────
+
+def extract_message(msg: dict, folder_path: str) -> Optional[dict]:
+    """Map one Graph message to the MongoDB document stored for it.
+
+    Args:
+        msg: Message dict as returned by Graph (MSG_SELECT fields, with
+            ``attachments`` expanded).
+        folder_path: Path of the folder the message was read from; stored
+            unchanged as ``folder_path``.
+
+    Returns:
+        The document dict, or None when any step raised; the error is logged
+        together with the Graph message id.
+    """
+    try:
+        # _id: Internet Message-ID, falling back to "graphid:<Graph id>"
+        mid = (msg.get("internetMessageId") or "").strip()
+        if not mid:
+            mid = f"graphid:{msg['id']}"
+
+        subject = msg.get("subject") or ""
+        norm_subject = normalize_subject(subject)
+
+        # body
+        body_html = None
+        body_preview = msg.get("bodyPreview") or ""
+        body = msg.get("body", {})
+        if body.get("contentType") == "html":
+            content = body.get("content") or ""
+            # len() counts characters, so the 2 * 1024 * 1024 cap is in
+            # characters, not bytes.
+            body_html = content if len(content) <= 2 * 1024 * 1024 else content[:2 * 1024 * 1024]
+        elif body.get("contentType") == "text":
+            # Plain-text body: the first 2000 characters replace Graph's preview.
+            body_preview = (body.get("content") or "")[:2000]
+
+        # sender ("from" is preferred, "sender" is the fallback)
+        sender_ea = (msg.get("from") or msg.get("sender") or {}).get("emailAddress", {})
+        sender_email = sender_ea.get("address", "")
+        sender_name  = sender_ea.get("name", "")
+
+        # recipients
+        to_list  = msg.get("toRecipients", [])
+        cc_list  = msg.get("ccRecipients", [])
+        bcc_list = msg.get("bccRecipients", [])
+
+        # Flat list of all recipients, each tagged with its type.
+        recipients = (
+            [{"type": "to",  "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in to_list] +
+            [{"type": "cc",  "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in cc_list] +
+            [{"type": "bcc", "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in bcc_list]
+        )
+
+        # flags (unknown values fall back to "normal" / "not flagged")
+        importance  = IMPORTANCE_MAP.get(msg.get("importance", "normal"), 1)
+        flag_status = FLAG_STATUS_MAP.get((msg.get("flag") or {}).get("flagStatus", "notFlagged"), 0)
+
+        # internet headers
+        raw_headers = msg.get("internetMessageHeaders") or []
+        headers = parse_headers(raw_headers)
+
+        # parse_headers() returns a list for repeated headers: take the first
+        # In-Reply-To, but join all References values.
+        in_reply_to = headers.get("in_reply_to", "")
+        if isinstance(in_reply_to, list):
+            in_reply_to = in_reply_to[0]
+
+        refs_raw = headers.get("references", "")
+        if isinstance(refs_raw, list):
+            refs_raw = " ".join(refs_raw)
+        internet_refs = [r.strip() for r in refs_raw.split() if r.strip()] if refs_raw else []
+
+        conv_topic = headers.get("thread_topic", "")
+        if isinstance(conv_topic, list):
+            conv_topic = conv_topic[0]
+
+        # conversation index: decoded and re-encoded as base64; a value that
+        # cannot be decoded is stored as received.
+        conv_index = ""
+        ci_raw = msg.get("conversationIndex")
+        if ci_raw:
+            try:
+                conv_index = base64.b64encode(base64.b64decode(ci_raw)).decode()
+            except Exception:
+                conv_index = ci_raw
+
+        # attachments (metadata only, no content); entries without a name are skipped
+        attachments = []
+        for att in msg.get("attachments") or []:
+            fname = att.get("name") or ""
+            if not fname:
+                continue
+            attachments.append({
+                "filename":   fname,
+                "size_bytes": att.get("size", 0),
+                "mime_type":  att.get("contentType", "application/octet-stream"),
+                "content_id": att.get("contentId"),
+                "is_inline":  att.get("isInline", False),
+            })
+
+        return {
+            "_id":     mid,
+            "graph_id": msg["id"],
+
+            "subject":            subject,
+            "normalized_subject": norm_subject,
+            "importance":         importance,
+            "flag_status":        flag_status,
+            "is_read":            msg.get("isRead", False),
+            "is_draft":           msg.get("isDraft", False),
+            # has_attachments is Graph's flag; attachment_count counts the
+            # named attachments collected above.
+            "has_attachments":    msg.get("hasAttachments", False),
+            "attachment_count":   len(attachments),
+            "inference_classification": msg.get("inferenceClassification", ""),
+            "categories":         msg.get("categories") or [],
+
+            "conversation_id":    msg.get("conversationId", ""),
+            "conversation_index": conv_index,
+            "conversation_topic": conv_topic,
+            "in_reply_to":        in_reply_to,
+            "internet_references": internet_refs,
+
+            "received_at": parse_date(msg.get("receivedDateTime")),
+            "sent_at":     parse_date(msg.get("sentDateTime")),
+            "created_at":  parse_date(msg.get("createdDateTime")),
+            "modified_at": parse_date(msg.get("lastModifiedDateTime")),
+
+            "folder_id":   msg.get("parentFolderId", ""),
+            "folder_path": folder_path,
+
+            "sender": {
+                "email": sender_email,
+                "name":  sender_name,
+            },
+            "to":         format_recipients(to_list),
+            "cc":         format_recipients(cc_list),
+            "bcc":        format_recipients(bcc_list),
+            "recipients": recipients,
+
+            "body_html":    body_html,
+            "body_preview": body_preview,
+
+            "attachments": attachments,
+            "headers":     headers,
+
+            # Naive datetime holding the current UTC time.
+            "parsed_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        }
+
+    except Exception as e:
+        logging.error("extract_message failed [%s]: %s", msg.get("id", "?"), e)
+        return None
+
+
+# ─── MongoDB indexy ───────────────────────────────────────────────────────────
+
+def create_indexes(col):
+    """Create the indexes used on the e-mail collection.
+
+    Single-field ascending indexes are created for the common filter fields,
+    a unique sparse index for ``graph_id``, and one text index named
+    ``text_search`` over subject, body_preview, to and cc with
+    ``default_language="none"``.
+    """
+    print("  Vytvarim indexy...")
+    for field in ("received_at", "sent_at", "sender.email"):
+        col.create_index([(field, ASCENDING)])
+    col.create_index([("graph_id", ASCENDING)], unique=True, sparse=True)
+    for field in (
+        "conversation_id",
+        "folder_path",
+        "has_attachments",
+        "categories",
+        "importance",
+        "flag_status",
+        "is_read",
+    ):
+        col.create_index([(field, ASCENDING)])
+    text_fields = ("subject", "body_preview", "to", "cc")
+    col.create_index(
+        [(field, TEXT) for field in text_fields],
+        name="text_search",
+        default_language="none",
+    )
+    print("  Indexy hotovy.")
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    ap = argparse.ArgumentParser(description=f"parse_emails_graph v{SCRIPT_VERSION}")
+    ap.add_argument("--limit",         type=int, default=0,
+                    help="Zpracovat max N zprav (0 = vse)")
+    ap.add_argument("--skip-existing", action="store_true",
+                    help="Preskocit zpravy ktere jiz jsou v MongoDB")
+    ap.add_argument("--folder",        default="",
+                    help="Zpracovat jen slozku se zadanym nazvem (napr. Inbox)")
+    ap.add_argument("--no-indexes",    action="store_true",
+                    help="Nevytvorit indexy na konci")
+    args = ap.parse_args()
+
+    start = datetime.now()
+    print(f"=== parse_emails_graph v{SCRIPT_VERSION} ===")
+    print(f"Start:    {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Schránka: {GRAPH_MAILBOX}")
+    print(f"MongoDB:  {MONGO_URI} -> {MONGO_DB}.{MONGO_COL}")
+
+    # Graph token
+    print("\nPřipojuji se k Graph API...")
+    try:
+        get_token()
+        print("  Graph API OK")
+    except Exception as e:
+        print(f"  CHYBA: {e}")
+        sys.exit(1)
+
+    # MongoDB
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        sys.exit(1)
+    col = client[MONGO_DB][MONGO_COL]
+
+    # Skip existing
+    existing: set = set()
+    if args.skip_existing:
+        print("  Nacitam existujici zaznamy z MongoDB...")
+        existing = set(col.distinct("_id"))
+        print(f"  {len(existing)} jiz importovano")
+
+    # Slozky
+    print("\nNacitam seznam slozek...")
+    all_folders = get_all_folders()
+    if args.folder:
+        all_folders = [f for f in all_folders if args.folder.lower() in f["path"].lower()]
+    print(f"  Slozek ke zpracovani: {len(all_folders)}")
+    for f in all_folders:
+        print(f"    {f['path']}")
+
+    # Import
+    batch     = []
+    ok_count  = 0
+    err_count = 0
+    skip_count = 0
+    total_i   = 0
+
+    def flush():
+        if not batch:
+            return
+        try:
+            col.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    print()
+    for folder in all_folders:
+        print(f"--- Složka: {folder['path']} ---")
+        folder_count = 0
+
+        for msg in iter_folder_messages(folder["id"]):
+            if args.limit and total_i >= args.limit:
+                break
+
+            mid = (msg.get("internetMessageId") or "").strip() or f"graphid:{msg['id']}"
+
+            if mid in existing:
+                skip_count += 1
+                total_i += 1
+                continue
+
+            doc = extract_message(msg, folder["path"])
+            total_i += 1
+            folder_count += 1
+
+            if doc is None:
+                err_count += 1
+            else:
+                batch.append(UpdateOne({"_id": doc["_id"]}, {"$set": doc}, upsert=True))
+                ok_count += 1
+
+            if len(batch) >= BATCH_SIZE:
+                flush()
+
+            status      = "ERR " if doc is None else "OK  "
+            subject_str = (doc.get("subject") or "")[:60] if doc else "?"
+            sender_str  = (doc.get("sender", {}).get("email") or "")[:40] if doc else "?"
+            print(f"  {total_i:>6}  {status}  {subject_str:<60}  {sender_str}")
+
+            if total_i % 500 == 0:
+                elapsed = (datetime.now() - start).total_seconds()
+                rate    = total_i / elapsed if elapsed > 0 else 0
+                print(f"  {'─'*80}")
+                print(f"  Průběh: ok={ok_count}  skip={skip_count}  err={err_count}  {rate:.1f} msg/s")
+                print(f"  {'─'*80}")
+
+        flush()
+        print(f"  → {folder_count} zprav ze slozky {folder['path']}")
+
+        if args.limit and total_i >= args.limit:
+            break
+
+    elapsed_total = (datetime.now() - start).total_seconds()
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  ok={ok_count}  |  skip={skip_count}  |  err={err_count}")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"Dokumentu v kolekci: {col.count_documents({})}")
+
+    if not args.no_indexes:
+        print()
+        create_indexes(col)
+
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,605 @@
+"""
+parse_emails_graph_v1.1.py
+Nazev:  parse_emails_graph_v1.1.py
+Verze:  1.1
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Cte vsechny emaily ze schranky ordinace@buzalkova.cz primo pres
+    Microsoft Graph API a importuje je jako dokumenty do MongoDB.
+    Z kazde zpravy extrahuje vsechny dostupne vlastnosti:
+
+        - predmet, odesilatel, prijemci (To/CC/BCC s typy)
+        - cas doruceni, odeslani, vytvoreni, modifikace (UTC)
+        - telo HTML (max 2 MB) + textovy preview
+        - prilohy (metadata: jmeno, velikost, MIME typ, inline flag)
+        - internet headers (SPF, DKIM, Received, X-*, ...)
+        - MAPI-ekvivalenty: dulezitost, priznak, konverzacni vlakno,
+          kategorie, In-Reply-To, References, ...
+        - navic: isRead, isDraft, folder_path, inferenceClassification
+
+    Prochazi VSECHNY slozky schranky rekurzivne (Inbox, Sent, Deleted,
+    archivni slozky, ...).
+
+    DB:       emaily
+    Kolekce:  ordinace@buzalkova.cz
+    _id:      Internet Message-ID (nebo "graphid:<id>" jako fallback)
+
+    POZOR: Skript pouze CIST ze schranky — zadny zapis do schranky!
+
+Spousteni:
+    # Prvni import (vsechno):
+    python parse_emails_graph_v1.1.py
+
+    # Test na prvnich 50:
+    python parse_emails_graph_v1.1.py --limit 50 --no-indexes
+
+    # Jen jedna slozka:
+    python parse_emails_graph_v1.1.py --folder Inbox
+
+    # Pokracovani po preruseni (pouze nove):
+    python parse_emails_graph_v1.1.py --mode new-only
+
+    # Pravidelny sync (aktualizuje is_read, flag, slozku; importuje nove):
+    python parse_emails_graph_v1.1.py --mode sync
+
+    # Plny reimport vsech dat:
+    python parse_emails_graph_v1.1.py --mode full
+
+Rezimy (--mode):
+    full      Plny upsert vsech poli pro kazdou zpravu (vychozi)
+    new-only  Preskoci zpravy ktere uz jsou v MongoDB, importuje jen nove
+    sync      Existujici: aktualizuje jen is_read/is_draft/flag_status/
+              importance/categories/modified_at/folder_id/folder_path
+              (a parsed_at). Nove zpravy importuje cely.
+              Idealni pro pravidelne spousteni.
+
+Zavislosti:
+    msal, requests, pymongo, python-dateutil
+    Python 3.10+
+
+Struktura dokumentu v MongoDB:
+    _id                     Internet Message-ID (nebo graphid: fallback)
+    graph_id                Graph API message ID
+    subject                 predmet zpravy
+    normalized_subject      predmet bez RE:/FW:/AW: prefixu
+    importance              0=nizka 1=normalni 2=vysoka
+    flag_status             0=bez priznaku 1=oznaceno 2=dokonceno
+    is_read                 bool — aktualni stav precteni ve schrance
+    is_draft                bool
+    has_attachments         bool
+    attachment_count        int
+    inference_classification focused / other
+    categories              [str]
+    conversation_id         Graph conversationId
+    conversation_index      base64 conversationIndex
+    conversation_topic      tema vlakna (z internet headers Thread-Topic)
+    in_reply_to             Message-ID predchozi zpravy
+    internet_references     [Message-ID]
+    received_at             datetime UTC
+    sent_at                 datetime UTC
+    created_at              datetime UTC
+    modified_at             datetime UTC
+    folder_id               Graph parentFolderId
+    folder_path             cela cesta slozky (napr. Inbox/Subfolder)
+    sender.email            emailova adresa odesilatele
+    sender.name             zobrazovane jmeno
+    to                      retezec To (joined)
+    cc                      retezec CC
+    bcc                     retezec BCC
+    recipients              [{type, email, name}]
+    body_html               HTML telo (max 2 MB)
+    body_preview            textovy nahled (max 255 znaku; u textovych tel prvnich 2000 znaku tela)
+    attachments             [{filename, size_bytes, mime_type, content_id, is_inline}]
+    headers                 dict internet headers
+    parsed_at               datetime UTC
+
+Indexy:
+    received_at, sent_at, sender.email, graph_id (unique),
+    conversation_id, folder_path, has_attachments, categories,
+    importance, flag_status, is_read,
+    text_search (subject + body_preview + to + cc)
+
+Historie verzi:
+    1.0  2026-06-02  Inicialni verze
+    1.1  2026-06-02  Pridany rezimy --mode full/new-only/sync;
+                     odstranen --skip-existing (nahrazen --mode new-only)
+"""
+
+import sys
+import re
+import logging
+import argparse
+import base64
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional
+
+import msal
+import requests
+from dateutil import parser as dtparser
+from pymongo import MongoClient, UpdateOne, ASCENDING, TEXT
+
+# Switch stdout to UTF-8 where the stream supports reconfigure(); characters
+# that still cannot be encoded are replaced instead of raising.
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── CONFIGURATION ────────────────────────────────────────────────────────────
+# NOTE(review): the client secret below is committed in plain text; it should
+# be rotated and loaded from the environment or a secret store instead.
+GRAPH_TENANT_ID     = "7d269944-37a4-43a1-8140-c7517dc426e9"
+GRAPH_CLIENT_ID     = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
+GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
+GRAPH_MAILBOX       = "ordinace@buzalkova.cz"
+GRAPH_URL           = "https://graph.microsoft.com/v1.0"
+
+MONGO_URI      = "mongodb://192.168.1.76:27017"
+MONGO_DB       = "emaily"
+MONGO_COL      = "ordinace@buzalkova.cz"
+# Number of queued write operations that triggers a bulk_write in main().
+BATCH_SIZE     = 100
+# Page size ($top) used when listing the messages of a folder.
+PAGE_SIZE      = 50
+# The error log is written next to this script.
+LOG_FILE       = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION = "1.1"
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only ERROR and above are recorded, and only to LOG_FILE.
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+# Graph string values -> integer codes stored in MongoDB.
+IMPORTANCE_MAP  = {"low": 0, "normal": 1, "high": 2}
+FLAG_STATUS_MAP = {"notFlagged": 0, "flagged": 1, "complete": 2}
+# Leading reply/forward prefix followed by colons/whitespace (case-insensitive).
+RE_SUBJECT      = re.compile(r"^(RE|FW|AW|SV|VS|TR|WG|odpov[eě]d[ťt]|fwd?)[:\s]+", re.IGNORECASE)
+
+# $select list for a full message fetch.
+MSG_SELECT = (
+    "id,internetMessageId,subject,bodyPreview,body,"
+    "importance,isRead,isDraft,hasAttachments,"
+    "receivedDateTime,sentDateTime,createdDateTime,lastModifiedDateTime,"
+    "sender,from,toRecipients,ccRecipients,bccRecipients,replyTo,"
+    "conversationId,conversationIndex,parentFolderId,"
+    "categories,flag,inferenceClassification,internetMessageHeaders"
+)
+
+# Sync mode only needs the mutable fields, which makes the fetch faster.
+MSG_SELECT_SYNC = (
+    "id,internetMessageId,isRead,isDraft,flag,categories,"
+    "lastModifiedDateTime,parentFolderId,importance"
+)
+
+
+# ─── Graph API helpers ────────────────────────────────────────────────────────
+
+# Cached access token; set by get_token() and read by graph_get().
+_graph_token: Optional[str] = None
+
+
+def get_token() -> str:
+    global _graph_token
+    app = msal.ConfidentialClientApplication(
+        GRAPH_CLIENT_ID,
+        authority=f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}",
+        client_credential=GRAPH_CLIENT_SECRET,
+    )
+    result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
+    if "access_token" not in result:
+        raise RuntimeError(f"Graph auth failed: {result}")
+    _graph_token = result["access_token"]
+    return _graph_token
+
+
+def graph_get(url: str, params: dict = None) -> dict:
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for attempt in range(2):
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, params=params, timeout=30)
+        if r.status_code == 401:
+            get_token()
+            continue
+        r.raise_for_status()
+        return r.json()
+    raise RuntimeError(f"Graph GET failed after retry: {url}")
+
+
+def get_all_folders(parent_id: str = None, parent_path: str = "") -> list[dict]:
+    """Rekurzivne nacte vsechny slozky schranky. Vraci [{id, path}]."""
+    if parent_id is None:
+        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders"
+    else:
+        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"
+
+    folders = []
+    params = {"$top": 100, "$select": "id,displayName,childFolderCount"}
+    while url:
+        data = graph_get(url, params)
+        for f in data.get("value", []):
+            path = f"{parent_path}/{f['displayName']}".lstrip("/")
+            folders.append({"id": f["id"], "path": path})
+            if f.get("childFolderCount", 0) > 0:
+                folders.extend(get_all_folders(f["id"], path))
+        url = data.get("@odata.nextLink")
+        params = None
+    return folders
+
+
+def iter_folder_messages(folder_id: str, select: str = MSG_SELECT, expand_attachments: bool = True):
+    """Generator: vraci zpravy ze slozky po strankach."""
+    url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
+    params = {"$top": PAGE_SIZE, "$select": select}
+    if expand_attachments:
+        params["$expand"] = "attachments"
+    while url:
+        data = graph_get(url, params)
+        for msg in data.get("value", []):
+            yield msg
+        url = data.get("@odata.nextLink")
+        params = None
+
+
+# ─── Pomocné funkce ───────────────────────────────────────────────────────────
+
+def parse_date(raw) -> Optional[datetime]:
+    if raw is None:
+        return None
+    if isinstance(raw, datetime):
+        if raw.tzinfo:
+            return raw.astimezone(timezone.utc).replace(tzinfo=None)
+        return raw
+    try:
+        dt = dtparser.parse(str(raw))
+        if dt.tzinfo:
+            return dt.astimezone(timezone.utc).replace(tzinfo=None)
+        return dt
+    except Exception:
+        return None
+
+
+def normalize_subject(subject: str) -> str:
+    s = subject.strip()
+    while True:
+        m = RE_SUBJECT.match(s)
+        if not m:
+            break
+        s = s[m.end():].strip()
+    return s
+
+
+def parse_headers(raw_headers: list) -> dict:
+    result = {}
+    for h in raw_headers:
+        k = h["name"].lower().replace("-", "_")
+        v = h["value"]
+        if k in result:
+            existing = result[k]
+            result[k] = existing + [v] if isinstance(existing, list) else [existing, v]
+        else:
+            result[k] = v
+    return result
+
+
+def format_recipients(lst: list) -> str:
+    return "; ".join(
+        f'{r["emailAddress"].get("name", "")} <{r["emailAddress"].get("address", "")}>'.strip()
+        for r in lst
+    )
+
+
+# ─── Extrakce zprávy ─────────────────────────────────────────────────────────
+
+def extract_message(msg: dict, folder_path: str) -> Optional[dict]:
+    """Plna extrakce — pouziva se pro mode full a nove zpravy v sync/new-only."""
+    try:
+        mid = (msg.get("internetMessageId") or "").strip() or f"graphid:{msg['id']}"
+        subject = msg.get("subject") or ""
+
+        body_html = None
+        body_preview = msg.get("bodyPreview") or ""
+        body = msg.get("body", {})
+        if body.get("contentType") == "html":
+            content = body.get("content") or ""
+            body_html = content if len(content) <= 2 * 1024 * 1024 else content[:2 * 1024 * 1024]
+        elif body.get("contentType") == "text":
+            body_preview = (body.get("content") or "")[:2000]
+
+        sender_ea    = (msg.get("from") or msg.get("sender") or {}).get("emailAddress", {})
+        to_list      = msg.get("toRecipients", [])
+        cc_list      = msg.get("ccRecipients", [])
+        bcc_list     = msg.get("bccRecipients", [])
+
+        recipients = (
+            [{"type": "to",  "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in to_list] +
+            [{"type": "cc",  "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in cc_list] +
+            [{"type": "bcc", "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in bcc_list]
+        )
+
+        importance  = IMPORTANCE_MAP.get(msg.get("importance", "normal"), 1)
+        flag_status = FLAG_STATUS_MAP.get((msg.get("flag") or {}).get("flagStatus", "notFlagged"), 0)
+
+        raw_headers   = msg.get("internetMessageHeaders") or []
+        headers       = parse_headers(raw_headers)
+
+        in_reply_to = headers.get("in_reply_to", "")
+        if isinstance(in_reply_to, list):
+            in_reply_to = in_reply_to[0]
+
+        refs_raw = headers.get("references", "")
+        if isinstance(refs_raw, list):
+            refs_raw = " ".join(refs_raw)
+        internet_refs = [r.strip() for r in refs_raw.split() if r.strip()] if refs_raw else []
+
+        conv_topic = headers.get("thread_topic", "")
+        if isinstance(conv_topic, list):
+            conv_topic = conv_topic[0]
+
+        conv_index = ""
+        ci_raw = msg.get("conversationIndex")
+        if ci_raw:
+            try:
+                conv_index = base64.b64encode(base64.b64decode(ci_raw)).decode()
+            except Exception:
+                conv_index = ci_raw
+
+        attachments = []
+        for att in msg.get("attachments") or []:
+            fname = att.get("name") or ""
+            if not fname:
+                continue
+            attachments.append({
+                "filename":   fname,
+                "size_bytes": att.get("size", 0),
+                "mime_type":  att.get("contentType", "application/octet-stream"),
+                "content_id": att.get("contentId"),
+                "is_inline":  att.get("isInline", False),
+            })
+
+        return {
+            "_id":      mid,
+            "graph_id": msg["id"],
+
+            "subject":            subject,
+            "normalized_subject": normalize_subject(subject),
+            "importance":         importance,
+            "flag_status":        flag_status,
+            "is_read":            msg.get("isRead", False),
+            "is_draft":           msg.get("isDraft", False),
+            "has_attachments":    msg.get("hasAttachments", False),
+            "attachment_count":   len(attachments),
+            "inference_classification": msg.get("inferenceClassification", ""),
+            "categories":         msg.get("categories") or [],
+
+            "conversation_id":     msg.get("conversationId", ""),
+            "conversation_index":  conv_index,
+            "conversation_topic":  conv_topic,
+            "in_reply_to":         in_reply_to,
+            "internet_references": internet_refs,
+
+            "received_at": parse_date(msg.get("receivedDateTime")),
+            "sent_at":     parse_date(msg.get("sentDateTime")),
+            "created_at":  parse_date(msg.get("createdDateTime")),
+            "modified_at": parse_date(msg.get("lastModifiedDateTime")),
+
+            "folder_id":   msg.get("parentFolderId", ""),
+            "folder_path": folder_path,
+
+            "sender": {
+                "email": sender_ea.get("address", ""),
+                "name":  sender_ea.get("name", ""),
+            },
+            "to":         format_recipients(to_list),
+            "cc":         format_recipients(cc_list),
+            "bcc":        format_recipients(bcc_list),
+            "recipients": recipients,
+
+            "body_html":    body_html,
+            "body_preview": body_preview,
+
+            "attachments": attachments,
+            "headers":     headers,
+
+            "parsed_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        }
+
+    except Exception as e:
+        logging.error("extract_message failed [%s]: %s", msg.get("id", "?"), e)
+        return None
+
+
+def extract_sync_fields(msg: dict, folder_path: str) -> dict:
+    """Jen menitelna pole — pouziva se v sync mode pro existujici zpravy."""
+    return {
+        "is_read":    msg.get("isRead", False),
+        "is_draft":   msg.get("isDraft", False),
+        "flag_status": FLAG_STATUS_MAP.get((msg.get("flag") or {}).get("flagStatus", "notFlagged"), 0),
+        "importance":  IMPORTANCE_MAP.get(msg.get("importance", "normal"), 1),
+        "categories":  msg.get("categories") or [],
+        "modified_at": parse_date(msg.get("lastModifiedDateTime")),
+        "folder_id":   msg.get("parentFolderId", ""),
+        "folder_path": folder_path,
+        "parsed_at":   datetime.now(timezone.utc).replace(tzinfo=None),
+    }
+
+
+# ─── MongoDB indexy ───────────────────────────────────────────────────────────
+
+def create_indexes(col):
+    print("  Vytvarim indexy...")
+    col.create_index([("received_at",     ASCENDING)])
+    col.create_index([("sent_at",         ASCENDING)])
+    col.create_index([("sender.email",    ASCENDING)])
+    col.create_index([("graph_id",        ASCENDING)], unique=True, sparse=True)
+    col.create_index([("conversation_id", ASCENDING)])
+    col.create_index([("folder_path",     ASCENDING)])
+    col.create_index([("has_attachments", ASCENDING)])
+    col.create_index([("categories",      ASCENDING)])
+    col.create_index([("importance",      ASCENDING)])
+    col.create_index([("flag_status",     ASCENDING)])
+    col.create_index([("is_read",         ASCENDING)])
+    col.create_index([
+        ("subject",      TEXT),
+        ("body_preview", TEXT),
+        ("to",           TEXT),
+        ("cc",           TEXT),
+    ], name="text_search", default_language="none")
+    print("  Indexy hotovy.")
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    ap = argparse.ArgumentParser(description=f"parse_emails_graph v{SCRIPT_VERSION}")
+    ap.add_argument("--mode", default="full", choices=["full", "new-only", "sync"],
+                    help="full=plny upsert (vychozi) | new-only=jen nove zpravy | "
+                         "sync=existujici aktualizuje jen menitelna pole, nove importuje cely")
+    ap.add_argument("--limit",      type=int, default=0,
+                    help="Zpracovat max N zprav (0 = vse)")
+    ap.add_argument("--folder",     default="",
+                    help="Zpracovat jen slozku se zadanym nazvem (napr. Inbox)")
+    ap.add_argument("--no-indexes", action="store_true",
+                    help="Nevytvorit indexy na konci")
+    args = ap.parse_args()
+
+    start = datetime.now()
+    print(f"=== parse_emails_graph v{SCRIPT_VERSION} ===")
+    print(f"Start:    {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Schránka: {GRAPH_MAILBOX}")
+    print(f"MongoDB:  {MONGO_URI} -> {MONGO_DB}.{MONGO_COL}")
+    print(f"Režim:    {args.mode}")
+
+    print("\nPřipojuji se k Graph API...")
+    try:
+        get_token()
+        print("  Graph API OK")
+    except Exception as e:
+        print(f"  CHYBA: {e}")
+        sys.exit(1)
+
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        sys.exit(1)
+    col = client[MONGO_DB][MONGO_COL]
+
+    # Existující _id (potřeba pro new-only a sync)
+    existing: set = set()
+    if args.mode in ("new-only", "sync"):
+        print("  Nacitam existujici zaznamy z MongoDB...")
+        existing = set(col.distinct("_id"))
+        print(f"  {len(existing)} jiz importovano")
+
+    print("\nNacitam seznam slozek...")
+    all_folders = get_all_folders()
+    if args.folder:
+        all_folders = [f for f in all_folders if args.folder.lower() in f["path"].lower()]
+    print(f"  Slozek ke zpracovani: {len(all_folders)}")
+    for f in all_folders:
+        print(f"    {f['path']}")
+
+    # V sync mode fetchujeme jen menitelna pole
+    is_sync    = args.mode == "sync"
+    msg_select = MSG_SELECT_SYNC if is_sync else MSG_SELECT
+    expand_att = not is_sync
+
+    batch      = []
+    ok_count   = 0
+    sync_count = 0
+    err_count  = 0
+    skip_count = 0
+    total_i    = 0
+
+    def flush():
+        if not batch:
+            return
+        try:
+            col.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    print()
+    for folder in all_folders:
+        print(f"--- Složka: {folder['path']} ---")
+        folder_count = 0
+
+        for msg in iter_folder_messages(folder["id"], select=msg_select, expand_attachments=expand_att):
+            if args.limit and total_i >= args.limit:
+                break
+
+            mid = (msg.get("internetMessageId") or "").strip() or f"graphid:{msg['id']}"
+            total_i += 1
+            folder_count += 1
+
+            if args.mode == "new-only" and mid in existing:
+                skip_count += 1
+                continue
+
+            if is_sync and mid in existing:
+                # Sync existujici — jen menitelna pole
+                fields = extract_sync_fields(msg, folder["path"])
+                batch.append(UpdateOne({"_id": mid}, {"$set": fields}))
+                sync_count += 1
+                status = "SYN "
+                print(f"  {total_i:>6}  {status}  {mid[:80]}")
+            else:
+                # Full extract (new-only nove, sync nove, full vse)
+                # Pro sync nove zpravy potrebujeme plny fetch
+                if is_sync:
+                    full_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{msg['id']}"
+                    full_params = {"$select": MSG_SELECT, "$expand": "attachments"}
+                    try:
+                        msg = graph_get(full_url, full_params)
+                    except Exception as e:
+                        logging.error("full fetch failed [%s]: %s", msg.get("id","?"), e)
+                        err_count += 1
+                        continue
+
+                doc = extract_message(msg, folder["path"])
+                if doc is None:
+                    err_count += 1
+                    status = "ERR "
+                    print(f"  {total_i:>6}  {status}  {mid[:80]}")
+                else:
+                    batch.append(UpdateOne({"_id": doc["_id"]}, {"$set": doc}, upsert=True))
+                    ok_count += 1
+                    status = "OK  "
+                    subject_str = (doc.get("subject") or "")[:60]
+                    sender_str  = (doc.get("sender", {}).get("email") or "")[:40]
+                    print(f"  {total_i:>6}  {status}  {subject_str:<60}  {sender_str}")
+
+            if len(batch) >= BATCH_SIZE:
+                flush()
+
+            if total_i % 500 == 0:
+                elapsed = (datetime.now() - start).total_seconds()
+                rate    = total_i / elapsed if elapsed > 0 else 0
+                print(f"  {'─'*80}")
+                print(f"  Průběh: ok={ok_count}  sync={sync_count}  skip={skip_count}  err={err_count}  {rate:.1f} msg/s")
+                print(f"  {'─'*80}")
+
+        flush()
+        print(f"  → {folder_count} zprav ze slozky {folder['path']}")
+
+        if args.limit and total_i >= args.limit:
+            break
+
+    elapsed_total = (datetime.now() - start).total_seconds()
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  ok={ok_count}  |  sync={sync_count}  |  skip={skip_count}  |  err={err_count}")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"Dokumentu v kolekci: {col.count_documents({})}")
+
+    if not args.no_indexes:
+        print()
+        create_indexes(col)
+
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,610 @@
+"""
+parse_emails_graph_v1.2.py
+Nazev:  parse_emails_graph_v1.2.py
+Verze:  1.2
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Cte vsechny emaily ze schranky ordinace@buzalkova.cz primo pres
+    Microsoft Graph API a importuje je jako dokumenty do MongoDB.
+    Z kazde zpravy extrahuje vsechny dostupne vlastnosti:
+
+        - predmet, odesilatel, prijemci (To/CC/BCC s typy)
+        - cas doruceni, odeslani, vytvoreni, modifikace (UTC)
+        - telo HTML (max 2 MB) + textovy preview
+        - prilohy (metadata: jmeno, velikost, MIME typ, inline flag, graph_att_id)
+        - internet headers (SPF, DKIM, Received, X-*, ...)
+        - MAPI-ekvivalenty: dulezitost, priznak, konverzacni vlakno,
+          kategorie, In-Reply-To, References, ...
+        - navic: isRead, isDraft, folder_path, inferenceClassification
+
+    Prochazi VSECHNY slozky schranky rekurzivne (Inbox, Sent, Deleted,
+    archivni slozky, ...).
+
+    DB:       emaily
+    Kolekce:  ordinace@buzalkova.cz
+    _id:      Internet Message-ID (nebo "graphid:<id>" jako fallback)
+
+    POZOR: Skript pouze CIST ze schranky — zadny zapis do schranky!
+
+Spousteni:
+    # Prvni import (vsechno):
+    python parse_emails_graph_v1.2.py
+
+    # Test na prvnich 50:
+    python parse_emails_graph_v1.2.py --limit 50 --no-indexes
+
+    # Jen jedna slozka:
+    python parse_emails_graph_v1.2.py --folder Inbox
+
+    # Pokracovani po preruseni (pouze nove):
+    python parse_emails_graph_v1.2.py --mode new-only
+
+    # Pravidelny sync (aktualizuje is_read, flag, slozku; importuje nove):
+    python parse_emails_graph_v1.2.py --mode sync
+
+    # Plny reimport vsech dat:
+    python parse_emails_graph_v1.2.py --mode full
+
+Rezimy (--mode):
+    full      Plny upsert vsech poli pro kazdou zpravu (vychozi)
+    new-only  Preskoci zpravy ktere uz jsou v MongoDB, importuje jen nove
+    sync      Existujici: aktualizuje jen is_read/flag_status/categories/
+              modified_at/folder_path. Nove zpravy importuje cely.
+              Idealni pro pravidelne spousteni.
+
+Zavislosti:
+    msal, requests, pymongo, python-dateutil
+    Python 3.10+
+
+Struktura dokumentu v MongoDB:
+    _id                     Internet Message-ID (nebo graphid: fallback)
+    graph_id                Graph API message ID
+    subject                 predmet zpravy
+    normalized_subject      predmet bez RE:/FW:/AW: prefixu
+    importance              0=nizka 1=normalni 2=vysoka
+    flag_status             0=bez priznaku 1=oznaceno 2=dokonceno
+    is_read                 bool — aktualni stav precteni ve schrance
+    is_draft                bool
+    has_attachments         bool
+    attachment_count        int
+    inference_classification focused / other
+    categories              [str]
+    conversation_id         Graph conversationId
+    conversation_index      base64 conversationIndex
+    conversation_topic      tema vlakna (z internet headers Thread-Topic)
+    in_reply_to             Message-ID predchozi zpravy
+    internet_references     [Message-ID]
+    received_at             datetime UTC
+    sent_at                 datetime UTC
+    created_at              datetime UTC
+    modified_at             datetime UTC
+    folder_id               Graph parentFolderId
+    folder_path             cela cesta slozky (napr. Inbox/Subfolder)
+    sender.email            emailova adresa odesilatele
+    sender.name             zobrazovane jmeno
+    to                      retezec To (joined)
+    cc                      retezec CC
+    bcc                     retezec BCC
+    recipients              [{type, email, name}]
+    body_html               HTML telo (max 2 MB)
+    body_preview            textovy nahled (bodyPreview; u ciste textovych zprav prvnich 2000 znaku tela)
+    attachments             [{filename, size_bytes, mime_type, is_inline, graph_att_id}]
+    headers                 dict internet headers
+    parsed_at               datetime UTC
+
+Indexy:
+    received_at, sent_at, sender.email, graph_id (unique),
+    conversation_id, folder_path, has_attachments, categories,
+    importance, flag_status, is_read,
+    text_search (subject + body_preview + to + cc)
+
+Historie verzi:
+    1.0  2026-06-02  Inicialni verze
+    1.1  2026-06-02  Pridany rezimy --mode full/new-only/sync;
+                     odstranen --skip-existing (nahrazen --mode new-only)
+    1.2  2026-06-02  $expand attachments s $select (bez contentBytes — rychlejsi);
+                     prilohy ukladaji graph_att_id pro prime stazeni bez name-matchingu
+"""
+
+import sys
+import re
+import logging
+import argparse
+import base64
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional
+
+import msal
+import requests
+from dateutil import parser as dtparser
+from pymongo import MongoClient, UpdateOne, ASCENDING, TEXT
+
+# Force UTF-8 console output (undecodable characters are replaced instead of
+# raising); guarded because not every stdout object offers reconfigure().
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── KONFIGURACE ──────────────────────────────────────────────────────────────
+# NOTE(review): GRAPH_CLIENT_SECRET in this section is an application client
+# secret committed in plain text. It should be rotated and then loaded from the
+# environment or a secret store instead of living in the repository.
+GRAPH_TENANT_ID     = "7d269944-37a4-43a1-8140-c7517dc426e9"
+GRAPH_CLIENT_ID     = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
+GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
+GRAPH_MAILBOX       = "ordinace@buzalkova.cz"
+GRAPH_URL           = "https://graph.microsoft.com/v1.0"
+
+MONGO_URI      = "mongodb://192.168.1.76:27017"
+MONGO_DB       = "emaily"
+MONGO_COL      = "ordinace@buzalkova.cz"
+BATCH_SIZE     = 100
+PAGE_SIZE      = 50
+LOG_FILE       = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION = "1.2"
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only errors are persisted; they go to LOG_FILE (UTF-8), not to the console.
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+IMPORTANCE_MAP  = {"low": 0, "normal": 1, "high": 2}
+FLAG_STATUS_MAP = {"notFlagged": 0, "flagged": 1, "complete": 2}
+RE_SUBJECT      = re.compile(r"^(RE|FW|AW|SV|VS|TR|WG|odpov[eě]d[ťt]|fwd?)[:\s]+", re.IGNORECASE)
+
+# $expand for attachments without contentBytes — only the metadata we need.
+ATT_EXPAND = "attachments($select=id,name,contentType,size,isInline)"
+
+# $select list used for a full message import.
+MSG_SELECT = (
+    "id,internetMessageId,subject,bodyPreview,body,"
+    "importance,isRead,isDraft,hasAttachments,"
+    "receivedDateTime,sentDateTime,createdDateTime,lastModifiedDateTime,"
+    "sender,from,toRecipients,ccRecipients,bccRecipients,replyTo,"
+    "conversationId,conversationIndex,parentFolderId,"
+    "categories,flag,inferenceClassification,internetMessageHeaders"
+)
+
+# Sync mode only needs the mutable fields — a lighter, faster fetch.
+MSG_SELECT_SYNC = (
+    "id,internetMessageId,isRead,isDraft,flag,categories,"
+    "lastModifiedDateTime,parentFolderId,importance"
+)
+
+
+# ─── Graph API helpers ────────────────────────────────────────────────────────
+
+# Cached bearer token; filled by get_token() and reused by graph_get().
+_graph_token: Optional[str] = None
+
+
+def get_token() -> str:
+    global _graph_token
+    app = msal.ConfidentialClientApplication(
+        GRAPH_CLIENT_ID,
+        authority=f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}",
+        client_credential=GRAPH_CLIENT_SECRET,
+    )
+    result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
+    if "access_token" not in result:
+        raise RuntimeError(f"Graph auth failed: {result}")
+    _graph_token = result["access_token"]
+    return _graph_token
+
+
+def graph_get(url: str, params: dict = None) -> dict:
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for attempt in range(2):
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, params=params, timeout=30)
+        if r.status_code == 401:
+            get_token()
+            continue
+        r.raise_for_status()
+        return r.json()
+    raise RuntimeError(f"Graph GET failed after retry: {url}")
+
+
+def get_all_folders(parent_id: str = None, parent_path: str = "") -> list[dict]:
+    """Rekurzivne nacte vsechny slozky schranky. Vraci [{id, path}]."""
+    if parent_id is None:
+        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders"
+    else:
+        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"
+
+    folders = []
+    params = {"$top": 100, "$select": "id,displayName,childFolderCount"}
+    while url:
+        data = graph_get(url, params)
+        for f in data.get("value", []):
+            path = f"{parent_path}/{f['displayName']}".lstrip("/")
+            folders.append({"id": f["id"], "path": path})
+            if f.get("childFolderCount", 0) > 0:
+                folders.extend(get_all_folders(f["id"], path))
+        url = data.get("@odata.nextLink")
+        params = None
+    return folders
+
+
+def iter_folder_messages(folder_id: str, select: str = MSG_SELECT, expand_attachments: bool = True):
+    """Generator: vraci zpravy ze slozky po strankach."""
+    url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
+    params = {"$top": PAGE_SIZE, "$select": select}
+    if expand_attachments:
+        params["$expand"] = ATT_EXPAND
+    while url:
+        data = graph_get(url, params)
+        for msg in data.get("value", []):
+            yield msg
+        url = data.get("@odata.nextLink")
+        params = None
+
+
+# ─── Pomocné funkce ───────────────────────────────────────────────────────────
+
+def parse_date(raw) -> Optional[datetime]:
+    if raw is None:
+        return None
+    if isinstance(raw, datetime):
+        if raw.tzinfo:
+            return raw.astimezone(timezone.utc).replace(tzinfo=None)
+        return raw
+    try:
+        dt = dtparser.parse(str(raw))
+        if dt.tzinfo:
+            return dt.astimezone(timezone.utc).replace(tzinfo=None)
+        return dt
+    except Exception:
+        return None
+
+
+def normalize_subject(subject: str) -> str:
+    s = subject.strip()
+    while True:
+        m = RE_SUBJECT.match(s)
+        if not m:
+            break
+        s = s[m.end():].strip()
+    return s
+
+
+def parse_headers(raw_headers: list) -> dict:
+    result = {}
+    for h in raw_headers:
+        k = h["name"].lower().replace("-", "_")
+        v = h["value"]
+        if k in result:
+            existing = result[k]
+            result[k] = existing + [v] if isinstance(existing, list) else [existing, v]
+        else:
+            result[k] = v
+    return result
+
+
+def format_recipients(lst: list) -> str:
+    return "; ".join(
+        f'{r["emailAddress"].get("name", "")} <{r["emailAddress"].get("address", "")}>'.strip()
+        for r in lst
+    )
+
+
+# ─── Extrakce zprávy ─────────────────────────────────────────────────────────
+
+def extract_message(msg: dict, folder_path: str) -> Optional[dict]:
+    """Plna extrakce — pouziva se pro mode full a nove zpravy v sync/new-only."""
+    try:
+        mid = (msg.get("internetMessageId") or "").strip() or f"graphid:{msg['id']}"
+        subject = msg.get("subject") or ""
+
+        body_html = None
+        body_preview = msg.get("bodyPreview") or ""
+        body = msg.get("body", {})
+        if body.get("contentType") == "html":
+            content = body.get("content") or ""
+            body_html = content if len(content) <= 2 * 1024 * 1024 else content[:2 * 1024 * 1024]
+        elif body.get("contentType") == "text":
+            body_preview = (body.get("content") or "")[:2000]
+
+        sender_ea    = (msg.get("from") or msg.get("sender") or {}).get("emailAddress", {})
+        to_list      = msg.get("toRecipients", [])
+        cc_list      = msg.get("ccRecipients", [])
+        bcc_list     = msg.get("bccRecipients", [])
+
+        recipients = (
+            [{"type": "to",  "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in to_list] +
+            [{"type": "cc",  "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in cc_list] +
+            [{"type": "bcc", "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in bcc_list]
+        )
+
+        importance  = IMPORTANCE_MAP.get(msg.get("importance", "normal"), 1)
+        flag_status = FLAG_STATUS_MAP.get((msg.get("flag") or {}).get("flagStatus", "notFlagged"), 0)
+
+        raw_headers   = msg.get("internetMessageHeaders") or []
+        headers       = parse_headers(raw_headers)
+
+        in_reply_to = headers.get("in_reply_to", "")
+        if isinstance(in_reply_to, list):
+            in_reply_to = in_reply_to[0]
+
+        refs_raw = headers.get("references", "")
+        if isinstance(refs_raw, list):
+            refs_raw = " ".join(refs_raw)
+        internet_refs = [r.strip() for r in refs_raw.split() if r.strip()] if refs_raw else []
+
+        conv_topic = headers.get("thread_topic", "")
+        if isinstance(conv_topic, list):
+            conv_topic = conv_topic[0]
+
+        conv_index = ""
+        ci_raw = msg.get("conversationIndex")
+        if ci_raw:
+            try:
+                conv_index = base64.b64encode(base64.b64decode(ci_raw)).decode()
+            except Exception:
+                conv_index = ci_raw
+
+        attachments = []
+        for att in msg.get("attachments") or []:
+            fname = att.get("name") or ""
+            if not fname:
+                continue
+            attachments.append({
+                "filename":     fname,
+                "size_bytes":   att.get("size", 0),
+                "mime_type":    att.get("contentType", "application/octet-stream"),
+                "is_inline":    att.get("isInline", False),
+                "graph_att_id": att.get("id"),
+            })
+
+        return {
+            "_id":      mid,
+            "graph_id": msg["id"],
+
+            "subject":            subject,
+            "normalized_subject": normalize_subject(subject),
+            "importance":         importance,
+            "flag_status":        flag_status,
+            "is_read":            msg.get("isRead", False),
+            "is_draft":           msg.get("isDraft", False),
+            "has_attachments":    msg.get("hasAttachments", False),
+            "attachment_count":   len(attachments),
+            "inference_classification": msg.get("inferenceClassification", ""),
+            "categories":         msg.get("categories") or [],
+
+            "conversation_id":     msg.get("conversationId", ""),
+            "conversation_index":  conv_index,
+            "conversation_topic":  conv_topic,
+            "in_reply_to":         in_reply_to,
+            "internet_references": internet_refs,
+
+            "received_at": parse_date(msg.get("receivedDateTime")),
+            "sent_at":     parse_date(msg.get("sentDateTime")),
+            "created_at":  parse_date(msg.get("createdDateTime")),
+            "modified_at": parse_date(msg.get("lastModifiedDateTime")),
+
+            "folder_id":   msg.get("parentFolderId", ""),
+            "folder_path": folder_path,
+
+            "sender": {
+                "email": sender_ea.get("address", ""),
+                "name":  sender_ea.get("name", ""),
+            },
+            "to":         format_recipients(to_list),
+            "cc":         format_recipients(cc_list),
+            "bcc":        format_recipients(bcc_list),
+            "recipients": recipients,
+
+            "body_html":    body_html,
+            "body_preview": body_preview,
+
+            "attachments": attachments,
+            "headers":     headers,
+
+            "parsed_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        }
+
+    except Exception as e:
+        logging.error("extract_message failed [%s]: %s", msg.get("id", "?"), e)
+        return None
+
+
+def extract_sync_fields(msg: dict, folder_path: str) -> dict:
+    """Jen menitelna pole — pouziva se v sync mode pro existujici zpravy."""
+    return {
+        "is_read":    msg.get("isRead", False),
+        "is_draft":   msg.get("isDraft", False),
+        "flag_status": FLAG_STATUS_MAP.get((msg.get("flag") or {}).get("flagStatus", "notFlagged"), 0),
+        "importance":  IMPORTANCE_MAP.get(msg.get("importance", "normal"), 1),
+        "categories":  msg.get("categories") or [],
+        "modified_at": parse_date(msg.get("lastModifiedDateTime")),
+        "folder_id":   msg.get("parentFolderId", ""),
+        "folder_path": folder_path,
+        "parsed_at":   datetime.now(timezone.utc).replace(tzinfo=None),
+    }
+
+
+# ─── MongoDB indexy ───────────────────────────────────────────────────────────
+
+def create_indexes(col):
+    print("  Vytvarim indexy...")
+    col.create_index([("received_at",     ASCENDING)])
+    col.create_index([("sent_at",         ASCENDING)])
+    col.create_index([("sender.email",    ASCENDING)])
+    col.create_index([("graph_id",        ASCENDING)], unique=True, sparse=True)
+    col.create_index([("conversation_id", ASCENDING)])
+    col.create_index([("folder_path",     ASCENDING)])
+    col.create_index([("has_attachments", ASCENDING)])
+    col.create_index([("categories",      ASCENDING)])
+    col.create_index([("importance",      ASCENDING)])
+    col.create_index([("flag_status",     ASCENDING)])
+    col.create_index([("is_read",         ASCENDING)])
+    col.create_index([
+        ("subject",      TEXT),
+        ("body_preview", TEXT),
+        ("to",           TEXT),
+        ("cc",           TEXT),
+    ], name="text_search", default_language="none")
+    print("  Indexy hotovy.")
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Command-line entry point: import mailbox messages into MongoDB.
+
+    Parses the CLI options, checks the Graph and MongoDB connections, walks
+    the selected folders and writes the messages in batches according to
+    ``--mode`` (full / new-only / sync). Prints a summary and, unless
+    ``--no-indexes`` is given, creates the indexes at the end.
+
+    Exits with status 1 when Graph authentication or the MongoDB ping fails.
+
+    NOTE(review): in full mode every message is written with ``$set`` of the
+    whole document, including a freshly built ``attachments`` list. The
+    download_attachments script documents that it stores ``file_hash`` and
+    ``local_path`` inside ``attachments[i]``, so a full re-import presumably
+    discards those values — confirm against that script.
+    """
+    ap = argparse.ArgumentParser(description=f"parse_emails_graph v{SCRIPT_VERSION}")
+    ap.add_argument("--mode", default="full", choices=["full", "new-only", "sync"],
+                    help="full=plny upsert (vychozi) | new-only=jen nove zpravy | "
+                         "sync=existujici aktualizuje jen menitelna pole, nove importuje cely")
+    ap.add_argument("--limit",      type=int, default=0,
+                    help="Zpracovat max N zprav (0 = vse)")
+    ap.add_argument("--folder",     default="",
+                    help="Zpracovat jen slozku se zadanym nazvem (napr. Inbox)")
+    ap.add_argument("--no-indexes", action="store_true",
+                    help="Nevytvorit indexy na konci")
+    args = ap.parse_args()
+
+    start = datetime.now()
+    print(f"=== parse_emails_graph v{SCRIPT_VERSION} ===")
+    print(f"Start:    {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Schránka: {GRAPH_MAILBOX}")
+    print(f"MongoDB:  {MONGO_URI} -> {MONGO_DB}.{MONGO_COL}")
+    print(f"Režim:    {args.mode}")
+
+    # Fail fast when Graph authentication does not work.
+    print("\nPřipojuji se k Graph API...")
+    try:
+        get_token()
+        print("  Graph API OK")
+    except Exception as e:
+        print(f"  CHYBA: {e}")
+        sys.exit(1)
+
+    # Same for MongoDB: ping before doing any work.
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        sys.exit(1)
+    col = client[MONGO_DB][MONGO_COL]
+
+    # Existing _id values (needed for new-only and sync). The set is loaded
+    # once and is not extended with messages imported during this run.
+    existing: set = set()
+    if args.mode in ("new-only", "sync"):
+        print("  Nacitam existujici zaznamy z MongoDB...")
+        existing = set(col.distinct("_id"))
+        print(f"  {len(existing)} jiz importovano")
+
+    print("\nNacitam seznam slozek...")
+    all_folders = get_all_folders()
+    if args.folder:
+        # Case-insensitive substring match against the full folder path.
+        all_folders = [f for f in all_folders if args.folder.lower() in f["path"].lower()]
+    print(f"  Slozek ke zpracovani: {len(all_folders)}")
+    for f in all_folders:
+        print(f"    {f['path']}")
+
+    # In sync mode only the mutable fields are fetched (no attachment expand).
+    is_sync    = args.mode == "sync"
+    msg_select = MSG_SELECT_SYNC if is_sync else MSG_SELECT
+    expand_att = not is_sync
+
+    batch      = []
+    ok_count   = 0
+    sync_count = 0
+    err_count  = 0
+    skip_count = 0
+    total_i    = 0
+
+    def flush():
+        # Write the pending operations; a bulk_write failure is logged and
+        # printed, and the batch is cleared either way.
+        if not batch:
+            return
+        try:
+            col.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    print()
+    for folder in all_folders:
+        print(f"--- Složka: {folder['path']} ---")
+        folder_count = 0
+
+        for msg in iter_folder_messages(folder["id"], select=msg_select, expand_attachments=expand_att):
+            if args.limit and total_i >= args.limit:
+                break
+
+            # Same _id rule as extract_message: Message-ID, else "graphid:<id>".
+            mid = (msg.get("internetMessageId") or "").strip() or f"graphid:{msg['id']}"
+            total_i += 1
+            folder_count += 1
+
+            if args.mode == "new-only" and mid in existing:
+                skip_count += 1
+                continue
+
+            if is_sync and mid in existing:
+                # Sync of an existing message — mutable fields only.
+                fields = extract_sync_fields(msg, folder["path"])
+                batch.append(UpdateOne({"_id": mid}, {"$set": fields}))
+                sync_count += 1
+                status = "SYN "
+                print(f"  {total_i:>6}  {status}  {mid[:80]}")
+            else:
+                # Full extract (new messages in new-only/sync, everything in full).
+                # In sync mode a new message needs a complete fetch first.
+                if is_sync:
+                    full_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{msg['id']}"
+                    full_params = {"$select": MSG_SELECT, "$expand": ATT_EXPAND}
+                    try:
+                        msg = graph_get(full_url, full_params)
+                    except Exception as e:
+                        logging.error("full fetch failed [%s]: %s", msg.get("id","?"), e)
+                        err_count += 1
+                        continue
+
+                doc = extract_message(msg, folder["path"])
+                if doc is None:
+                    err_count += 1
+                    status = "ERR "
+                    print(f"  {total_i:>6}  {status}  {mid[:80]}")
+                else:
+                    batch.append(UpdateOne({"_id": doc["_id"]}, {"$set": doc}, upsert=True))
+                    ok_count += 1
+                    status = "OK  "
+                    subject_str = (doc.get("subject") or "")[:60]
+                    sender_str  = (doc.get("sender", {}).get("email") or "")[:40]
+                    print(f"  {total_i:>6}  {status}  {subject_str:<60}  {sender_str}")
+
+            if len(batch) >= BATCH_SIZE:
+                flush()
+
+            # Progress line every 500 processed messages.
+            if total_i % 500 == 0:
+                elapsed = (datetime.now() - start).total_seconds()
+                rate    = total_i / elapsed if elapsed > 0 else 0
+                print(f"  {'─'*80}")
+                print(f"  Průběh: ok={ok_count}  sync={sync_count}  skip={skip_count}  err={err_count}  {rate:.1f} msg/s")
+                print(f"  {'─'*80}")
+
+        # Write whatever is left for this folder before moving on.
+        flush()
+        print(f"  → {folder_count} zprav ze slozky {folder['path']}")
+
+        if args.limit and total_i >= args.limit:
+            break
+
+    elapsed_total = (datetime.now() - start).total_seconds()
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  ok={ok_count}  |  sync={sync_count}  |  skip={skip_count}  |  err={err_count}")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"Dokumentu v kolekci: {col.count_documents({})}")
+
+    if not args.no_indexes:
+        print()
+        create_indexes(col)
+
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+
+# Run the importer only when the file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,449 @@
+"""
+download_attachments_v1.0.py
+Nazev:  download_attachments_v1.0.py
+Verze:  1.0
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Stahuje skutecne prilohy (is_inline=False) vsech emailu z MongoDB kolekce
+    ordinace@buzalkova.cz primo pres Microsoft Graph API a uklada je do
+    adresare /mnt/Emails/ordinace@buzalkova.cz/Attachments/.
+
+    Deduplikace podle SHA256 hashe obsahu:
+        - stejny hash = soubor uz existuje -> preskoci
+        - prvni vyskyt souboru: ulozi pod puvodnim nazvem
+        - kolize nazvu (stejny nazev, jiny hash): faktura_2.pdf, faktura_3.pdf ...
+
+    Po ulozeni aktualizuje MongoDB:
+        - v email dokumentu: kazda priloha dostane file_hash + local_path
+        - kolekce emaily.attachments_index: _id=hash, filename, path, size_bytes,
+          mime_type, first_seen_at, ref_count (pocet emailu, ktere ji obsahuji)
+
+    Bezpecne prerusit a opakovat:
+        - zpravy kde jsou vsechny prilohy uz stazene (maji file_hash) se preskoci
+        - --force-recheck znovu overi i uz stazene (pro pripad zmen na disku)
+
+    POZOR: Skript pouze CIST ze schranky — zadny zapis do schranky!
+
+Spousteni:
+    python download_attachments_v1.0.py               # stahni vse co chybi
+    python download_attachments_v1.0.py --limit 50    # test na prvnich 50 emailech
+    python download_attachments_v1.0.py --force-recheck  # overi i uz stazene
+
+Docker (po pridani mountu /mnt/user/Emails -> /mnt/Emails):
+    docker exec -it python-runner python /scripts/download_attachments_v1.0.py
+
+Zavislosti:
+    msal, requests, pymongo
+    Python 3.10+
+
+Struktura na disku:
+    /mnt/Emails/
+    └── ordinace@buzalkova.cz/
+        └── Attachments/
+            ├── faktura_2026.pdf
+            ├── vysledky_lab.pdf
+            ├── vysledky_lab_2.pdf   <- kolize nazvu, jiny obsah
+            └── ...
+
+Kolekce emaily.attachments_index:
+    _id          SHA256 hash (hex)
+    filename     nazev souboru na disku (prvni vyskyt)
+    local_path   relativni cesta od Attachments/ (zatim = filename)
+    size_bytes   velikost souboru
+    mime_type    MIME typ
+    first_seen_at  datetime UTC
+    ref_count    v kolika emailech se tato priloha vyskytuje
+
+Aktualizace v email dokumentu (kolekce ordinace@buzalkova.cz):
+    attachments[i].file_hash    SHA256 hash
+    attachments[i].local_path   cesta relativni od Attachments/
+
+Historie verzi:
+    1.0  2026-06-02  Inicialni verze
+"""
+
+import sys
+import hashlib
+import logging
+import argparse
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional
+
+import msal
+import requests
+from pymongo import MongoClient, UpdateOne
+
+# Force UTF-8 console output (undecodable characters are replaced instead of
+# raising); guarded because not every stdout object offers reconfigure().
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── KONFIGURACE ──────────────────────────────────────────────────────────────
+# NOTE(review): GRAPH_CLIENT_SECRET in this section is an application client
+# secret committed in plain text. It should be rotated and then loaded from the
+# environment or a secret store instead of living in the repository.
+GRAPH_TENANT_ID     = "7d269944-37a4-43a1-8140-c7517dc426e9"
+GRAPH_CLIENT_ID     = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
+GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
+GRAPH_MAILBOX       = "ordinace@buzalkova.cz"
+GRAPH_URL           = "https://graph.microsoft.com/v1.0"
+
+MONGO_URI           = "mongodb://192.168.1.76:27017"
+MONGO_DB            = "emaily"
+MONGO_COL_EMAILS    = "ordinace@buzalkova.cz"
+MONGO_COL_INDEX     = "attachments_index"
+
+ATTACHMENTS_DIR     = Path("/mnt/Emails/ordinace@buzalkova.cz/Attachments")
+LOG_FILE            = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION      = "1.0"
+BATCH_SIZE          = 50
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only errors are persisted; they go to LOG_FILE (UTF-8), not to the console.
+# NOTE(review): LOG_FILE is named parse_emails_errors.log here as well —
+# confirm that sharing the parser's log file name is intended.
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+# Cached bearer token; filled by get_token() and reused by the graph_get_* helpers.
+_graph_token: Optional[str] = None
+
+
+# ─── Graph API ────────────────────────────────────────────────────────────────
+
+def get_token() -> str:
+    global _graph_token
+    app = msal.ConfidentialClientApplication(
+        GRAPH_CLIENT_ID,
+        authority=f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}",
+        client_credential=GRAPH_CLIENT_SECRET,
+    )
+    result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
+    if "access_token" not in result:
+        raise RuntimeError(f"Graph auth failed: {result}")
+    _graph_token = result["access_token"]
+    return _graph_token
+
+
+def graph_get_bytes(url: str) -> bytes:
+    """Stahne binarni obsah prilohy."""
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for attempt in range(2):
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, timeout=120, stream=True)
+        if r.status_code == 401:
+            get_token()
+            continue
+        r.raise_for_status()
+        return r.content
+    raise RuntimeError(f"Graph GET bytes failed: {url}")
+
+
+def graph_get_json(url: str, params: dict = None) -> dict:
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for attempt in range(2):
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, params=params, timeout=30)
+        if r.status_code == 401:
+            get_token()
+            continue
+        r.raise_for_status()
+        return r.json()
+    raise RuntimeError(f"Graph GET json failed: {url}")
+
+
+def fetch_attachment_content(graph_message_id: str, attachment_id: str) -> Optional[bytes]:
+    """Stahne obsah prilohy pres Graph API."""
+    url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{graph_message_id}/attachments/{attachment_id}/$value"
+    try:
+        return graph_get_bytes(url)
+    except Exception as e:
+        logging.error("fetch_attachment_content failed [msg=%s att=%s]: %s", graph_message_id, attachment_id, e)
+        return None
+
+
+def fetch_message_attachments(graph_message_id: str) -> list[dict]:
+    """Nacte seznam priloh zpravy z Graph API (metadata vcetne attachment ID)."""
+    url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{graph_message_id}/attachments"
+    try:
+        data = graph_get_json(url, {"$select": "id,name,contentType,size,isInline,contentId"})
+        return data.get("value", [])
+    except Exception as e:
+        logging.error("fetch_message_attachments failed [%s]: %s", graph_message_id, e)
+        return []
+
+
+# ─── Dedup + ukládání ─────────────────────────────────────────────────────────
+
+def sha256(data: bytes) -> str:
+    return hashlib.sha256(data).hexdigest()
+
+
+def resolve_filename(desired_name: str, att_dir: Path, hash_val: str, index_col) -> str:
+    """
+    Vrati nazev souboru ktery pouzit pro ulozeni.
+    Pokud desired_name jiz existuje s jinym hashem, prida suffix _2, _3 ...
+    """
+    # Zkontroluj jestli existujici soubor se stejnym nazvem ma stejny hash
+    existing = index_col.find_one({"filename": desired_name})
+    if existing:
+        if existing["_id"] == hash_val:
+            return desired_name  # Stejny hash, stejne jmeno — dedup hit
+        # Jiny hash — hledej volny suffix
+        stem   = Path(desired_name).stem
+        suffix = Path(desired_name).suffix
+        n = 2
+        while True:
+            candidate = f"{stem}_{n}{suffix}"
+            if not (att_dir / candidate).exists():
+                # Overi ze ani v indexu neni tento kandidat s jinym hashem
+                ex2 = index_col.find_one({"filename": candidate})
+                if not ex2 or ex2["_id"] == hash_val:
+                    return candidate
+            n += 1
+    return desired_name
+
+
+def save_attachment(content: bytes, original_name: str, att_dir: Path, index_col) -> tuple[str, str, bool]:
+    """
+    Ulozi prilohu s deduplikaci.
+    Vraci (hash, local_path, was_new):
+        was_new=True  -> soubor byl ulozen
+        was_new=False -> hash uz existoval, soubor preskocen
+    """
+    hash_val = sha256(content)
+
+    # Zkontroluj index — pokud hash uz existuje, vrat existujici zaznam
+    existing = index_col.find_one({"_id": hash_val})
+    if existing:
+        # Zvys pocitadlo referenci
+        index_col.update_one({"_id": hash_val}, {"$inc": {"ref_count": 1}})
+        return hash_val, existing["local_path"], False
+
+    # Novy soubor — urcit nazev
+    safe_name = "".join(c if c.isalnum() or c in "._- " else "_" for c in original_name).strip()
+    if not safe_name:
+        safe_name = f"attachment_{hash_val[:8]}"
+
+    filename  = resolve_filename(safe_name, att_dir, hash_val, index_col)
+    file_path = att_dir / filename
+
+    # Uloz soubor
+    file_path.write_bytes(content)
+
+    # Zaznamenej do indexu
+    index_col.insert_one({
+        "_id":          hash_val,
+        "filename":     filename,
+        "local_path":   filename,
+        "size_bytes":   len(content),
+        "mime_type":    "",
+        "first_seen_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        "ref_count":    1,
+    })
+
+    return hash_val, filename, True
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """
+    Command-line entry point: download every real attachment that is not stored yet.
+
+    Steps, in order:
+      1. parse ``--limit``, ``--force-recheck`` and ``--no-indexes``;
+      2. create the target directory, verify Graph authentication and the
+         MongoDB connection (the process exits with status 1 on failure);
+      3. iterate the matching e-mail documents, download each non-inline
+         attachment through Graph and store it via ``save_attachment``;
+      4. write ``file_hash`` / ``local_path`` back into the e-mail documents in
+         batches of ``BATCH_SIZE`` and print a summary.
+    """
+    ap = argparse.ArgumentParser(description=f"download_attachments v{SCRIPT_VERSION}")
+    ap.add_argument("--limit",         type=int, default=0,
+                    help="Zpracovat max N emailu (0 = vse)")
+    ap.add_argument("--force-recheck", action="store_true",
+                    help="Znovu overi i emaily kde prilohy uz maji file_hash")
+    ap.add_argument("--no-indexes",    action="store_true",
+                    help="Nevytvorit indexy na konci")
+    args = ap.parse_args()
+
+    start = datetime.now()
+    print(f"=== download_attachments v{SCRIPT_VERSION} ===")
+    print(f"Start:    {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Schránka: {GRAPH_MAILBOX}")
+    print(f"Cilovy adresar: {ATTACHMENTS_DIR}")
+    print(f"MongoDB:  {MONGO_URI} -> {MONGO_DB}")
+
+    # Target directory
+    ATTACHMENTS_DIR.mkdir(parents=True, exist_ok=True)
+    print(f"  Adresar OK")
+
+    # Graph: fail fast when no token can be acquired
+    print("\nPřipojuji se k Graph API...")
+    try:
+        get_token()
+        print("  Graph API OK")
+    except Exception as e:
+        print(f"  CHYBA: {e}")
+        sys.exit(1)
+
+    # MongoDB: ping with a 5 s server-selection timeout
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        sys.exit(1)
+
+    col_emails = client[MONGO_DB][MONGO_COL_EMAILS]
+    col_index  = client[MONGO_DB][MONGO_COL_INDEX]
+
+    # Indexes on the attachment-index collection (created up front, despite
+    # the --no-indexes help text saying "at the end")
+    if not args.no_indexes:
+        col_index.create_index("filename")
+        col_index.create_index("mime_type")
+
+    # Query: with --force-recheck every e-mail with attachments, otherwise only
+    # e-mails that still have a non-inline attachment without file_hash
+    if args.force_recheck:
+        query = {"has_attachments": True}
+    else:
+        query = {
+            "has_attachments": True,
+            "attachments": {
+                "$elemMatch": {
+                    "is_inline": False,
+                    "file_hash":  {"$exists": False},
+                }
+            }
+        }
+
+    # NOTE: total ignores --limit, so the "i/total" progress shows the full count
+    total = col_emails.count_documents(query)
+    print(f"\nEmailu ke zpracovani: {total}")
+    if total == 0:
+        print("Neni co stahnout.")
+        client.close()
+        return
+
+    cursor = col_emails.find(query, {"_id": 1, "graph_id": 1, "subject": 1, "attachments": 1})
+    if args.limit:
+        cursor = cursor.limit(args.limit)
+
+    ok_count   = 0   # e-mails processed without any attachment error
+    new_count  = 0   # attachments written as new files
+    skip_count = 0   # attachments skipped (already hashed) plus dedup hits
+    err_count  = 0   # attachments that could not be found or downloaded
+    email_i    = 0
+    batch      = []  # pending UpdateOne operations for col_emails
+
+    def flush():
+        """Write the pending e-mail updates; a failure is logged and the batch dropped."""
+        if not batch:
+            return
+        try:
+            col_emails.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    for email_doc in cursor:
+        email_i += 1
+        email_id   = email_doc["_id"]
+        graph_id   = email_doc.get("graph_id", "")
+        subject    = (email_doc.get("subject") or "")[:60]
+        att_list   = email_doc.get("attachments") or []
+
+        # Only real (non-inline) attachments are of interest
+        real_atts = [a for a in att_list if not a.get("is_inline", False)]
+        if not real_atts:
+            continue
+
+        print(f"\n  {email_i:>5}/{total}  {subject}")
+
+        # Load the attachment IDs from Graph, keyed by attachment name.
+        # NOTE(review): attachments sharing one name within a message collapse
+        # into a single map entry (the last one wins).
+        graph_atts = fetch_message_attachments(graph_id)
+        graph_att_map = {a["name"]: a for a in graph_atts if not a.get("isInline", False)}
+
+        updated_atts = list(att_list)
+        email_ok = True
+
+        for i, att in enumerate(updated_atts):
+            if att.get("is_inline", False):
+                continue
+            if not args.force_recheck and att.get("file_hash"):
+                skip_count += 1
+                # NOTE(review): att['filename'] raises KeyError when the key is
+                # missing, while the code below uses att.get("filename", "")
+                print(f"         SKIP  {att['filename']}")
+                continue
+
+            att_name    = att.get("filename", "")
+            graph_att   = graph_att_map.get(att_name)
+
+            if not graph_att:
+                # Fall back to a case-insensitive substring match.
+                # NOTE(review): an empty att_name is a substring of every name,
+                # so it matches the first Graph attachment.
+                for gname, ga in graph_att_map.items():
+                    if att_name.lower() in gname.lower():
+                        graph_att = ga
+                        break
+
+            if not graph_att:
+                logging.error("attachment not found in Graph [email=%s att=%s]", email_id, att_name)
+                print(f"         ERR   {att_name} (nenalezeno v Graph)")
+                err_count += 1
+                email_ok = False
+                continue
+
+            # Download the content
+            content = fetch_attachment_content(graph_id, graph_att["id"])
+            if content is None:
+                err_count += 1
+                email_ok = False
+                print(f"         ERR   {att_name} (stazeni selhalo)")
+                continue
+
+            # Store with deduplication
+            hash_val, local_path, was_new = save_attachment(content, att_name, ATTACHMENTS_DIR, col_index)
+
+            # Update the MIME type in the index (save_attachment stores "")
+            col_index.update_one(
+                {"_id": hash_val},
+                {"$set": {"mime_type": att.get("mime_type", graph_att.get("contentType", ""))}},
+            )
+
+            # Record hash and path on the e-mail's attachment entry
+            updated_atts[i] = {**att, "file_hash": hash_val, "local_path": local_path}
+
+            if was_new:
+                new_count += 1
+                print(f"         NEW   {local_path}  ({len(content):,} B)")
+            else:
+                skip_count += 1
+                print(f"         DUP   {att_name} -> {local_path}")
+
+        if email_ok:
+            ok_count += 1
+
+        # Queue the updated attachment list for write-back to the e-mail
+        batch.append(UpdateOne(
+            {"_id": email_id},
+            {"$set": {"attachments": updated_atts}}
+        ))
+
+        if len(batch) >= BATCH_SIZE:
+            flush()
+
+        # Progress line every 100 e-mails
+        if email_i % 100 == 0:
+            elapsed = (datetime.now() - start).total_seconds()  # NOTE: computed but not used
+            print(f"  {'─'*60}")
+            print(f"  Průběh: emaily={email_i}/{total}  nove={new_count}  dup={skip_count}  err={err_count}")
+            print(f"  {'─'*60}")
+
+    # Write whatever is left in the last, partial batch
+    flush()
+
+    elapsed_total = (datetime.now() - start).total_seconds()
+    files_total   = col_index.count_documents({})
+    size_total    = sum(d.get("size_bytes", 0) for d in col_index.find({}, {"size_bytes": 1}))
+
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  emaily={ok_count}  |  nove soubory={new_count}  |  duplikaty={skip_count}  |  err={err_count}")
+    print(f"Souboru v indexu: {files_total}  ({size_total/1024/1024:.1f} MB)")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,428 @@
+"""
+download_attachments_v1.1.py
+Nazev:  download_attachments_v1.1.py
+Verze:  1.1
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Stahuje skutecne prilohy (is_inline=False) vsech emailu z MongoDB
+    pres Microsoft Graph API a uklada je do adresare
+    /mnt/Emails/<schránka>/Attachments/.
+
+    Schránka se predava jako povinny parametr --mailbox.
+
+    Deduplikace podle SHA256 hashe obsahu:
+        - stejny hash = soubor uz existuje -> preskoci
+        - prvni vyskyt souboru: ulozi pod puvodnim nazvem
+        - kolize nazvu (stejny nazev, jiny hash): faktura_2.pdf, faktura_3.pdf ...
+
+    Po ulozeni aktualizuje MongoDB:
+        - v email dokumentu: kazda priloha dostane file_hash + local_path
+        - kolekce emaily.attachments_index: _id=hash, filename, local_path, size_bytes,
+          mime_type, mailbox, first_seen_at, ref_count
+
+    Bezpecne prerusit a opakovat — emaily kde vsechny prilohy maji file_hash
+    se preskoci. --force-recheck znovu overi i uz stazene.
+
+    POZOR: Skript pouze CIST ze schranky — zadny zapis do schranky!
+
+Spousteni:
+    python download_attachments_v1.1.py --mailbox ordinace@buzalkova.cz
+    python download_attachments_v1.1.py --mailbox vladimir.buzalka@buzalka.cz --limit 50
+    python download_attachments_v1.1.py --mailbox ordinace@buzalkova.cz --force-recheck
+
+Docker:
+    docker exec -it python-runner python /scripts/download_attachments_v1.1.py \\
+        --mailbox ordinace@buzalkova.cz
+
+Zavislosti:
+    msal, requests, pymongo
+    Python 3.10+
+
+Struktura na disku:
+    /mnt/Emails/
+    └── <mailbox>/
+        └── Attachments/
+            ├── faktura_2026.pdf
+            ├── vysledky_lab.pdf
+            ├── vysledky_lab_2.pdf
+            └── ...
+
+Kolekce emaily.attachments_index:
+    _id            SHA256 hash (hex)
+    filename       nazev souboru na disku
+    local_path     relativni cesta od Attachments/
+    size_bytes     velikost souboru
+    mime_type      MIME typ
+    mailbox        schránka ze ktere pochazi prvni vyskytu
+    first_seen_at  datetime UTC
+    ref_count      v kolika emailech se tato priloha vyskytuje
+
+Historie verzi:
+    1.0  2026-06-02  Inicialni verze
+    1.1  2026-06-02  Schránka jako parametr --mailbox (univerzalni pouziti)
+"""
+
+import sys
+import hashlib
+import logging
+import argparse
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional
+
+import msal
+import requests
+from pymongo import MongoClient, UpdateOne
+
+# Switch stdout to UTF-8 (unencodable characters are replaced) when the stream
+# supports reconfigure() — presumably so the accented status messages print on
+# any console encoding.
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── CONFIGURATION ────────────────────────────────────────────────────────────
+# NOTE(review): GRAPH_CLIENT_SECRET is a credential committed in plain text.
+# It should be rotated and loaded from the environment or a secret store.
+GRAPH_TENANT_ID     = "7d269944-37a4-43a1-8140-c7517dc426e9"
+GRAPH_CLIENT_ID     = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
+GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
+GRAPH_URL           = "https://graph.microsoft.com/v1.0"
+
+# The e-mail collection name is not configured here: main() uses the mailbox
+# address as the collection name inside MONGO_DB.
+MONGO_URI           = "mongodb://192.168.1.76:27017"
+MONGO_DB            = "emaily"
+MONGO_COL_INDEX     = "attachments_index"
+
+# Attachments are stored under EMAILS_BASE_DIR/<mailbox>/Attachments.
+EMAILS_BASE_DIR     = Path("/mnt/Emails")
+# NOTE(review): the log file is named after "parse_emails" — looks inherited
+# from another script; confirm that sharing the file is intended.
+LOG_FILE            = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION      = "1.1"
+# Number of e-mail updates collected before one bulk_write.
+BATCH_SIZE          = 50
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only ERROR and above are written to LOG_FILE; progress goes to stdout via print().
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+# Cached Graph access token; set by get_token(), refreshed after an HTTP 401.
+_graph_token: Optional[str] = None
+
+
+# ─── Graph API ────────────────────────────────────────────────────────────────
+
+def get_token() -> str:
+    global _graph_token
+    app = msal.ConfidentialClientApplication(
+        GRAPH_CLIENT_ID,
+        authority=f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}",
+        client_credential=GRAPH_CLIENT_SECRET,
+    )
+    result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
+    if "access_token" not in result:
+        raise RuntimeError(f"Graph auth failed: {result}")
+    _graph_token = result["access_token"]
+    return _graph_token
+
+
+def graph_get_bytes(url: str) -> bytes:
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for attempt in range(2):
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, timeout=120, stream=True)
+        if r.status_code == 401:
+            get_token()
+            continue
+        r.raise_for_status()
+        return r.content
+    raise RuntimeError(f"Graph GET bytes failed: {url}")
+
+
+def graph_get_json(url: str, params: dict = None) -> dict:
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for attempt in range(2):
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, params=params, timeout=30)
+        if r.status_code == 401:
+            get_token()
+            continue
+        r.raise_for_status()
+        return r.json()
+    raise RuntimeError(f"Graph GET json failed: {url}")
+
+
+def fetch_message_attachments(mailbox: str, graph_message_id: str) -> list[dict]:
+    url = f"{GRAPH_URL}/users/{mailbox}/messages/{graph_message_id}/attachments"
+    try:
+        data = graph_get_json(url, {"$select": "id,name,contentType,size,isInline,contentId"})
+        return data.get("value", [])
+    except Exception as e:
+        logging.error("fetch_message_attachments failed [%s]: %s", graph_message_id, e)
+        return []
+
+
+def fetch_attachment_content(mailbox: str, graph_message_id: str, attachment_id: str) -> Optional[bytes]:
+    url = f"{GRAPH_URL}/users/{mailbox}/messages/{graph_message_id}/attachments/{attachment_id}/$value"
+    try:
+        return graph_get_bytes(url)
+    except Exception as e:
+        logging.error("fetch_attachment_content failed [msg=%s att=%s]: %s", graph_message_id, attachment_id, e)
+        return None
+
+
+# ─── Dedup + ukládání ─────────────────────────────────────────────────────────
+
+def sha256(data: bytes) -> str:
+    return hashlib.sha256(data).hexdigest()
+
+
+def safe_filename(name: str) -> str:
+    safe = "".join(c if c.isalnum() or c in "._- " else "_" for c in name).strip()
+    return safe or "attachment"
+
+
+def resolve_filename(desired_name: str, att_dir: Path, hash_val: str, col_index) -> str:
+    """Vrati nazev souboru pro ulozeni — resi kolize (stejny nazev, jiny hash)."""
+    existing = col_index.find_one({"filename": desired_name})
+    if existing:
+        if existing["_id"] == hash_val:
+            return desired_name  # Dedup hit — stejny hash
+        # Kolize — hledej volny suffix
+        stem   = Path(desired_name).stem
+        suffix = Path(desired_name).suffix
+        n = 2
+        while True:
+            candidate = f"{stem}_{n}{suffix}"
+            ex2 = col_index.find_one({"filename": candidate})
+            if not ex2 or ex2["_id"] == hash_val:
+                if not (att_dir / candidate).exists() or (ex2 and ex2["_id"] == hash_val):
+                    return candidate
+            n += 1
+    return desired_name
+
+
+def save_attachment(
+    content: bytes,
+    original_name: str,
+    mime_type: str,
+    mailbox: str,
+    att_dir: Path,
+    col_index,
+) -> tuple[str, str, bool]:
+    """
+    Ulozi prilohu s deduplikaci.
+    Vraci (hash, local_path, was_new).
+    """
+    hash_val = sha256(content)
+
+    existing = col_index.find_one({"_id": hash_val})
+    if existing:
+        col_index.update_one({"_id": hash_val}, {"$inc": {"ref_count": 1}})
+        return hash_val, existing["local_path"], False
+
+    filename  = resolve_filename(safe_filename(original_name), att_dir, hash_val, col_index)
+    file_path = att_dir / filename
+    file_path.write_bytes(content)
+
+    col_index.insert_one({
+        "_id":          hash_val,
+        "filename":     filename,
+        "local_path":   filename,
+        "size_bytes":   len(content),
+        "mime_type":    mime_type,
+        "mailbox":      mailbox,
+        "first_seen_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        "ref_count":    1,
+    })
+
+    return hash_val, filename, True
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """
+    Command-line entry point: download every real attachment of one mailbox
+    that is not stored yet.
+
+    Steps, in order:
+      1. parse ``--mailbox`` (required), ``--limit``, ``--force-recheck`` and
+         ``--no-indexes``;
+      2. create ``EMAILS_BASE_DIR/<mailbox>/Attachments``, verify Graph
+         authentication and the MongoDB connection (exit status 1 on failure);
+      3. iterate the matching e-mail documents of collection ``<mailbox>``,
+         download each non-inline attachment and store it via ``save_attachment``;
+      4. write ``file_hash`` / ``local_path`` back into the e-mail documents in
+         batches of ``BATCH_SIZE`` and print a summary.
+    """
+    ap = argparse.ArgumentParser(description=f"download_attachments v{SCRIPT_VERSION}")
+    ap.add_argument("--mailbox",       required=True,
+                    help="Emailova schranka (napr. ordinace@buzalkova.cz)")
+    ap.add_argument("--limit",         type=int, default=0,
+                    help="Zpracovat max N emailu (0 = vse)")
+    ap.add_argument("--force-recheck", action="store_true",
+                    help="Znovu overi i emaily kde prilohy uz maji file_hash")
+    ap.add_argument("--no-indexes",    action="store_true",
+                    help="Nevytvorit indexy na attachments_index kolekci")
+    args = ap.parse_args()
+
+    # The mailbox address selects both the target directory and the collection
+    mailbox     = args.mailbox
+    att_dir     = EMAILS_BASE_DIR / mailbox / "Attachments"
+    mongo_col   = mailbox
+
+    start = datetime.now()
+    print(f"=== download_attachments v{SCRIPT_VERSION} ===")
+    print(f"Start:    {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Schránka: {mailbox}")
+    print(f"Cilovy adresar: {att_dir}")
+    print(f"MongoDB:  {MONGO_URI} -> {MONGO_DB}.{mongo_col}")
+
+    att_dir.mkdir(parents=True, exist_ok=True)
+    print("  Adresar OK")
+
+    # Graph: fail fast when no token can be acquired
+    print("\nPřipojuji se k Graph API...")
+    try:
+        get_token()
+        print("  Graph API OK")
+    except Exception as e:
+        print(f"  CHYBA: {e}")
+        sys.exit(1)
+
+    # MongoDB: ping with a 5 s server-selection timeout
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        sys.exit(1)
+
+    col_emails = client[MONGO_DB][mongo_col]
+    col_index  = client[MONGO_DB][MONGO_COL_INDEX]
+
+    if not args.no_indexes:
+        col_index.create_index("filename")
+        col_index.create_index("mime_type")
+        col_index.create_index("mailbox")
+
+    # Query: with --force-recheck every e-mail with attachments, otherwise only
+    # e-mails that still have a non-inline attachment without file_hash
+    if args.force_recheck:
+        query = {"has_attachments": True}
+    else:
+        query = {
+            "has_attachments": True,
+            "attachments": {
+                "$elemMatch": {
+                    "is_inline": False,
+                    "file_hash": {"$exists": False},
+                }
+            }
+        }
+
+    # NOTE: total ignores --limit, so the "i/total" progress shows the full count
+    total = col_emails.count_documents(query)
+    print(f"\nEmailu ke zpracovani: {total}")
+    if total == 0:
+        print("Neni co stahnout.")
+        client.close()
+        return
+
+    cursor = col_emails.find(query, {"_id": 1, "graph_id": 1, "subject": 1, "attachments": 1})
+    if args.limit:
+        cursor = cursor.limit(args.limit)
+
+    ok_count   = 0   # e-mails processed without any attachment error
+    new_count  = 0   # attachments written as new files
+    dup_count  = 0   # attachments whose content hash was already indexed
+    err_count  = 0   # attachments that could not be found or downloaded
+    email_i    = 0
+    batch      = []  # pending UpdateOne operations for col_emails
+
+    def flush():
+        """Write the pending e-mail updates; a failure is logged and the batch dropped."""
+        if not batch:
+            return
+        try:
+            col_emails.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    for email_doc in cursor:
+        email_i   += 1
+        email_id   = email_doc["_id"]
+        graph_id   = email_doc.get("graph_id", "")
+        subject    = (email_doc.get("subject") or "")[:60]
+        att_list   = email_doc.get("attachments") or []
+
+        # Only real (non-inline) attachments are of interest
+        real_atts = [a for a in att_list if not a.get("is_inline", False)]
+        if not real_atts:
+            continue
+
+        print(f"\n  {email_i:>5}/{total}  {subject}")
+
+        # Graph attachment metadata keyed by name.
+        # NOTE(review): attachments sharing one name within a message collapse
+        # into a single map entry (the last one wins).
+        graph_atts    = fetch_message_attachments(mailbox, graph_id)
+        graph_att_map = {a["name"]: a for a in graph_atts if not a.get("isInline", False)}
+
+        updated_atts = list(att_list)
+        email_ok     = True
+
+        for i, att in enumerate(updated_atts):
+            if att.get("is_inline", False):
+                continue
+            if not args.force_recheck and att.get("file_hash"):
+                # NOTE(review): att['filename'] raises KeyError when the key is
+                # missing, while the code below uses att.get("filename", "")
+                print(f"         SKIP  {att['filename']}")
+                continue
+
+            att_name  = att.get("filename", "")
+            graph_att = graph_att_map.get(att_name)
+            if not graph_att:
+                # Fall back to a case-insensitive substring match.
+                # NOTE(review): an empty att_name is a substring of every name,
+                # so it matches the first Graph attachment.
+                for gname, ga in graph_att_map.items():
+                    if att_name.lower() in gname.lower():
+                        graph_att = ga
+                        break
+
+            if not graph_att:
+                logging.error("attachment not found in Graph [email=%s att=%s]", email_id, att_name)
+                print(f"         ERR   {att_name} (nenalezeno v Graph)")
+                err_count += 1
+                email_ok = False
+                continue
+
+            # Download the content
+            content = fetch_attachment_content(mailbox, graph_id, graph_att["id"])
+            if content is None:
+                err_count += 1
+                email_ok = False
+                print(f"         ERR   {att_name} (stazeni selhalo)")
+                continue
+
+            # Prefer the MIME type stored with the e-mail, else the one from Graph
+            mime_type = att.get("mime_type") or graph_att.get("contentType", "")
+            hash_val, local_path, was_new = save_attachment(
+                content, att_name, mime_type, mailbox, att_dir, col_index
+            )
+
+            # Record hash and path on the e-mail's attachment entry
+            updated_atts[i] = {**att, "file_hash": hash_val, "local_path": local_path}
+
+            if was_new:
+                new_count += 1
+                print(f"         NEW   {local_path}  ({len(content):,} B)")
+            else:
+                dup_count += 1
+                print(f"         DUP   {att_name} -> {local_path}")
+
+        if email_ok:
+            ok_count += 1
+
+        # Queue the updated attachment list for write-back to the e-mail
+        batch.append(UpdateOne({"_id": email_id}, {"$set": {"attachments": updated_atts}}))
+
+        if len(batch) >= BATCH_SIZE:
+            flush()
+
+        # Progress line every 100 e-mails
+        if email_i % 100 == 0:
+            elapsed = (datetime.now() - start).total_seconds()  # NOTE: computed but not used
+            print(f"  {'─'*60}")
+            print(f"  Průběh: emaily={email_i}/{total}  nove={new_count}  dup={dup_count}  err={err_count}")
+            print(f"  {'─'*60}")
+
+    # Write whatever is left in the last, partial batch
+    flush()
+
+    elapsed_total = (datetime.now() - start).total_seconds()
+    # NOTE: the totals below cover the whole shared index, not just this mailbox
+    files_total   = col_index.count_documents({})
+    size_total    = sum(d.get("size_bytes", 0) for d in col_index.find({}, {"size_bytes": 1}))
+
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  emaily={ok_count}  |  nove={new_count}  |  dup={dup_count}  |  err={err_count}")
+    print(f"Souboru v indexu: {files_total}  ({size_total / 1024 / 1024:.1f} MB)")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,483 @@
+"""
+download_attachments_v1.3.py
+Nazev:  download_attachments_v1.3.py
+Verze:  1.3
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Stahuje skutecne prilohy (is_inline=False) vsech emailu z MongoDB
+    pres Microsoft Graph API a uklada je do adresare
+    /mnt/Emails/<schránka>/Attachments/.
+
+    Schránka se predava jako povinny parametr --mailbox.
+
+    Deduplikace podle SHA256 hashe obsahu:
+        - stejny hash = soubor uz existuje -> preskoci
+        - prvni vyskyt souboru: ulozi pod puvodnim nazvem
+        - kolize nazvu (stejny nazev, jiny hash): faktura_2.pdf, faktura_3.pdf ...
+
+    Po ulozeni aktualizuje MongoDB:
+        - v email dokumentu: kazda priloha dostane file_hash + local_path
+        - kolekce emaily.attachments_index: _id=hash, filename, local_path, size_bytes,
+          mime_type, mailbox, first_seen_at, ref_count
+
+    Bezpecne prerusit a opakovat — emaily kde vsechny prilohy maji file_hash
+    se preskoci. --force-recheck znovu overi i uz stazene.
+
+    POZOR: Skript pouze CIST ze schranky — zadny zapis do schranky!
+
+Spousteni:
+    python download_attachments_v1.3.py --mailbox ordinace@buzalkova.cz
+    python download_attachments_v1.3.py --mailbox ordinace@buzalkova.cz --limit 50
+    python download_attachments_v1.3.py --mailbox ordinace@buzalkova.cz --force-recheck
+
+Docker:
+    docker exec -it python-runner python /scripts/download_attachments_v1.3.py \\
+        --mailbox ordinace@buzalkova.cz
+
+Zavislosti:
+    msal, requests, pymongo
+    Python 3.10+
+
+Historie verzi:
+    1.0  2026-06-02  Inicialni verze
+    1.1  2026-06-02  Schránka jako parametr --mailbox
+    1.2  2026-06-02  Oprava: Graph attachment mapa vcetne inline; normalizace nazvu;
+                     preskoceni S/MIME; inline z Graphu -> SKIP ne ERR
+    1.3  2026-06-02  Primarni stazeni pres graph_att_id (prime ID bez name-matchingu);
+                     oprava $select na attachment listu (odstranen contentId ktery
+                     zpusoboval BadRequest a vracel prazdny seznam); name-matching
+                     zustava jako fallback pro stare emaily bez graph_att_id
+"""
+
+import sys
+import re
+import hashlib
+import logging
+import argparse
+import unicodedata
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional
+
+import msal
+import requests
+from pymongo import MongoClient, UpdateOne
+
+# Switch stdout to UTF-8 (unencodable characters are replaced) when the stream
+# supports reconfigure() — presumably so the accented status messages print on
+# any console encoding.
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── CONFIGURATION ────────────────────────────────────────────────────────────
+# NOTE(review): GRAPH_CLIENT_SECRET is a credential committed in plain text.
+# It should be rotated and loaded from the environment or a secret store.
+GRAPH_TENANT_ID     = "7d269944-37a4-43a1-8140-c7517dc426e9"
+GRAPH_CLIENT_ID     = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
+GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
+GRAPH_URL           = "https://graph.microsoft.com/v1.0"
+
+MONGO_URI           = "mongodb://192.168.1.76:27017"
+MONGO_DB            = "emaily"
+MONGO_COL_INDEX     = "attachments_index"
+
+# Attachments are stored under EMAILS_BASE_DIR/<mailbox>/Attachments.
+EMAILS_BASE_DIR     = Path("/mnt/Emails")
+# NOTE(review): the log file is named after "parse_emails" — looks inherited
+# from another script; confirm that sharing the file is intended.
+LOG_FILE            = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION      = "1.3"
+# Number of e-mail updates collected before one bulk_write.
+BATCH_SIZE          = 50
+
+# Attachment types that are skipped (S/MIME signatures, certificates)
+SKIP_EXTENSIONS = {".p7m", ".p7s", ".p7c", ".p7b"}
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only ERROR and above are written to LOG_FILE; progress goes to stdout via print().
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+# Cached Graph access token; set by get_token(), refreshed after an HTTP 401.
+_graph_token: Optional[str] = None
+
+
+# ─── Graph API ────────────────────────────────────────────────────────────────
+
+def get_token() -> str:
+    global _graph_token
+    app = msal.ConfidentialClientApplication(
+        GRAPH_CLIENT_ID,
+        authority=f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}",
+        client_credential=GRAPH_CLIENT_SECRET,
+    )
+    result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
+    if "access_token" not in result:
+        raise RuntimeError(f"Graph auth failed: {result}")
+    _graph_token = result["access_token"]
+    return _graph_token
+
+
+def graph_get_bytes(url: str) -> bytes:
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for attempt in range(2):
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, timeout=120, stream=True)
+        if r.status_code == 401:
+            get_token()
+            continue
+        r.raise_for_status()
+        return r.content
+    raise RuntimeError(f"Graph GET bytes failed: {url}")
+
+
+def graph_get_json(url: str, params: dict = None) -> dict:
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for attempt in range(2):
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, params=params, timeout=30)
+        if r.status_code == 401:
+            get_token()
+            continue
+        r.raise_for_status()
+        return r.json()
+    raise RuntimeError(f"Graph GET json failed: {url}")
+
+
+def fetch_message_attachments(mailbox: str, graph_message_id: str) -> list[dict]:
+    """Nacte metadata vsech priloh zpravy (bez contentBytes)."""
+    url = f"{GRAPH_URL}/users/{mailbox}/messages/{graph_message_id}/attachments"
+    try:
+        # Pozor: contentId NENI v base attachment type — nesmi byt v $select
+        data = graph_get_json(url, {"$select": "id,name,contentType,size,isInline"})
+        return data.get("value", [])
+    except Exception as e:
+        logging.error("fetch_message_attachments failed [%s]: %s", graph_message_id, e)
+        return []
+
+
+def fetch_attachment_content(mailbox: str, graph_message_id: str, attachment_id: str) -> Optional[bytes]:
+    url = f"{GRAPH_URL}/users/{mailbox}/messages/{graph_message_id}/attachments/{attachment_id}/$value"
+    try:
+        return graph_get_bytes(url)
+    except Exception as e:
+        logging.error("fetch_attachment_content failed [msg=%s att=%s]: %s",
+                      graph_message_id, attachment_id, e)
+        return None
+
+
+# ─── Pomocné funkce ───────────────────────────────────────────────────────────
+
+def normalize_name(name: str) -> str:
+    """Normalizuje název pro porovnání — lowercase, bez diakritiky, jen alnum+._-"""
+    nfkd = unicodedata.normalize("NFKD", name.lower().strip())
+    ascii_str = "".join(c for c in nfkd if not unicodedata.combining(c))
+    return re.sub(r"[^\w.\-]", "_", ascii_str)
+
+
+def find_graph_att(att_name: str, att_size: int, graph_atts: list[dict]) -> Optional[dict]:
+    """Fallback: hleda prilohu v Graph listu podle jmena (pro emaily bez graph_att_id)."""
+    # 1. Presna shoda
+    for ga in graph_atts:
+        if ga["name"] == att_name:
+            return ga
+
+    norm_want = normalize_name(att_name)
+
+    # 2. Normalizovana shoda
+    for ga in graph_atts:
+        if normalize_name(ga["name"]) == norm_want:
+            return ga
+
+    # 3. Normalizovana shoda + velikost (±10 %)
+    for ga in graph_atts:
+        if normalize_name(ga["name"]) == norm_want:
+            ga_size = ga.get("size", 0)
+            if att_size == 0 or ga_size == 0 or abs(ga_size - att_size) / max(ga_size, att_size) < 0.1:
+                return ga
+
+    # 4. Castecna shoda sufixu (posledních 20 znaků normalizovaného jména)
+    for ga in graph_atts:
+        if norm_want[-20:] and normalize_name(ga["name"]).endswith(norm_want[-20:]):
+            return ga
+
+    return None
+
+
+def sha256(data: bytes) -> str:
+    return hashlib.sha256(data).hexdigest()
+
+
+def safe_filename(name: str) -> str:
+    safe = "".join(c if c.isalnum() or c in "._- ()" else "_" for c in name).strip()
+    return safe or "attachment"
+
+
+def resolve_filename(desired_name: str, att_dir: Path, hash_val: str, col_index) -> str:
+    existing = col_index.find_one({"filename": desired_name})
+    if existing:
+        if existing["_id"] == hash_val:
+            return desired_name
+        stem   = Path(desired_name).stem
+        suffix = Path(desired_name).suffix
+        n = 2
+        while True:
+            candidate = f"{stem}_{n}{suffix}"
+            ex2 = col_index.find_one({"filename": candidate})
+            if not ex2 or ex2["_id"] == hash_val:
+                if not (att_dir / candidate).exists() or (ex2 and ex2["_id"] == hash_val):
+                    return candidate
+            n += 1
+    return desired_name
+
+
+def save_attachment(
+    content: bytes,
+    original_name: str,
+    mime_type: str,
+    mailbox: str,
+    att_dir: Path,
+    col_index,
+) -> tuple[str, str, bool]:
+    """Store attachment bytes on disk, deduplicated by SHA-256 hash.
+
+    Args:
+        content: raw attachment bytes.
+        original_name: attachment name as stored in the e-mail document.
+        mime_type: MIME type recorded in the index entry.
+        mailbox: mailbox recorded in the index entry.
+        att_dir: directory the file is written to.
+        col_index: index collection whose ``_id`` is the content hash.
+
+    Returns:
+        ``(hash, local_path, was_new)``. ``was_new`` is True only when a new
+        index entry was created.
+
+    When the hash is already indexed, ``ref_count`` is incremented and the
+    indexed ``local_path`` is returned. If that file no longer exists in
+    ``att_dir`` it is written again, so a re-check repairs files that were
+    deleted from disk.
+
+    NOTE(review): ``ref_count`` grows on every call for a known hash, so
+    processing the same e-mail again (e.g. with --force-recheck) counts it
+    again.
+    """
+    hash_val = sha256(content)
+
+    existing = col_index.find_one({"_id": hash_val})
+    if existing:
+        col_index.update_one({"_id": hash_val}, {"$inc": {"ref_count": 1}})
+        known_path = existing["local_path"]
+        # The index claims the file exists; restore it when it is missing.
+        if known_path and not (att_dir / known_path).exists():
+            (att_dir / known_path).write_bytes(content)
+        return hash_val, known_path, False
+
+    filename  = resolve_filename(safe_filename(original_name), att_dir, hash_val, col_index)
+    file_path = att_dir / filename
+    file_path.write_bytes(content)
+
+    col_index.insert_one({
+        "_id":           hash_val,
+        "filename":      filename,
+        "local_path":    filename,
+        "size_bytes":    len(content),
+        "mime_type":     mime_type,
+        "mailbox":       mailbox,
+        # Stored as a naive UTC datetime.
+        "first_seen_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        "ref_count":     1,
+    })
+
+    return hash_val, filename, True
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Download the missing real (non-inline) attachments of one mailbox.
+
+    Steps:
+        1. Parse the command line and create the target directory.
+        2. Verify that a Graph token can be obtained and MongoDB answers a ping.
+        3. Select e-mails that still have a non-inline attachment without
+           ``file_hash`` (or every e-mail with attachments for --force-recheck).
+        4. For each attachment: download it, store it via ``save_attachment``
+           and record ``file_hash`` / ``local_path`` in the e-mail document.
+        5. Print a summary.
+
+    E-mail document updates are written in batches; the pending batch is
+    flushed even when the run is interrupted or fails, so finished downloads
+    are not processed again on the next run.
+    """
+    ap = argparse.ArgumentParser(description=f"download_attachments v{SCRIPT_VERSION}")
+    ap.add_argument("--mailbox",       required=True,
+                    help="Emailova schranka (napr. ordinace@buzalkova.cz)")
+    ap.add_argument("--limit",         type=int, default=0,
+                    help="Zpracovat max N emailu (0 = vse)")
+    ap.add_argument("--force-recheck", action="store_true",
+                    help="Znovu overi i emaily kde prilohy uz maji file_hash")
+    ap.add_argument("--no-indexes",    action="store_true",
+                    help="Nevytvorit indexy na attachments_index kolekci")
+    args = ap.parse_args()
+
+    mailbox   = args.mailbox
+    att_dir   = EMAILS_BASE_DIR / mailbox / "Attachments"
+    mongo_col = mailbox
+
+    start = datetime.now()
+    print(f"=== download_attachments v{SCRIPT_VERSION} ===")
+    print(f"Start:    {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Schránka: {mailbox}")
+    print(f"Cilovy adresar: {att_dir}")
+    print(f"MongoDB:  {MONGO_URI} -> {MONGO_DB}.{mongo_col}")
+
+    att_dir.mkdir(parents=True, exist_ok=True)
+    print("  Adresar OK")
+
+    print("\nPřipojuji se k Graph API...")
+    try:
+        get_token()
+        print("  Graph API OK")
+    except Exception as e:
+        print(f"  CHYBA: {e}")
+        sys.exit(1)
+
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        sys.exit(1)
+
+    col_emails = client[MONGO_DB][mongo_col]
+    col_index  = client[MONGO_DB][MONGO_COL_INDEX]
+
+    if not args.no_indexes:
+        col_index.create_index("filename")
+        col_index.create_index("mime_type")
+        col_index.create_index("mailbox")
+
+    if args.force_recheck:
+        query = {"has_attachments": True}
+    else:
+        # Only e-mails with at least one real attachment that has no hash yet.
+        query = {
+            "has_attachments": True,
+            "attachments": {
+                "$elemMatch": {
+                    "is_inline": False,
+                    "file_hash": {"$exists": False},
+                }
+            }
+        }
+
+    total = col_emails.count_documents(query)
+    print(f"\nEmailu ke zpracovani: {total}")
+    if total == 0:
+        print("Neni co stahnout.")
+        client.close()
+        return
+
+    cursor = col_emails.find(query, {"_id": 1, "graph_id": 1, "subject": 1, "attachments": 1})
+    if args.limit:
+        cursor = cursor.limit(args.limit)
+
+    ok_count   = 0
+    new_count  = 0
+    dup_count  = 0
+    skip_count = 0
+    err_count  = 0
+    email_i    = 0
+    batch      = []
+
+    def flush():
+        """Write the pending e-mail updates; failures are logged, not raised."""
+        if not batch:
+            return
+        try:
+            col_emails.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    try:
+        for email_doc in cursor:
+            email_i  += 1
+            email_id  = email_doc["_id"]
+            graph_id  = email_doc.get("graph_id", "")
+            subject   = (email_doc.get("subject") or "")[:60]
+            att_list  = email_doc.get("attachments") or []
+
+            real_atts = [a for a in att_list if not a.get("is_inline", False)]
+            if not real_atts:
+                continue
+
+            print(f"\n  {email_i:>5}/{total}  {subject}")
+
+            # List the attachments from Graph only when some attachment that
+            # is going to be processed has no graph_att_id.
+            need_listing = any(
+                not a.get("is_inline", False)
+                and not (not args.force_recheck and a.get("file_hash"))
+                and not a.get("graph_att_id")
+                for a in att_list
+            )
+            graph_atts = fetch_message_attachments(mailbox, graph_id) if need_listing else []
+
+            updated_atts = list(att_list)
+            email_ok     = True
+
+            for i, att in enumerate(updated_atts):
+                if att.get("is_inline", False):
+                    continue
+                if not args.force_recheck and att.get("file_hash"):
+                    continue
+
+                att_name     = att.get("filename", "")
+                att_size     = att.get("size_bytes", 0)
+                graph_att_id = att.get("graph_att_id")
+
+                # Skip S/MIME signatures.
+                if Path(att_name).suffix.lower() in SKIP_EXTENSIONS:
+                    updated_atts[i] = {**att, "file_hash": "skip", "local_path": ""}
+                    skip_count += 1
+                    print(f"         SKIP  {att_name} (S/MIME)")
+                    continue
+
+                # Direct access through graph_att_id (e-mails parsed by v1.2+).
+                if graph_att_id:
+                    content = fetch_attachment_content(mailbox, graph_id, graph_att_id)
+                    if content is None:
+                        logging.error("attachment download failed [email=%s att=%s]", email_id, att_name)
+                        err_count += 1
+                        email_ok = False
+                        print(f"         ERR   {att_name} (stazeni selhalo)")
+                        continue
+                    # No Graph listing on this path: the stored MIME type is used.
+                    mime_type = att.get("mime_type", "")
+                else:
+                    # Fallback: name matching for old e-mails (parsed before v1.2).
+                    graph_att = find_graph_att(att_name, att_size, graph_atts)
+
+                    if not graph_att:
+                        logging.error("attachment not found [email=%s att=%s]", email_id, att_name)
+                        print(f"         ERR   {att_name} (nenalezeno)")
+                        err_count += 1
+                        email_ok = False
+                        continue
+
+                    # Graph reports the attachment as inline: mark and skip it.
+                    if graph_att.get("isInline", False):
+                        updated_atts[i] = {**att, "is_inline": True, "file_hash": "skip", "local_path": ""}
+                        skip_count += 1
+                        print(f"         SKIP  {att_name} (inline obrazek)")
+                        continue
+
+                    content = fetch_attachment_content(mailbox, graph_id, graph_att["id"])
+                    if content is None:
+                        logging.error("attachment download failed [email=%s att=%s]", email_id, att_name)
+                        err_count += 1
+                        email_ok = False
+                        print(f"         ERR   {att_name} (stazeni selhalo)")
+                        continue
+
+                    mime_type = att.get("mime_type") or graph_att.get("contentType", "")
+
+                hash_val, local_path, was_new = save_attachment(
+                    content, att_name, mime_type, mailbox, att_dir, col_index
+                )
+
+                updated_atts[i] = {**att, "file_hash": hash_val, "local_path": local_path}
+
+                if was_new:
+                    new_count += 1
+                    print(f"         NEW   {local_path}  ({len(content):,} B)")
+                else:
+                    dup_count += 1
+                    print(f"         DUP   {att_name} -> {local_path}")
+
+            if email_ok:
+                ok_count += 1
+
+            batch.append(UpdateOne({"_id": email_id}, {"$set": {"attachments": updated_atts}}))
+
+            if len(batch) >= BATCH_SIZE:
+                flush()
+
+            if email_i % 100 == 0:
+                print(f"  {'─'*60}")
+                print(f"  Průběh: emaily={email_i}/{total}  nove={new_count}  dup={dup_count}  skip={skip_count}  err={err_count}")
+                print(f"  {'─'*60}")
+    finally:
+        # Persist finished e-mails even when the loop is interrupted or fails.
+        flush()
+
+    elapsed_total = (datetime.now() - start).total_seconds()
+    files_total   = col_index.count_documents({})
+    size_total    = sum(d.get("size_bytes", 0) for d in col_index.find({}, {"size_bytes": 1}))
+
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  emaily={ok_count}  |  nove={new_count}  |  dup={dup_count}  |  skip={skip_count}  |  err={err_count}")
+    print(f"Souboru v indexu: {files_total}  ({size_total / 1024 / 1024:.1f} MB)")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,560 @@
+"""
+parse_emails_graph_v1.0.py
+Nazev:  parse_emails_graph_v1.0.py
+Verze:  1.0
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Cte vsechny emaily ze schranky ordinace@buzalkova.cz primo pres
+    Microsoft Graph API a importuje je jako dokumenty do MongoDB.
+    Z kazde zpravy extrahuje vsechny dostupne vlastnosti:
+
+        - predmet, odesilatel, prijemci (To/CC/BCC s typy)
+        - cas doruceni, odeslani, vytvoreni, modifikace (UTC)
+        - telo HTML (max 2 MB) + textovy preview
+        - prilohy (metadata: jmeno, velikost, MIME typ, inline flag)
+        - internet headers (SPF, DKIM, Received, X-*, ...)
+        - MAPI-ekvivalenty: dulezitost, priznak, konverzacni vlakno,
+          kategorie, In-Reply-To, References, ...
+        - navic: isRead, isDraft, folder_path, inferenceClassification
+
+    Prochazi VSECHNY slozky schranky rekurzivne (Inbox, Sent, Deleted,
+    archivni slozky, ...).
+
+    DB:       emaily
+    Kolekce:  ordinace@buzalkova.cz
+    _id:      Internet Message-ID (nebo "graphid:<id>" jako fallback)
+
+    Bezpecne prerusit a opakovat:
+        - upsert podle _id — duplicity se automaticky prepisi
+        - --skip-existing nacte seznam hotovych _id z MongoDB a preskoci je
+
+    POZOR: Skript pouze CIST ze schranky — zadny zapis do schranky!
+
+Spousteni:
+    python parse_emails_graph_v1.0.py                    # kompletni import
+    python parse_emails_graph_v1.0.py --limit 50         # test na prvnich 50
+    python parse_emails_graph_v1.0.py --skip-existing    # pokracovani po preruseni
+    python parse_emails_graph_v1.0.py --folder Inbox     # jen jedna slozka
+    python parse_emails_graph_v1.0.py --no-indexes       # bez indexu na konci
+
+Zavislosti:
+    msal, requests, pymongo, python-dateutil
+    Python 3.10+
+
+Struktura dokumentu v MongoDB:
+    _id                     Internet Message-ID (nebo graphid: fallback)
+    graph_id                Graph API message ID (pro pripadne dalsi operace)
+    subject                 predmet zpravy
+    normalized_subject      predmet bez RE:/FW:/AW: prefixu
+    importance              0=nizka 1=normalni 2=vysoka
+    flag_status             0=bez priznaku 1=oznaceno 2=dokonceno
+    is_read                 bool — aktualni stav precteni ve schrance
+    is_draft                bool
+    has_attachments         bool
+    attachment_count        int
+    inference_classification focused / other (Outlook AI trideni)
+    categories              [str]
+    conversation_id         Graph conversationId
+    conversation_index      base64 conversationIndex
+    conversation_topic      tema vlakna (z internet headers Thread-Topic)
+    in_reply_to             Message-ID predchozi zpravy
+    internet_references     [Message-ID] — cela historie vlakna
+    received_at             datetime UTC
+    sent_at                 datetime UTC
+    created_at              datetime UTC — cas vytvoreni zaznamu v M365
+    modified_at             datetime UTC — cas posledni modifikace
+    folder_id               Graph parentFolderId
+    folder_path             cela cesta slozky (napr. Inbox/Subfolder)
+    sender.email            emailova adresa odesilatele
+    sender.name             zobrazovane jmeno odesilatele
+    to                      retezec To (joined)
+    cc                      retezec CC
+    bcc                     retezec BCC
+    recipients              [{type, email, name}] — to/cc/bcc s typy
+    body_html               HTML telo (max 2 MB)
+    body_preview            textovy nahled (max 255 znaku z Graph; u textovych zprav prvnich 2000 znaku tela)
+    attachments             [{filename, size_bytes, mime_type,
+                              content_id, is_inline}]
+    headers                 dict internet headers (lowercase_s_podtrzitky)
+    parsed_at               datetime UTC — cas parsovani
+
+Indexy:
+    received_at, sent_at, sender.email, graph_id (unique),
+    conversation_id, folder_path, has_attachments, categories,
+    importance, flag_status, is_read,
+    text_search (subject + body_preview + to + cc)
+
+Historie verzi:
+    1.0  2026-06-02  Inicialni verze — Graph API jako zdroj
+"""
+
+import argparse
+import base64
+import logging
+import os
+import re
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional
+
+import msal
+import requests
+from dateutil import parser as dtparser
+from pymongo import MongoClient, UpdateOne, ASCENDING, TEXT
+
+# Force UTF-8 console output where the stream supports reconfiguring;
+# characters that cannot be encoded are replaced instead of raising.
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── CONFIGURATION ────────────────────────────────────────────────────────────
+GRAPH_TENANT_ID     = "7d269944-37a4-43a1-8140-c7517dc426e9"
+GRAPH_CLIENT_ID     = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
+# SECURITY: a client secret committed to version control must be treated as
+# leaked. Rotate it and supply the new value through the GRAPH_CLIENT_SECRET
+# environment variable. The literal below is only a fallback that keeps
+# existing deployments running and should be removed after the rotation.
+GRAPH_CLIENT_SECRET = os.environ.get(
+    "GRAPH_CLIENT_SECRET", "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
+)
+GRAPH_MAILBOX       = "ordinace@buzalkova.cz"
+GRAPH_URL           = "https://graph.microsoft.com/v1.0"
+
+MONGO_URI      = "mongodb://192.168.1.76:27017"
+MONGO_DB       = "emaily"
+MONGO_COL      = "ordinace@buzalkova.cz"
+BATCH_SIZE     = 100   # number of upserts sent per bulk_write
+PAGE_SIZE      = 50    # $top used when listing messages of a folder
+LOG_FILE       = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION = "1.0"
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only ERROR and above are written to the log file.
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+# Graph string values mapped to the integer codes stored in MongoDB.
+IMPORTANCE_MAP  = {"low": 0, "normal": 1, "high": 2}
+FLAG_STATUS_MAP = {"notFlagged": 0, "flagged": 1, "complete": 2}
+# One leading reply/forward prefix followed by colons and/or whitespace.
+RE_SUBJECT      = re.compile(r"^(RE|FW|AW|SV|VS|TR|WG|odpov[eě]d[ťt]|fwd?)[:\s]+", re.IGNORECASE)
+
+# Message properties requested from Graph via $select.
+MSG_SELECT = (
+    "id,internetMessageId,subject,bodyPreview,body,"
+    "importance,isRead,isDraft,hasAttachments,"
+    "receivedDateTime,sentDateTime,createdDateTime,lastModifiedDateTime,"
+    "sender,from,toRecipients,ccRecipients,bccRecipients,replyTo,"
+    "conversationId,conversationIndex,parentFolderId,"
+    "categories,flag,inferenceClassification,internetMessageHeaders"
+)
+
+
+# ─── Graph API helpers ────────────────────────────────────────────────────────
+
+# Cached bearer token; filled by get_token().
+_graph_token: Optional[str] = None
+
+
+def get_token() -> str:
+    """Acquire an app-only Graph access token and cache it in ``_graph_token``.
+
+    Returns:
+        The access token string.
+
+    Raises:
+        RuntimeError: when the MSAL response contains no ``access_token``.
+    """
+    global _graph_token
+    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
+    confidential_app = msal.ConfidentialClientApplication(
+        GRAPH_CLIENT_ID,
+        authority=authority_url,
+        client_credential=GRAPH_CLIENT_SECRET,
+    )
+    response = confidential_app.acquire_token_for_client(
+        scopes=["https://graph.microsoft.com/.default"]
+    )
+    if "access_token" in response:
+        _graph_token = response["access_token"]
+        return _graph_token
+    raise RuntimeError(f"Graph auth failed: {response}")
+
+
+def graph_get(url: str, params: Optional[dict] = None) -> dict:
+    """GET a Graph URL and return the decoded JSON body.
+
+    Args:
+        url: absolute Graph URL (also used for ``@odata.nextLink`` pages).
+        params: optional query parameters.
+
+    Returns:
+        The parsed JSON response.
+
+    Raises:
+        RuntimeError: when the request is still rejected with 401 after the
+            token has been refreshed once.
+        requests.HTTPError: for any other error status, including throttling
+            that persists through all attempts.
+
+    A 401 triggers a single token refresh. Throttling and temporary outages
+    (429, 503, 504) are retried after the delay given in ``Retry-After``, or
+    after an exponential back-off when the header is absent, so one throttled
+    page does not abort a long import.
+    """
+    global _graph_token
+    if not _graph_token:
+        get_token()
+
+    max_attempts = 6
+    token_refreshed = False
+    for attempt in range(max_attempts):
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, params=params, timeout=30)
+        if r.status_code == 401:
+            if token_refreshed:
+                break
+            token_refreshed = True
+            get_token()
+            continue
+        if r.status_code in (429, 503, 504) and attempt + 1 < max_attempts:
+            retry_after = r.headers.get("Retry-After", "")
+            # Retry-After is expected in whole seconds; otherwise back off 1, 2, 4, ... s.
+            delay = int(retry_after) if retry_after.isdigit() else min(2 ** attempt, 60)
+            time.sleep(delay)
+            continue
+        r.raise_for_status()
+        return r.json()
+    raise RuntimeError(f"Graph GET failed after retry: {url}")
+
+
+def get_all_folders(parent_id: str = None, parent_path: str = "") -> list[dict]:
+    """Recursively list the mail folders of the mailbox.
+
+    Args:
+        parent_id: folder whose children are listed; ``None`` lists the
+            top-level folders.
+        parent_path: path of the parent folder, used as prefix of child paths.
+
+    Returns:
+        A flat list of ``{"id", "path"}`` dicts; each folder is followed by
+        its descendants.
+    """
+    base = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders"
+    next_url = base if parent_id is None else f"{base}/{parent_id}/childFolders"
+    query = {"$top": 100, "$select": "id,displayName,childFolderCount"}
+
+    collected = []
+    while next_url:
+        page = graph_get(next_url, query)
+        for entry in page.get("value", []):
+            full_path = f"{parent_path}/{entry['displayName']}".lstrip("/")
+            collected.append({"id": entry["id"], "path": full_path})
+            has_children = entry.get("childFolderCount", 0) > 0
+            if has_children:
+                collected += get_all_folders(entry["id"], full_path)
+        next_url = page.get("@odata.nextLink")
+        # The next link already carries the query string.
+        query = None
+    return collected
+
+
+def iter_folder_messages(folder_id: str):
+    """Yield the messages of one folder, fetching them page by page.
+
+    Each page requests ``PAGE_SIZE`` messages with the ``MSG_SELECT``
+    properties and the attachments expanded.
+
+    NOTE(review): expanding attachments presumably transfers attachment
+    content as well, although only metadata is used later — confirm and
+    consider narrowing the expand.
+    """
+    next_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
+    query = {"$top": PAGE_SIZE, "$select": MSG_SELECT, "$expand": "attachments"}
+    while next_url:
+        page = graph_get(next_url, query)
+        yield from page.get("value", [])
+        next_url = page.get("@odata.nextLink")
+        # The next link already carries the query string.
+        query = None
+
+
+# ─── Pomocné funkce ───────────────────────────────────────────────────────────
+
+def parse_date(raw) -> Optional[datetime]:
+    """Convert a date value to a naive ``datetime`` expressed in UTC.
+
+    ``None`` yields ``None``. A ``datetime`` is converted directly; any other
+    value is parsed from its string form. Timezone-aware values are shifted
+    to UTC and stripped of tzinfo; naive values are returned unchanged.
+    A value that cannot be parsed yields ``None``.
+    """
+    def to_naive_utc(moment: datetime) -> datetime:
+        if moment.tzinfo:
+            return moment.astimezone(timezone.utc).replace(tzinfo=None)
+        return moment
+
+    if raw is None:
+        return None
+    if isinstance(raw, datetime):
+        return to_naive_utc(raw)
+    try:
+        return to_naive_utc(dtparser.parse(str(raw)))
+    except Exception:
+        return None
+
+
+def normalize_subject(subject: str) -> str:
+    """Strip all leading reply/forward prefixes matched by ``RE_SUBJECT``.
+
+    Prefixes are removed repeatedly, so stacked prefixes are all dropped;
+    the result has no surrounding whitespace.
+    """
+    remainder = subject.strip()
+    while (prefix := RE_SUBJECT.match(remainder)) is not None:
+        remainder = remainder[prefix.end():].strip()
+    return remainder
+
+
+def parse_headers(raw_headers: list) -> dict:
+    """Fold a list of ``{"name", "value"}`` headers into a dict.
+
+    Keys are the header names lower-cased with ``-`` replaced by ``_``.
+    A header that occurs once maps to its value; a repeated header maps to
+    the list of its values in order of appearance.
+    """
+    merged = {}
+    for header in raw_headers:
+        key = header["name"].lower().replace("-", "_")
+        value = header["value"]
+        if key not in merged:
+            merged[key] = value
+            continue
+        previous = merged[key]
+        if isinstance(previous, list):
+            previous.append(value)
+        else:
+            # Second occurrence: switch from a scalar to a list.
+            merged[key] = [previous, value]
+    return merged
+
+
+def format_recipients(lst: list) -> str:
+    """Render recipients as ``Name <address>`` entries joined by ``"; "``.
+
+    Each item must contain an ``emailAddress`` dict; a missing name or
+    address is rendered as an empty string.
+    """
+    formatted = []
+    for recipient in lst:
+        mailbox = recipient["emailAddress"]
+        display = mailbox.get("name", "")
+        address = mailbox.get("address", "")
+        formatted.append(f"{display} <{address}>".strip())
+    return "; ".join(formatted)
+
+
+# ─── Hlavní extrakce ─────────────────────────────────────────────────────────
+
+def extract_message(msg: dict, folder_path: str) -> Optional[dict]:
+    """Convert one Graph message resource into the MongoDB document.
+
+    Args:
+        msg: message dict as returned by Graph (with attachments expanded).
+        folder_path: path of the folder the message was read from.
+
+    Returns:
+        The document to upsert, or ``None`` when extraction failed; the
+        failure is written to the error log.
+
+    Properties that Graph delivers as JSON ``null`` (body, recipient lists,
+    sender address) are treated like missing ones, so such a message is
+    imported instead of being dropped.
+    """
+    try:
+        # _id: Internet Message-ID, with the Graph id as fallback.
+        mid = (msg.get("internetMessageId") or "").strip()
+        if not mid:
+            mid = f"graphid:{msg['id']}"
+
+        subject = msg.get("subject") or ""
+        norm_subject = normalize_subject(subject)
+
+        # Body: HTML is stored capped at 2 Mi characters; for a plain-text
+        # body the preview is replaced by its first 2000 characters.
+        body_html = None
+        body_preview = msg.get("bodyPreview") or ""
+        body = msg.get("body") or {}
+        if body.get("contentType") == "html":
+            content = body.get("content") or ""
+            body_html = content if len(content) <= 2 * 1024 * 1024 else content[:2 * 1024 * 1024]
+        elif body.get("contentType") == "text":
+            body_preview = (body.get("content") or "")[:2000]
+
+        # Sender: "from" is preferred, "sender" is the fallback.
+        sender_ea = (msg.get("from") or msg.get("sender") or {}).get("emailAddress") or {}
+        sender_email = sender_ea.get("address", "")
+        sender_name  = sender_ea.get("name", "")
+
+        # Recipients.
+        to_list  = msg.get("toRecipients") or []
+        cc_list  = msg.get("ccRecipients") or []
+        bcc_list = msg.get("bccRecipients") or []
+
+        recipients = (
+            [{"type": "to",  "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in to_list] +
+            [{"type": "cc",  "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in cc_list] +
+            [{"type": "bcc", "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in bcc_list]
+        )
+
+        # Flags; unknown values fall back to normal / not flagged.
+        importance  = IMPORTANCE_MAP.get(msg.get("importance", "normal"), 1)
+        flag_status = FLAG_STATUS_MAP.get((msg.get("flag") or {}).get("flagStatus", "notFlagged"), 0)
+
+        # Internet headers.
+        raw_headers = msg.get("internetMessageHeaders") or []
+        headers = parse_headers(raw_headers)
+
+        # A repeated header arrives as a list; the first value is used.
+        in_reply_to = headers.get("in_reply_to", "")
+        if isinstance(in_reply_to, list):
+            in_reply_to = in_reply_to[0]
+
+        refs_raw = headers.get("references", "")
+        if isinstance(refs_raw, list):
+            refs_raw = " ".join(refs_raw)
+        internet_refs = refs_raw.split() if refs_raw else []
+
+        conv_topic = headers.get("thread_topic", "")
+        if isinstance(conv_topic, list):
+            conv_topic = conv_topic[0]
+
+        # Conversation index: re-encoded base64, or the raw value when it
+        # cannot be decoded.
+        conv_index = ""
+        ci_raw = msg.get("conversationIndex")
+        if ci_raw:
+            try:
+                conv_index = base64.b64encode(base64.b64decode(ci_raw)).decode()
+            except Exception:
+                conv_index = ci_raw
+
+        # Attachments: metadata only; entries without a name are skipped.
+        attachments = []
+        for att in msg.get("attachments") or []:
+            fname = att.get("name") or ""
+            if not fname:
+                continue
+            attachments.append({
+                "filename":   fname,
+                "size_bytes": att.get("size", 0),
+                "mime_type":  att.get("contentType", "application/octet-stream"),
+                "content_id": att.get("contentId"),
+                "is_inline":  att.get("isInline", False),
+            })
+
+        return {
+            "_id":     mid,
+            "graph_id": msg["id"],
+
+            "subject":            subject,
+            "normalized_subject": norm_subject,
+            "importance":         importance,
+            "flag_status":        flag_status,
+            "is_read":            msg.get("isRead", False),
+            "is_draft":           msg.get("isDraft", False),
+            "has_attachments":    msg.get("hasAttachments", False),
+            "attachment_count":   len(attachments),
+            "inference_classification": msg.get("inferenceClassification", ""),
+            "categories":         msg.get("categories") or [],
+
+            "conversation_id":    msg.get("conversationId", ""),
+            "conversation_index": conv_index,
+            "conversation_topic": conv_topic,
+            "in_reply_to":        in_reply_to,
+            "internet_references": internet_refs,
+
+            "received_at": parse_date(msg.get("receivedDateTime")),
+            "sent_at":     parse_date(msg.get("sentDateTime")),
+            "created_at":  parse_date(msg.get("createdDateTime")),
+            "modified_at": parse_date(msg.get("lastModifiedDateTime")),
+
+            "folder_id":   msg.get("parentFolderId", ""),
+            "folder_path": folder_path,
+
+            "sender": {
+                "email": sender_email,
+                "name":  sender_name,
+            },
+            "to":         format_recipients(to_list),
+            "cc":         format_recipients(cc_list),
+            "bcc":        format_recipients(bcc_list),
+            "recipients": recipients,
+
+            "body_html":    body_html,
+            "body_preview": body_preview,
+
+            "attachments": attachments,
+            "headers":     headers,
+
+            # Stored as a naive UTC datetime.
+            "parsed_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        }
+
+    except Exception as e:
+        # Best effort: one broken message must not stop the import.
+        logging.error("extract_message failed [%s]: %s", msg.get("id", "?"), e)
+        return None
+
+
+# ─── MongoDB indexy ───────────────────────────────────────────────────────────
+
+def create_indexes(col):
+    """Create the single-field indexes and the text index on ``col``.
+
+    ``graph_id`` gets a unique sparse index; the text index ``text_search``
+    covers subject, body preview, To and CC with language processing off.
+    """
+    print("  Vytvarim indexy...")
+    single_field_specs = [
+        ("received_at",     {}),
+        ("sent_at",         {}),
+        ("sender.email",    {}),
+        ("graph_id",        {"unique": True, "sparse": True}),
+        ("conversation_id", {}),
+        ("folder_path",     {}),
+        ("has_attachments", {}),
+        ("categories",      {}),
+        ("importance",      {}),
+        ("flag_status",     {}),
+        ("is_read",         {}),
+    ]
+    for field, options in single_field_specs:
+        col.create_index([(field, ASCENDING)], **options)
+
+    text_fields = ("subject", "body_preview", "to", "cc")
+    col.create_index(
+        [(field, TEXT) for field in text_fields],
+        name="text_search",
+        default_language="none",
+    )
+    print("  Indexy hotovy.")
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Import the messages of the configured mailbox into MongoDB.
+
+    Steps:
+        1. Parse the command line.
+        2. Verify that a Graph token can be obtained and MongoDB answers a ping.
+        3. With --skip-existing, load the ``_id`` of every stored document.
+        4. List the folders (optionally filtered by --folder) and upsert the
+           messages of each folder in batches of ``BATCH_SIZE``.
+        5. Print a summary and, unless --no-indexes is given, create indexes.
+    """
+    ap = argparse.ArgumentParser(description=f"parse_emails_graph v{SCRIPT_VERSION}")
+    ap.add_argument("--limit",         type=int, default=0,
+                    help="Zpracovat max N zprav (0 = vse)")
+    ap.add_argument("--skip-existing", action="store_true",
+                    help="Preskocit zpravy ktere jiz jsou v MongoDB")
+    ap.add_argument("--folder",        default="",
+                    help="Zpracovat jen slozku se zadanym nazvem (napr. Inbox)")
+    ap.add_argument("--no-indexes",    action="store_true",
+                    help="Nevytvorit indexy na konci")
+    args = ap.parse_args()
+
+    start = datetime.now()
+    print(f"=== parse_emails_graph v{SCRIPT_VERSION} ===")
+    print(f"Start:    {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Schránka: {GRAPH_MAILBOX}")
+    print(f"MongoDB:  {MONGO_URI} -> {MONGO_DB}.{MONGO_COL}")
+
+    # Graph token.
+    print("\nPřipojuji se k Graph API...")
+    try:
+        get_token()
+        print("  Graph API OK")
+    except Exception as e:
+        print(f"  CHYBA: {e}")
+        sys.exit(1)
+
+    # MongoDB.
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        sys.exit(1)
+    col = client[MONGO_DB][MONGO_COL]
+
+    # Skip existing: the ids are streamed with a projection because the
+    # result of distinct() is limited to a single 16 MB document.
+    existing: set = set()
+    if args.skip_existing:
+        print("  Nacitam existujici zaznamy z MongoDB...")
+        existing = {d["_id"] for d in col.find({}, {"_id": 1})}
+        print(f"  {len(existing)} jiz importovano")
+
+    # Folders; --folder matches as a case-insensitive substring of the path.
+    print("\nNacitam seznam slozek...")
+    all_folders = get_all_folders()
+    if args.folder:
+        all_folders = [f for f in all_folders if args.folder.lower() in f["path"].lower()]
+    print(f"  Slozek ke zpracovani: {len(all_folders)}")
+    for f in all_folders:
+        print(f"    {f['path']}")
+
+    # Import.
+    batch     = []
+    ok_count  = 0
+    err_count = 0
+    skip_count = 0
+    total_i   = 0
+
+    def flush():
+        """Write the pending upserts; failures are logged, not raised."""
+        if not batch:
+            return
+        try:
+            col.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    print()
+    for folder in all_folders:
+        print(f"--- Složka: {folder['path']} ---")
+        folder_count = 0
+
+        for msg in iter_folder_messages(folder["id"]):
+            if args.limit and total_i >= args.limit:
+                break
+
+            mid = (msg.get("internetMessageId") or "").strip() or f"graphid:{msg['id']}"
+
+            # Skipped messages count towards --limit as well.
+            if mid in existing:
+                skip_count += 1
+                total_i += 1
+                continue
+
+            doc = extract_message(msg, folder["path"])
+            total_i += 1
+            folder_count += 1
+
+            if doc is None:
+                err_count += 1
+            else:
+                batch.append(UpdateOne({"_id": doc["_id"]}, {"$set": doc}, upsert=True))
+                ok_count += 1
+
+            if len(batch) >= BATCH_SIZE:
+                flush()
+
+            status      = "ERR " if doc is None else "OK  "
+            subject_str = (doc.get("subject") or "")[:60] if doc else "?"
+            sender_str  = (doc.get("sender", {}).get("email") or "")[:40] if doc else "?"
+            print(f"  {total_i:>6}  {status}  {subject_str:<60}  {sender_str}")
+
+            if total_i % 500 == 0:
+                elapsed = (datetime.now() - start).total_seconds()
+                rate    = total_i / elapsed if elapsed > 0 else 0
+                print(f"  {'─'*80}")
+                print(f"  Průběh: ok={ok_count}  skip={skip_count}  err={err_count}  {rate:.1f} msg/s")
+                print(f"  {'─'*80}")
+
+        flush()
+        print(f"  → {folder_count} zprav ze slozky {folder['path']}")
+
+        if args.limit and total_i >= args.limit:
+            break
+
+    elapsed_total = (datetime.now() - start).total_seconds()
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  ok={ok_count}  |  skip={skip_count}  |  err={err_count}")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"Dokumentu v kolekci: {col.count_documents({})}")
+
+    if not args.no_indexes:
+        print()
+        create_indexes(col)
+
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,611 @@
+"""
+parse_emails_graph_v1.3.py
+Nazev:  parse_emails_graph_v1.3.py
+Verze:  1.3
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Cte vsechny emaily z libovolne schranky primo pres Microsoft Graph API
+    a importuje je jako dokumenty do MongoDB.
+    Z kazde zpravy extrahuje vsechny dostupne vlastnosti:
+
+        - predmet, odesilatel, prijemci (To/CC/BCC s typy)
+        - cas doruceni, odeslani, vytvoreni, modifikace (UTC)
+        - telo HTML (max 2 MB) + textovy preview
+        - prilohy (metadata: jmeno, velikost, MIME typ, inline flag, graph_att_id)
+        - internet headers (SPF, DKIM, Received, X-*, ...)
+        - MAPI-ekvivalenty: dulezitost, priznak, konverzacni vlakno,
+          kategorie, In-Reply-To, References, ...
+        - navic: isRead, isDraft, folder_path, inferenceClassification
+
+    Prochazi VSECHNY slozky schranky rekurzivne (Inbox, Sent, Deleted,
+    archivni slozky, ...).
+
+    DB:       emaily
+    Kolekce:  <mailbox> (napr. ordinace@buzalkova.cz)
+    _id:      Internet Message-ID (nebo "graphid:<id>" jako fallback)
+
+    POZOR: Skript pouze CIST ze schranky — zadny zapis do schranky!
+
+Spousteni:
+    # Prvni import (vsechno):
+    python parse_emails_graph_v1.3.py --mailbox ordinace@buzalkova.cz
+
+    # Test na prvnich 50:
+    python parse_emails_graph_v1.3.py --mailbox ordinace@buzalkova.cz --limit 50 --no-indexes
+
+    # Jen jedna slozka:
+    python parse_emails_graph_v1.3.py --mailbox ordinace@buzalkova.cz --folder Inbox
+
+    # Pokracovani po preruseni (pouze nove):
+    python parse_emails_graph_v1.3.py --mailbox ordinace@buzalkova.cz --mode new-only
+
+    # Pravidelny sync (aktualizuje is_read, flag, slozku; importuje nove):
+    python parse_emails_graph_v1.3.py --mailbox ordinace@buzalkova.cz --mode sync
+
+    # Jina schranka:
+    python parse_emails_graph_v1.3.py --mailbox vladimir.buzalka@buzalka.cz
+
+Rezimy (--mode):
+    full      Plny upsert vsech poli pro kazdou zpravu (vychozi)
+    new-only  Preskoci zpravy ktere uz jsou v MongoDB, importuje jen nove
+    sync      Existujici: aktualizuje jen is_read/flag_status/categories/
+              modified_at/folder_path. Nove zpravy importuje cely.
+              Idealni pro pravidelne spousteni.
+
+Zavislosti:
+    msal, requests, pymongo, python-dateutil
+    Python 3.10+
+
+Struktura dokumentu v MongoDB:
+    _id                     Internet Message-ID (nebo graphid: fallback)
+    graph_id                Graph API message ID
+    subject                 predmet zpravy
+    normalized_subject      predmet bez RE:/FW:/AW: prefixu
+    importance              0=nizka 1=normalni 2=vysoka
+    flag_status             0=bez priznaku 1=oznaceno 2=dokonceno
+    is_read                 bool — aktualni stav precteni ve schrance
+    is_draft                bool
+    has_attachments         bool
+    attachment_count        int
+    inference_classification focused / other
+    categories              [str]
+    conversation_id         Graph conversationId
+    conversation_index      base64 conversationIndex
+    conversation_topic      tema vlakna (z internet headers Thread-Topic)
+    in_reply_to             Message-ID predchozi zpravy
+    internet_references     [Message-ID]
+    received_at             datetime UTC
+    sent_at                 datetime UTC
+    created_at              datetime UTC
+    modified_at             datetime UTC
+    folder_id               Graph parentFolderId
+    folder_path             cela cesta slozky (napr. Inbox/Subfolder)
+    sender.email            emailova adresa odesilatele
+    sender.name             zobrazovane jmeno
+    to                      retezec To (joined)
+    cc                      retezec CC
+    bcc                     retezec BCC
+    recipients              [{type, email, name}]
+    body_html               HTML telo (max 2 MB)
+    body_preview            textovy nahled (max 255 znaku)
+    attachments             [{filename, size_bytes, mime_type, is_inline, graph_att_id}]
+    headers                 dict internet headers
+    parsed_at               datetime UTC
+
+Indexy:
+    received_at, sent_at, sender.email, graph_id (unique),
+    conversation_id, folder_path, has_attachments, categories,
+    importance, flag_status, is_read,
+    text_search (subject + body_preview + to + cc)
+
+Historie verzi:
+    1.0  2026-06-02  Inicialni verze
+    1.1  2026-06-02  Pridany rezimy --mode full/new-only/sync;
+                     odstranen --skip-existing (nahrazen --mode new-only)
+    1.2  2026-06-02  $expand attachments s $select (bez contentBytes — rychlejsi);
+                     prilohy ukladaji graph_att_id pro prime stazeni bez name-matchingu
+    1.3  2026-06-02  --mailbox jako povinny parametr — univerzalni pouziti pro
+                     libovolnou schranku; kolekce v MongoDB = nazev schranky
+"""
+
+import sys
+import re
+import logging
+import argparse
+import base64
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional
+
+import msal
+import requests
+from dateutil import parser as dtparser
+from pymongo import MongoClient, UpdateOne, ASCENDING, TEXT
+
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── CONFIGURATION ────────────────────────────────────────────────────────────
+# Credentials for the Azure AD app registration used for the client-credentials
+# flow in get_token().
+# NOTE(review): GRAPH_CLIENT_SECRET is hard-coded in a committed file — consider
+# rotating it and loading it from the environment or a secrets store instead.
+GRAPH_TENANT_ID     = "7d269944-37a4-43a1-8140-c7517dc426e9"
+GRAPH_CLIENT_ID     = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
+GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
+GRAPH_URL           = "https://graph.microsoft.com/v1.0"
+
+MONGO_URI      = "mongodb://192.168.1.76:27017"
+MONGO_DB       = "emaily"
+# Number of pending write operations that triggers a bulk_write in main().
+BATCH_SIZE     = 100
+# Value sent as `$top` when listing messages of a folder.
+PAGE_SIZE      = 50
+# Error log file, placed next to this script.
+LOG_FILE       = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION = "1.3"
+
+# The mailbox is set at runtime from the --mailbox parameter
+GRAPH_MAILBOX: str = ""
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only errors are written to the log file; progress goes to stdout via print().
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+# Graph string values -> integer codes stored in MongoDB.
+IMPORTANCE_MAP  = {"low": 0, "normal": 1, "high": 2}
+FLAG_STATUS_MAP = {"notFlagged": 0, "flagged": 1, "complete": 2}
+# Matches one leading reply/forward prefix followed by at least one colon or
+# whitespace character (case-insensitive); applied repeatedly by normalize_subject().
+RE_SUBJECT      = re.compile(r"^(RE|FW|AW|SV|VS|TR|WG|odpov[eě]d[ťt]|fwd?)[:\s]+", re.IGNORECASE)
+
+# $expand attachments without contentBytes — only the metadata we need
+ATT_EXPAND = "attachments($select=id,name,contentType,size,isInline)"
+
+# `$select` list used when a message is imported in full.
+MSG_SELECT = (
+    "id,internetMessageId,subject,bodyPreview,body,"
+    "importance,isRead,isDraft,hasAttachments,"
+    "receivedDateTime,sentDateTime,createdDateTime,lastModifiedDateTime,"
+    "sender,from,toRecipients,ccRecipients,bccRecipients,replyTo,"
+    "conversationId,conversationIndex,parentFolderId,"
+    "categories,flag,inferenceClassification,internetMessageHeaders"
+)
+
+# Reduced `$select` list used when listing messages in sync mode
+# (only the fields read by extract_sync_fields plus the identifiers).
+MSG_SELECT_SYNC = (
+    "id,internetMessageId,isRead,isDraft,flag,categories,"
+    "lastModifiedDateTime,parentFolderId,importance"
+)
+
+
+# ─── Graph API helpers ────────────────────────────────────────────────────────
+
+_graph_token: Optional[str] = None
+
+
+def get_token() -> str:
+    global _graph_token
+    app = msal.ConfidentialClientApplication(
+        GRAPH_CLIENT_ID,
+        authority=f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}",
+        client_credential=GRAPH_CLIENT_SECRET,
+    )
+    result = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
+    if "access_token" not in result:
+        raise RuntimeError(f"Graph auth failed: {result}")
+    _graph_token = result["access_token"]
+    return _graph_token
+
+
+def graph_get(url: str, params: dict = None) -> dict:
+    global _graph_token
+    if not _graph_token:
+        get_token()
+    for attempt in range(2):
+        r = requests.get(url, headers={"Authorization": f"Bearer {_graph_token}"}, params=params, timeout=30)
+        if r.status_code == 401:
+            get_token()
+            continue
+        r.raise_for_status()
+        return r.json()
+    raise RuntimeError(f"Graph GET failed after retry: {url}")
+
+
+def get_all_folders(parent_id: str = None, parent_path: str = "") -> list[dict]:
+    """Rekurzivne nacte vsechny slozky schranky. Vraci [{id, path}]."""
+    if parent_id is None:
+        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders"
+    else:
+        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"
+
+    folders = []
+    params = {"$top": 100, "$select": "id,displayName,childFolderCount"}
+    while url:
+        data = graph_get(url, params)
+        for f in data.get("value", []):
+            path = f"{parent_path}/{f['displayName']}".lstrip("/")
+            folders.append({"id": f["id"], "path": path})
+            if f.get("childFolderCount", 0) > 0:
+                folders.extend(get_all_folders(f["id"], path))
+        url = data.get("@odata.nextLink")
+        params = None
+    return folders
+
+
+def iter_folder_messages(folder_id: str, select: str = MSG_SELECT, expand_attachments: bool = True):
+    """Generator: vraci zpravy ze slozky po strankach."""
+    url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
+    params = {"$top": PAGE_SIZE, "$select": select}
+    if expand_attachments:
+        params["$expand"] = ATT_EXPAND
+    while url:
+        data = graph_get(url, params)
+        for msg in data.get("value", []):
+            yield msg
+        url = data.get("@odata.nextLink")
+        params = None
+
+
+# ─── Pomocné funkce ───────────────────────────────────────────────────────────
+
+def parse_date(raw) -> Optional[datetime]:
+    if raw is None:
+        return None
+    if isinstance(raw, datetime):
+        if raw.tzinfo:
+            return raw.astimezone(timezone.utc).replace(tzinfo=None)
+        return raw
+    try:
+        dt = dtparser.parse(str(raw))
+        if dt.tzinfo:
+            return dt.astimezone(timezone.utc).replace(tzinfo=None)
+        return dt
+    except Exception:
+        return None
+
+
+def normalize_subject(subject: str) -> str:
+    s = subject.strip()
+    while True:
+        m = RE_SUBJECT.match(s)
+        if not m:
+            break
+        s = s[m.end():].strip()
+    return s
+
+
+def parse_headers(raw_headers: list) -> dict:
+    result = {}
+    for h in raw_headers:
+        k = h["name"].lower().replace("-", "_")
+        v = h["value"]
+        if k in result:
+            existing = result[k]
+            result[k] = existing + [v] if isinstance(existing, list) else [existing, v]
+        else:
+            result[k] = v
+    return result
+
+
+def format_recipients(lst: list) -> str:
+    return "; ".join(
+        f'{r["emailAddress"].get("name", "")} <{r["emailAddress"].get("address", "")}>'.strip()
+        for r in lst
+    )
+
+
+# ─── Extrakce zprávy ─────────────────────────────────────────────────────────
+
+def extract_message(msg: dict, folder_path: str) -> Optional[dict]:
+    """Plna extrakce — pouziva se pro mode full a nove zpravy v sync/new-only."""
+    try:
+        mid = (msg.get("internetMessageId") or "").strip() or f"graphid:{msg['id']}"
+        subject = msg.get("subject") or ""
+
+        body_html = None
+        body_preview = msg.get("bodyPreview") or ""
+        body = msg.get("body", {})
+        if body.get("contentType") == "html":
+            content = body.get("content") or ""
+            body_html = content if len(content) <= 2 * 1024 * 1024 else content[:2 * 1024 * 1024]
+        elif body.get("contentType") == "text":
+            body_preview = (body.get("content") or "")[:2000]
+
+        sender_ea    = (msg.get("from") or msg.get("sender") or {}).get("emailAddress", {})
+        to_list      = msg.get("toRecipients", [])
+        cc_list      = msg.get("ccRecipients", [])
+        bcc_list     = msg.get("bccRecipients", [])
+
+        recipients = (
+            [{"type": "to",  "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in to_list] +
+            [{"type": "cc",  "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in cc_list] +
+            [{"type": "bcc", "email": r["emailAddress"].get("address",""), "name": r["emailAddress"].get("name","")} for r in bcc_list]
+        )
+
+        importance  = IMPORTANCE_MAP.get(msg.get("importance", "normal"), 1)
+        flag_status = FLAG_STATUS_MAP.get((msg.get("flag") or {}).get("flagStatus", "notFlagged"), 0)
+
+        raw_headers   = msg.get("internetMessageHeaders") or []
+        headers       = parse_headers(raw_headers)
+
+        in_reply_to = headers.get("in_reply_to", "")
+        if isinstance(in_reply_to, list):
+            in_reply_to = in_reply_to[0]
+
+        refs_raw = headers.get("references", "")
+        if isinstance(refs_raw, list):
+            refs_raw = " ".join(refs_raw)
+        internet_refs = [r.strip() for r in refs_raw.split() if r.strip()] if refs_raw else []
+
+        conv_topic = headers.get("thread_topic", "")
+        if isinstance(conv_topic, list):
+            conv_topic = conv_topic[0]
+
+        conv_index = ""
+        ci_raw = msg.get("conversationIndex")
+        if ci_raw:
+            try:
+                conv_index = base64.b64encode(base64.b64decode(ci_raw)).decode()
+            except Exception:
+                conv_index = ci_raw
+
+        attachments = []
+        for att in msg.get("attachments") or []:
+            fname = att.get("name") or ""
+            if not fname:
+                continue
+            attachments.append({
+                "filename":     fname,
+                "size_bytes":   att.get("size", 0),
+                "mime_type":    att.get("contentType", "application/octet-stream"),
+                "is_inline":    att.get("isInline", False),
+                "graph_att_id": att.get("id"),
+            })
+
+        return {
+            "_id":      mid,
+            "graph_id": msg["id"],
+
+            "subject":            subject,
+            "normalized_subject": normalize_subject(subject),
+            "importance":         importance,
+            "flag_status":        flag_status,
+            "is_read":            msg.get("isRead", False),
+            "is_draft":           msg.get("isDraft", False),
+            "has_attachments":    msg.get("hasAttachments", False),
+            "attachment_count":   len(attachments),
+            "inference_classification": msg.get("inferenceClassification", ""),
+            "categories":         msg.get("categories") or [],
+
+            "conversation_id":     msg.get("conversationId", ""),
+            "conversation_index":  conv_index,
+            "conversation_topic":  conv_topic,
+            "in_reply_to":         in_reply_to,
+            "internet_references": internet_refs,
+
+            "received_at": parse_date(msg.get("receivedDateTime")),
+            "sent_at":     parse_date(msg.get("sentDateTime")),
+            "created_at":  parse_date(msg.get("createdDateTime")),
+            "modified_at": parse_date(msg.get("lastModifiedDateTime")),
+
+            "folder_id":   msg.get("parentFolderId", ""),
+            "folder_path": folder_path,
+
+            "sender": {
+                "email": sender_ea.get("address", ""),
+                "name":  sender_ea.get("name", ""),
+            },
+            "to":         format_recipients(to_list),
+            "cc":         format_recipients(cc_list),
+            "bcc":        format_recipients(bcc_list),
+            "recipients": recipients,
+
+            "body_html":    body_html,
+            "body_preview": body_preview,
+
+            "attachments": attachments,
+            "headers":     headers,
+
+            "parsed_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        }
+
+    except Exception as e:
+        logging.error("extract_message failed [%s]: %s", msg.get("id", "?"), e)
+        return None
+
+
+def extract_sync_fields(msg: dict, folder_path: str) -> dict:
+    """Jen menitelna pole — pouziva se v sync mode pro existujici zpravy."""
+    return {
+        "is_read":    msg.get("isRead", False),
+        "is_draft":   msg.get("isDraft", False),
+        "flag_status": FLAG_STATUS_MAP.get((msg.get("flag") or {}).get("flagStatus", "notFlagged"), 0),
+        "importance":  IMPORTANCE_MAP.get(msg.get("importance", "normal"), 1),
+        "categories":  msg.get("categories") or [],
+        "modified_at": parse_date(msg.get("lastModifiedDateTime")),
+        "folder_id":   msg.get("parentFolderId", ""),
+        "folder_path": folder_path,
+        "parsed_at":   datetime.now(timezone.utc).replace(tzinfo=None),
+    }
+
+
+# ─── MongoDB indexy ───────────────────────────────────────────────────────────
+
+def create_indexes(col):
+    print("  Vytvarim indexy...")
+    col.create_index([("received_at",     ASCENDING)])
+    col.create_index([("sent_at",         ASCENDING)])
+    col.create_index([("sender.email",    ASCENDING)])
+    col.create_index([("graph_id",        ASCENDING)], unique=True, sparse=True)
+    col.create_index([("conversation_id", ASCENDING)])
+    col.create_index([("folder_path",     ASCENDING)])
+    col.create_index([("has_attachments", ASCENDING)])
+    col.create_index([("categories",      ASCENDING)])
+    col.create_index([("importance",      ASCENDING)])
+    col.create_index([("flag_status",     ASCENDING)])
+    col.create_index([("is_read",         ASCENDING)])
+    col.create_index([
+        ("subject",      TEXT),
+        ("body_preview", TEXT),
+        ("to",           TEXT),
+        ("cc",           TEXT),
+    ], name="text_search", default_language="none")
+    print("  Indexy hotovy.")
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Command-line entry point: import or sync one mailbox into MongoDB.
+
+    Parses the arguments, authenticates against Graph, connects to MongoDB,
+    walks the mailbox folders and writes the messages in batches according
+    to ``--mode``. Prints progress to stdout and exits with status 1 when
+    Graph authentication or the MongoDB ping fails.
+    """
+    global GRAPH_MAILBOX
+
+    ap = argparse.ArgumentParser(description=f"parse_emails_graph v{SCRIPT_VERSION}")
+    ap.add_argument("--mailbox",    required=True,
+                    help="Emailova schranka (napr. ordinace@buzalkova.cz)")
+    ap.add_argument("--mode", default="full", choices=["full", "new-only", "sync"],
+                    help="full=plny upsert (vychozi) | new-only=jen nove zpravy | "
+                         "sync=existujici aktualizuje jen menitelna pole, nove importuje cely")
+    ap.add_argument("--limit",      type=int, default=0,
+                    help="Zpracovat max N zprav (0 = vse)")
+    ap.add_argument("--folder",     default="",
+                    help="Zpracovat jen slozku se zadanym nazvem (napr. Inbox)")
+    ap.add_argument("--no-indexes", action="store_true",
+                    help="Nevytvorit indexy na konci")
+    args = ap.parse_args()
+
+    # The mailbox address is used both for the Graph URLs and as the collection name.
+    GRAPH_MAILBOX = args.mailbox
+    mongo_col     = args.mailbox
+
+    start = datetime.now()
+    print(f"=== parse_emails_graph v{SCRIPT_VERSION} ===")
+    print(f"Start:    {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Schránka: {GRAPH_MAILBOX}")
+    print(f"MongoDB:  {MONGO_URI} -> {MONGO_DB}.{mongo_col}")
+    print(f"Režim:    {args.mode}")
+
+    # Fail fast when Graph authentication does not work.
+    print("\nPřipojuji se k Graph API...")
+    try:
+        get_token()
+        print("  Graph API OK")
+    except Exception as e:
+        print(f"  CHYBA: {e}")
+        sys.exit(1)
+
+    # Fail fast when MongoDB is unreachable (5 s server selection timeout).
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        sys.exit(1)
+    col = client[MONGO_DB][mongo_col]
+
+    # IDs already stored; only loaded for the modes that need to tell
+    # existing messages from new ones.
+    existing: set = set()
+    if args.mode in ("new-only", "sync"):
+        print("  Nacitam existujici zaznamy z MongoDB...")
+        existing = set(col.distinct("_id"))
+        print(f"  {len(existing)} jiz importovano")
+
+    print("\nNacitam seznam slozek...")
+    all_folders = get_all_folders()
+    if args.folder:
+        # Case-insensitive substring match against the full folder path.
+        all_folders = [f for f in all_folders if args.folder.lower() in f["path"].lower()]
+    print(f"  Slozek ke zpracovani: {len(all_folders)}")
+    for f in all_folders:
+        print(f"    {f['path']}")
+
+    # Sync mode lists messages with the reduced field set and without attachments.
+    is_sync    = args.mode == "sync"
+    msg_select = MSG_SELECT_SYNC if is_sync else MSG_SELECT
+    expand_att = not is_sync
+
+    batch      = []   # pending UpdateOne operations
+    ok_count   = 0    # fully extracted messages queued for upsert
+    sync_count = 0    # existing messages queued for a mutable-field update
+    err_count  = 0    # messages that failed to fetch or extract
+    skip_count = 0    # existing messages skipped in new-only mode
+    total_i    = 0    # messages seen so far (drives --limit and progress output)
+
+    def flush():
+        # Write the pending operations unordered; a failure is logged and
+        # printed, and the batch is cleared either way.
+        if not batch:
+            return
+        try:
+            col.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    print()
+    for folder in all_folders:
+        print(f"--- Složka: {folder['path']} ---")
+        folder_count = 0
+
+        for msg in iter_folder_messages(folder["id"], select=msg_select, expand_attachments=expand_att):
+            # Stop before counting this message once the limit is reached.
+            if args.limit and total_i >= args.limit:
+                break
+
+            # Same _id rule as extract_message(): Message-ID, else graphid:<id>.
+            mid = (msg.get("internetMessageId") or "").strip() or f"graphid:{msg['id']}"
+            total_i += 1
+            folder_count += 1
+
+            if args.mode == "new-only" and mid in existing:
+                skip_count += 1
+                continue
+
+            if is_sync and mid in existing:
+                # Existing document: update only the mutable fields (no upsert).
+                fields = extract_sync_fields(msg, folder["path"])
+                batch.append(UpdateOne({"_id": mid}, {"$set": fields}))
+                sync_count += 1
+                print(f"  {total_i:>6}  SYN   {mid[:80]}")
+            else:
+                if is_sync:
+                    # New message in sync mode: the listing only carried the
+                    # reduced field set, so fetch the full message first.
+                    full_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{msg['id']}"
+                    full_params = {"$select": MSG_SELECT, "$expand": ATT_EXPAND}
+                    try:
+                        msg = graph_get(full_url, full_params)
+                    except Exception as e:
+                        logging.error("full fetch failed [%s]: %s", msg.get("id","?"), e)
+                        err_count += 1
+                        continue
+
+                doc = extract_message(msg, folder["path"])
+                if doc is None:
+                    err_count += 1
+                    print(f"  {total_i:>6}  ERR   {mid[:80]}")
+                else:
+                    # Upsert by _id, so re-running the import overwrites instead of duplicating.
+                    batch.append(UpdateOne({"_id": doc["_id"]}, {"$set": doc}, upsert=True))
+                    ok_count += 1
+                    subject_str = (doc.get("subject") or "")[:60]
+                    sender_str  = (doc.get("sender", {}).get("email") or "")[:40]
+                    print(f"  {total_i:>6}  OK    {subject_str:<60}  {sender_str}")
+
+            if len(batch) >= BATCH_SIZE:
+                flush()
+
+            # Progress separator every 500 messages seen.
+            if total_i % 500 == 0:
+                elapsed = (datetime.now() - start).total_seconds()
+                rate    = total_i / elapsed if elapsed > 0 else 0
+                print(f"  {'─'*80}")
+                print(f"  Průběh: ok={ok_count}  sync={sync_count}  skip={skip_count}  err={err_count}  {rate:.1f} msg/s")
+                print(f"  {'─'*80}")
+
+        # Write whatever is left for this folder before moving on.
+        flush()
+        print(f"  → {folder_count} zprav ze slozky {folder['path']}")
+
+        # The inner break only leaves the message loop; stop the folder loop too.
+        if args.limit and total_i >= args.limit:
+            break
+
+    elapsed_total = (datetime.now() - start).total_seconds()
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  ok={ok_count}  |  sync={sync_count}  |  skip={skip_count}  |  err={err_count}")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"Dokumentu v kolekci: {col.count_documents({})}")
+
+    if not args.no_indexes:
+        print()
+        create_indexes(col)
+
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,248 @@
+# parse_emails_tower_v1.1
+
+## Spuštění
+
+**První spuštění:**
+```bash
+docker exec -d python-runner bash -c \
+  "python /scripts/parse_emails_tower_v1.1.py > /scripts/parse_emails.log 2>&1"
+```
+
+**Pokračování po přerušení (přeskočí už importované):**
+```bash
+docker exec -d python-runner bash -c \
+  "python /scripts/parse_emails_tower_v1.1.py --skip-existing > /scripts/parse_emails.log 2>&1"
+```
+
+---
+
+## Stav importu
+
+**Sledování průběhu (live log):**
+```bash
+docker exec -it python-runner tail -f /scripts/parse_emails.log
+```
+
+**Počet emailů v MongoDB:**
+```bash
+docker exec -it python-runner python -c \
+  "from pymongo import MongoClient; c=MongoClient('mongodb://192.168.1.76:27017'); print(c['emaily']['vbuzalka@its.jnj.com'].count_documents({}))"
+```
+
+---
+
+**Název:** parse_emails_tower_v1.1.py  
+**Verze:** 1.1  
+**Datum:** 2026-06-02  
+**Autor:** vladimir.buzalka  
+
+---
+
+## Účel
+
+Import všech `.msg` souborů do MongoDB. Z každého souboru extrahuje **všechny dostupné vlastnosti** — podobně jako EXIF u fotek.
+
+- **DB:** `emaily`  
+- **Kolekce:** `vbuzalka@its.jnj.com`  
+- `_id` = Internet Message-ID (nebo `filename:<stem>` jako fallback)  
+- Bezpečné přerušit a opakovat — upsert podle `_id`
+
+---
+
+## Prostředí
+
+Běží v Docker containeru **python-runner** na **Unraid Tower**.
+
+| Komponenta | Umístění |
+|---|---|
+| Container | `python-runner` (Docker na Unraid Tower) |
+| .msg soubory | `/mnt/user/JNJEMAILS` → `/mnt/JNJEMAILS` uvnitř containeru |
+| Skripty | `/mnt/user/Scripts` → `/scripts` uvnitř containeru |
+| MongoDB | `192.168.1.76:27017` (externí, mimo container) |
+
+---
+
+## Spouštění (z Unraid terminálu)
+
+**Test na 50 emailech:**
+```bash
+docker exec -it python-runner python /scripts/parse_emails_tower_v1.1.py --limit 50 --no-indexes
+```
+
+**Kompletní import na pozadí (log do souboru):**
+```bash
+docker exec -d python-runner bash -c \
+  "python /scripts/parse_emails_tower_v1.1.py > /scripts/parse_emails.log 2>&1"
+```
+
+**Pokračování po přerušení:**
+```bash
+docker exec -d python-runner bash -c \
+  "python /scripts/parse_emails_tower_v1.1.py --skip-existing > /scripts/parse_emails.log 2>&1"
+```
+
+**Sledování průběhu (Ctrl+C ukončí sledování, import běží dál):**
+```bash
+docker exec -it python-runner tail -f /scripts/parse_emails.log
+```
+
+### Všechny parametry
+
+| Parametr | Popis |
+|---|---|
+| `--skip-existing` | Načte seznam hotových souborů z MongoDB a přeskočí je. Použij pro pokračování po přerušení. |
+| `--limit N` | Zpracuje jen prvních N souborů. Vhodné pro test. |
+| `--no-indexes` | Nevytváří indexy na konci. Použij pokud přerušíš uprostřed — indexy vytvoř ručně až je vše hotové. |
+| `--msgs-dir PATH` | Přepíše výchozí cestu k .msg souborům (výchozí: `/mnt/JNJEMAILS`). |
+
+---
+
+## Průběh na konzoli
+
+Každý email na jednom řádku:
+```
+       1/69371  OK    RE: Protocol deviation CZ10022                    jan.novak@its.jnj.com
+       2/69371  OK    UCO3001: Draft FUL pro DD5-CZ10022                monitor@4gclinical.com
+       3/69371  ERR   ?                                                  ?
+```
+
+Každých 500 emailů oddělovač s průběhem:
+```
+  ────────────────────────────────────────────────────────────────────────────────
+  Průběh: ok=498  err=2  0.4 msg/s  ETA 47h12m
+  ────────────────────────────────────────────────────────────────────────────────
+```
+
+Na konci souhrn:
+```
+====================================================
+Vysledek:  ok=69300  |  skip=0  |  err=71
+Celkovy cas: 47h 23m 10s
+Dokumentu v kolekci: 69300
+```
+
+---
+
+## Zdroje dat z každého .msg
+
+| Pole | Popis |
+|---|---|
+| Předmět, normalized subject | |
+| Odesílatel | email, jméno, SMTP adresa |
+| Příjemci To/CC/BCC | strukturovaně `[{type, email, name}]` |
+| Čas doručení a odeslání | UTC |
+| Tělo | plaintext + HTML (max 2 MB) |
+| Přílohy | metadata: jméno, velikost, MIME typ, inline flag |
+| Internet headers | X-Originating-IP, Received, DKIM, X-Mailer, ... |
+| MAPI | důležitost, citlivost, příznak, konverzační vlákno, kategorie |
+| In-Reply-To, References | pro rekonstrukci vlákna |
+| Raw MAPI properties | `{0xXXXX: value}` |
+
+---
+
+## Hodnotové kódy
+
+| Pole | Hodnota | Význam |
+|---|---|---|
+| `importance` | 0 | Nízká |
+| | 1 | Normální |
+| | 2 | Vysoká |
+| `sensitivity` | 0 | Normální |
+| | 1 | Osobní |
+| | 2 | Soukromé |
+| | 3 | Důvěrné |
+| `flag_status` | 0 | Bez příznaku |
+| | 1 | Označeno (follow up) |
+| | 2 | Dokončeno |
+
+---
+
+## MongoDB indexy
+
+Automaticky vytvořeny na konci importu (`--no-indexes` přeskočí):
+
+| Index | Pole |
+|---|---|
+| Chronologický | `received_at`, `sent_at` |
+| Odesílatel | `sender.email` |
+| Soubor | `filename` (unique) |
+| Konverzace | `conversation_topic` |
+| Filtry | `has_attachments`, `categories`, `importance`, `flag_status` |
+| Full-text | `subject` + `body_text` + `to` + `cc` (text index `text_search`) |
+
+---
+
+## Ukázkové dotazy (MongoDB shell / MCP)
+
+**Emaily o UCO3001 s přílohou:**
+```javascript
+db["vbuzalka@its.jnj.com"].find({
+  $text: { $search: "UCO3001" },
+  has_attachments: true
+}).sort({ received_at: -1 })
+```
+
+**Emaily od konkrétního odesílatele:**
+```javascript
+db["vbuzalka@its.jnj.com"].find({
+  "sender.email": /covance/i
+}).sort({ received_at: -1 })
+```
+
+**Celé konverzační vlákno:**
+```javascript
+db["vbuzalka@its.jnj.com"].find({
+  conversation_topic: "Protocol deviation CZ10022"
+}).sort({ received_at: 1 })
+```
+
+**Statistiky podle odesílatele (top 20):**
+```javascript
+db["vbuzalka@its.jnj.com"].aggregate([
+  { $group: { _id: "$sender.email", count: { $sum: 1 } } },
+  { $sort: { count: -1 } },
+  { $limit: 20 }
+])
+```
+
+---
+
+## Chybový log
+
+Soubory, které selhaly, jsou zalogovány do `parse_emails_errors.log` vedle skriptu (tj. `/scripts/parse_emails_errors.log` → `\\tower\Scripts\parse_emails_errors.log`):
+```
+2026-06-02 20:14:33 | open failed [7A3F...0000.msg]: <důvod>
+```
+
+---
+
+## Výkon
+
+| Parametr | Hodnota |
+|---|---|
+| Počet souborů | ~69 000 |
+| Rychlost | ~0.4 msg/s (htmlBody dekódování) |
+| Odhadovaný čas | 48 hodin |
+| Batch size | 200 dokumentů / bulk_write |
+| Odhadovaná velikost DB | 2–5 GB |
+
+---
+
+## Závislosti (v Docker image python-runner)
+
+```
+extract-msg==0.55.0
+pymongo
+python-dateutil
+```
+
+Image sestaven z `Dockerfile` v `/mnt/user/Scripts/python-runner/`.
+
+---
+
+## Historie verzí
+
+| Verze | Datum | Změna |
+|---|---|---|
+| 1.0 | 2026-06-01 | Iniciální verze |
+| 1.1 | 2026-06-02 | Nasazení na Unraid Tower v Docker containeru python-runner; MSGS_DIR změněno z SMB share (`\\tower\JNJEMAILS`) na lokální mount (`/mnt/JNJEMAILS`); aktualizován popis spouštění pro `docker exec` |
@@ -0,0 +1,660 @@
+"""
+parse_emails_tower_v1.1.py
+Nazev:  parse_emails_tower_v1.1.py
+Verze:  1.1
+Datum:  2026-06-02
+Autor:  vladimir.buzalka
+
+Popis:
+    Parsuje vsechny .msg soubory z MSGS_DIR a importuje je jako dokumenty
+    do MongoDB. Z kazdeho souboru extrahuje VSECHNY dostupne vlastnosti —
+    podobne jako EXIF u fotek:
+
+        - predmet, odesilatel, prijemci (To/CC/BCC s typy)
+        - cas doruceni a odeslani (UTC)
+        - telo plaintext + HTML (max 2 MB)
+        - prilohy (metadata: jmeno, velikost, MIME typ, inline flag)
+        - internet headers (X-Originating-IP, Received, DKIM, ...)
+        - MAPI vlastnosti: dulezitost, citlivost, priznak, konverzacni vlakno,
+          kategorie, In-Reply-To, References, ...
+        - vsechny raw MAPI properties jako {0xXXXX: value}
+
+    DB:       emaily
+    Kolekce:  vbuzalka@its.jnj.com
+    _id:      Internet Message-ID (nebo "filename:<stem>" jako fallback)
+
+    Bezpecne prerusit a opakovat:
+        - upsert podle _id — duplicity se automaticky prepisi
+        - --skip-existing nacte seznam hotovych souboru z MongoDB a
+          preskoci je => pokracovani po preruseni bez ztraty prace
+
+Prostredi:
+    Bezi v Docker containeru "python-runner" na Unraid Tower.
+    .msg soubory jsou dostupne jako lokalni disk (volume mount):
+        /mnt/user/JNJEMAILS  ->  /mnt/JNJEMAILS  (uvnitr containeru)
+    MongoDB na 192.168.1.76:27017 (externi, bezi mimo container).
+
+Spousteni (z Unraid terminalu):
+    # Test na 50 emailech:
+    docker exec -it python-runner python /scripts/parse_emails_tower_v1.1.py --limit 50 --no-indexes
+
+    # Kompletni import na pozadi (log do souboru):
+    docker exec -d python-runner bash -c \
+      "python /scripts/parse_emails_tower_v1.1.py > /scripts/parse_emails.log 2>&1"
+
+    # Pokracovani po preruseni:
+    docker exec -d python-runner bash -c \
+      "python /scripts/parse_emails_tower_v1.1.py --skip-existing > /scripts/parse_emails.log 2>&1"
+
+    # Sledovani prubehu:
+    docker exec -it python-runner tail -f /scripts/parse_emails.log
+
+Vystup na konzoli:
+    Kazdy email na jednom radku:
+        <poradi>/<celkem>  OK/ERR  <predmet 60 znaku>  <odesilatel>
+    Kazdych 500 emailu: oddelovac s prubehem, rychlosti a ETA.
+    Na konci: souhrn ok/skip/err, celkovy cas, pocet dokumentu v kolekci.
+
+Zavislosti (nainstalovane v Docker image python-runner):
+    extract-msg==0.55.0, pymongo, python-dateutil
+    Python 3.12, Linux (Docker container na Unraid Tower)
+
+Struktura dokumentu v MongoDB:
+    _id                     Internet Message-ID (nebo filename: fallback)
+    filename                jmeno .msg souboru (20znakovy hex + .msg)
+    subject                 predmet zpravy
+    normalized_subject      predmet bez RE:/FW: prefixu
+    importance              0=nizka 1=normalni 2=vysoka
+    sensitivity             0=normalni 1=osobni 2=soukrome 3=duverne
+    flag_status             0=bez priznaku 1=oznaceno 2=dokonceno
+    read_receipt_requested  bool
+    delivery_receipt_requested bool
+    has_attachments         bool
+    attachment_count        int
+    message_size_bytes      velikost .msg souboru na disku
+    conversation_topic      tema vlakna (PR_CONVERSATION_TOPIC)
+    conversation_index      base64 PR_CONVERSATION_INDEX
+    in_reply_to             Message-ID predchozi zpravy
+    internet_references     [Message-ID] — cela historie vlakna
+    categories              [str] — MAPI kategorie / stitky
+    received_at             datetime UTC — cas doruceni
+    sent_at                 datetime UTC — cas odeslani
+    sender.email            emailova adresa odesilatele
+    sender.name             zobrazovane jmeno odesilatele
+    sender.smtp             SMTP adresa (pro interni EX adresy)
+    to                      retezec To (tak jak v Outlooku)
+    cc                      retezec CC
+    bcc                     retezec BCC
+    display_to              PR_DISPLAY_TO (zkraceny seznam)
+    display_cc              PR_DISPLAY_CC
+    recipients              [{type, email, name}] — to/cc/bcc s typy
+    body_text               plain text telo
+    body_html               HTML telo (max 2 MB, None pokud neni)
+    attachments             [{filename, size_bytes, mime_type,
+                              content_id, is_inline}]
+    headers                 dict internet headers (lowercase_s_podtrzitky)
+    mapi                    dict vsech raw MAPI properties {0xXXXX: value}
+    parsed_at               datetime UTC — cas parsovani
+
+Indexy (vytvoreny automaticky na konci):
+    received_at, sent_at, sender.email, filename (unique),
+    conversation_topic, has_attachments, categories, importance,
+    flag_status, text_search (subject + body_text + to + cc)
+
+Chyby:
+    Soubory ktere selhaly jsou zalogovany do parse_emails_errors.log
+    v adresari skriptu. Radek: timestamp | open/extract failed | duvod.
+
+Historie verzi:
+    1.0  2026-06-01  Inicialni verze
+    1.1  2026-06-02  Nasazeni na Unraid Tower v Docker containeru python-runner;
+                     MSGS_DIR zmeneno z SMB share na lokalni mount /mnt/JNJEMAILS;
+                     aktualizovany popis spousteni pro docker exec
+"""
+
+import sys
+import re
+import logging
+import argparse
+import base64
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional
+
+import extract_msg
+from dateutil import parser as dtparser
+from pymongo import MongoClient, UpdateOne, ASCENDING, TEXT
+
+# Switch stdout to UTF-8 when the stream supports reconfigure(); characters
+# that still cannot be encoded are replaced instead of raising.
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+
+# ─── CONFIGURATION ────────────────────────────────────────────────────────────
+# MSGS_DIR is the default source directory (overridable with --msgs-dir);
+# BATCH_SIZE is the number of upserts sent per bulk_write; LOG_FILE is placed
+# next to this script.
+MSGS_DIR       = Path("/mnt/JNJEMAILS")
+MONGO_URI      = "mongodb://192.168.1.76:27017"
+MONGO_DB       = "emaily"
+MONGO_COL      = "vbuzalka@its.jnj.com"
+BATCH_SIZE     = 200
+LOG_FILE       = Path(__file__).parent / "parse_emails_errors.log"
+SCRIPT_VERSION = "1.1"
+# ──────────────────────────────────────────────────────────────────────────────
+
+# Only ERROR-level records are written, one line per record:
+# "<timestamp> | <message>".
+logging.basicConfig(
+    filename=str(LOG_FILE),
+    level=logging.ERROR,
+    format="%(asctime)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    encoding="utf-8",
+)
+
+
+# ─── Pomocné funkce ───────────────────────────────────────────────────────────
+
+def safe(obj, *attrs, default=None):
+    """Bezpecne cteni atributu — vrati prvni non-None hodnotu."""
+    for attr in attrs:
+        try:
+            val = getattr(obj, attr, None)
+            if val is None:
+                continue
+            if isinstance(val, str) and not val.strip():
+                continue
+            return val
+        except Exception:
+            continue
+    return default
+
+
+def parse_date(raw) -> Optional[datetime]:
+    """Libovolny datum -> UTC datetime bez tzinfo (pro MongoDB)."""
+    if raw is None:
+        return None
+    if isinstance(raw, datetime):
+        if raw.tzinfo:
+            return raw.astimezone(timezone.utc).replace(tzinfo=None)
+        return raw
+    try:
+        dt = dtparser.parse(str(raw))
+        if dt.tzinfo:
+            return dt.astimezone(timezone.utc).replace(tzinfo=None)
+        return dt
+    except Exception:
+        return None
+
+
+def to_bson(val):
+    """Konvertuje hodnotu na BSON-serializovatelny typ."""
+    if isinstance(val, bytes):
+        return val.hex() if len(val) <= 128 else f"<bytes:{len(val)}>"
+    if isinstance(val, datetime):
+        return parse_date(val)
+    if isinstance(val, (str, int, float, bool, type(None))):
+        return val
+    if isinstance(val, list):
+        return [to_bson(v) for v in val]
+    try:
+        return int(val)
+    except Exception:
+        pass
+    return str(val)
+
+
+# ─── Extrakce částí zprávy ────────────────────────────────────────────────────
+
+def extract_headers(msg) -> dict:
+    headers = {}
+    try:
+        hdr = msg.header
+        if not hdr:
+            return {}
+        from email.header import decode_header as _dh
+
+        def _decode(v: str) -> str:
+            try:
+                parts = _dh(v)
+                out = ""
+                for part, enc in parts:
+                    out += part.decode(enc or "utf-8", errors="replace") if isinstance(part, bytes) else part
+                return out
+            except Exception:
+                return v
+
+        for key in set(hdr.keys()):
+            k = key.lower().replace("-", "_")
+            vals = [_decode(v) for v in hdr.get_all(key, [])]
+            headers[k] = vals if len(vals) > 1 else (vals[0] if vals else "")
+    except Exception as e:
+        logging.error("extract_headers: %s", e)
+    return headers
+
+
+def extract_recipients(msg) -> list:
+    result = []
+    type_map = {1: "to", 2: "cc", 3: "bcc"}
+    try:
+        for r in msg.recipients:
+            rtype = getattr(r, "type", 1)
+            try:
+                rtype = int(rtype)
+            except Exception:
+                try:
+                    rtype = int(rtype.value)
+                except Exception:
+                    rtype = 1
+            rec = {
+                "type":  type_map.get(rtype, "to"),
+                "email": safe(r, "email", default=""),
+                "name":  safe(r, "name",  default=""),
+            }
+            result.append(rec)
+    except Exception as e:
+        logging.error("extract_recipients: %s", e)
+    return result
+
+
+def extract_attachments(msg) -> list:
+    result = []
+    try:
+        for att in msg.attachments:
+            fname = safe(att, "longFilename", "shortFilename", default="")
+            if not fname:
+                continue
+            size = 0
+            try:
+                d = att.data
+                size = len(d) if d else 0
+            except Exception:
+                pass
+            result.append({
+                "filename":   fname,
+                "size_bytes": size,
+                "mime_type":  safe(att, "mimetype", "mimeType", default="application/octet-stream"),
+                "content_id": safe(att, "cid", default=None),
+                "is_inline":  bool(safe(att, "isInline", default=False)),
+            })
+    except Exception as e:
+        logging.error("extract_attachments: %s", e)
+    return result
+
+
+def extract_mapi_props(msg) -> dict:
+    """Vsechny raw MAPI properties jako {0xXXXX: value}."""
+    result = {}
+    try:
+        props = msg.props
+        if not hasattr(props, "items"):
+            return {}
+        for key, prop in props.items():
+            try:
+                val = to_bson(prop.value)
+                prop_id = f"0x{key[:4].upper()}" if len(key) >= 4 else f"0x{key.upper()}"
+                result[prop_id] = val
+            except Exception:
+                pass
+    except Exception as e:
+        logging.error("extract_mapi_props: %s", e)
+    return result
+
+
+# ─── Hlavní extrakce ─────────────────────────────────────────────────────────
+
+def extract_message(msg_path: Path) -> Optional[dict]:
+    """Parsuje jeden .msg soubor -> MongoDB dokument."""
+    try:
+        msg = extract_msg.Message(str(msg_path))
+    except Exception as e:
+        logging.error("open failed [%s]: %s", msg_path.name, e)
+        return None
+
+    try:
+        # ── Message-ID ────────────────────────────────────────────────
+        mid = None
+        for attr in ("messageId", "message_id", "internetMessageId"):
+            mid = safe(msg, attr)
+            if mid:
+                break
+        if not mid:
+            mid = f"filename:{msg_path.stem}"
+        mid = str(mid).strip()
+
+        # ── Předmět ───────────────────────────────────────────────────
+        try:
+            subject = msg.subject or ""
+        except Exception:
+            subject = ""
+
+        normalized_subject = safe(msg, "normalizedSubject", "normalized_subject", default="")
+
+        # ── Tělo ──────────────────────────────────────────────────────
+        try:
+            body_text = msg.body or ""
+        except Exception:
+            body_text = ""
+
+        body_html = None
+        try:
+            bh = msg.htmlBody
+            if isinstance(bh, bytes):
+                bh = bh.decode("utf-8", errors="replace")
+            if bh:
+                body_html = bh if len(bh) <= 2 * 1024 * 1024 else bh[:2 * 1024 * 1024]
+        except Exception:
+            pass
+
+        # ── Odesílatel ────────────────────────────────────────────────
+        try:
+            sender_email = msg.sender or ""
+        except Exception:
+            sender_email = ""
+
+        sender_name = safe(msg, "senderName", "sender_name", default="")
+        sender_smtp = safe(msg, "senderSmtpAddress", "sent_representing_smtp_address", default="")
+
+        # ── Příjemci ──────────────────────────────────────────────────
+        recipients = extract_recipients(msg)
+
+        try:
+            to_raw = msg.to or ""
+        except Exception:
+            to_raw = ""
+        try:
+            cc_raw = msg.cc or ""
+        except Exception:
+            cc_raw = ""
+        try:
+            bcc_raw = getattr(msg, "bcc", None) or ""
+        except Exception:
+            bcc_raw = ""
+
+        display_to = safe(msg, "displayTo",  "display_to",  default="")
+        display_cc = safe(msg, "displayCc",  "display_cc",  default="")
+
+        # ── Časy ──────────────────────────────────────────────────────
+        try:
+            received_at = parse_date(msg.date)
+        except Exception:
+            received_at = None
+
+        sent_at = None
+        for attr in ("clientSubmitTime", "client_submit_time", "sentOn"):
+            v = safe(msg, attr)
+            if v:
+                sent_at = parse_date(v)
+                break
+
+        # ── MAPI vlastnosti ───────────────────────────────────────────
+        importance = 1
+        try:
+            v = msg.importance
+            if v is not None:
+                importance = int(v)
+        except Exception:
+            pass
+
+        sensitivity = 0
+        try:
+            v = getattr(msg, "sensitivity", None)
+            if v is not None:
+                sensitivity = int(v)
+        except Exception:
+            pass
+
+        flag_status = 0
+        try:
+            v = safe(msg, "flagStatus", "flag_status")
+            if v is not None:
+                flag_status = int(v)
+        except Exception:
+            pass
+
+        conversation_topic = safe(msg, "conversationTopic", "conversation_topic", default="")
+
+        conversation_index = ""
+        try:
+            ci = safe(msg, "conversationIndex", "conversation_index")
+            if isinstance(ci, bytes):
+                conversation_index = base64.b64encode(ci).decode()
+            elif ci:
+                conversation_index = str(ci)
+        except Exception:
+            pass
+
+        in_reply_to = safe(msg, "inReplyTo", "in_reply_to", default="")
+
+        internet_refs = []
+        try:
+            refs = safe(msg, "internetReferences", "internet_references")
+            if isinstance(refs, list):
+                internet_refs = refs
+            elif isinstance(refs, str) and refs:
+                internet_refs = [r.strip() for r in refs.split() if r.strip()]
+        except Exception:
+            pass
+
+        categories = []
+        try:
+            cats = safe(msg, "categories")
+            if isinstance(cats, list):
+                categories = [str(c) for c in cats if c]
+            elif isinstance(cats, str) and cats:
+                categories = [c.strip() for c in re.split(r"[;,]", cats) if c.strip()]
+        except Exception:
+            pass
+
+        read_receipt     = bool(safe(msg, "readReceiptRequested",    "read_receipt_requested",    default=False))
+        delivery_receipt = bool(safe(msg, "deliveryReceiptRequested", "delivery_receipt_requested", default=False))
+
+        # ── Internet headers ──────────────────────────────────────────
+        headers = extract_headers(msg)
+
+        if not in_reply_to:
+            in_reply_to = headers.get("in_reply_to", "")
+        if not internet_refs:
+            refs_str = headers.get("references", "")
+            if isinstance(refs_str, str) and refs_str:
+                internet_refs = [r.strip() for r in refs_str.split() if r.strip()]
+
+        # ── Přílohy ───────────────────────────────────────────────────
+        attachments = extract_attachments(msg)
+
+        # ── Raw MAPI ──────────────────────────────────────────────────
+        mapi_raw = extract_mapi_props(msg)
+
+        msg.close()
+
+        # ── Dokument ──────────────────────────────────────────────────
+        return {
+            "_id":      mid,
+            "filename": msg_path.name,
+
+            "subject":            subject,
+            "normalized_subject": normalized_subject,
+            "importance":         importance,
+            "sensitivity":        sensitivity,
+            "flag_status":        flag_status,
+            "read_receipt_requested":     read_receipt,
+            "delivery_receipt_requested": delivery_receipt,
+            "has_attachments":    len(attachments) > 0,
+            "attachment_count":   len(attachments),
+            "message_size_bytes": msg_path.stat().st_size,
+
+            "conversation_topic":  conversation_topic,
+            "conversation_index":  conversation_index,
+            "in_reply_to":         in_reply_to,
+            "internet_references": internet_refs,
+            "categories":          categories,
+
+            "received_at": received_at,
+            "sent_at":     sent_at,
+
+            "sender": {
+                "email": sender_email,
+                "name":  sender_name,
+                "smtp":  sender_smtp,
+            },
+            "to":         to_raw,
+            "cc":         cc_raw,
+            "bcc":        bcc_raw,
+            "display_to": display_to,
+            "display_cc": display_cc,
+            "recipients": recipients,
+
+            "body_text": body_text,
+            "body_html": body_html,
+
+            "attachments": attachments,
+            "headers":     headers,
+            "mapi":        mapi_raw,
+
+            "parsed_at": datetime.now(timezone.utc).replace(tzinfo=None),
+        }
+
+    except Exception as e:
+        logging.error("extract_message failed [%s]: %s", msg_path.name, e)
+        return None
+
+
+# ─── MongoDB indexy ───────────────────────────────────────────────────────────
+
+def create_indexes(col):
+    print("  Vytvarim indexy...")
+    col.create_index([("received_at",        ASCENDING)])
+    col.create_index([("sent_at",            ASCENDING)])
+    col.create_index([("sender.email",       ASCENDING)])
+    col.create_index([("filename",           ASCENDING)], unique=True, sparse=True)
+    col.create_index([("conversation_topic", ASCENDING)])
+    col.create_index([("has_attachments",    ASCENDING)])
+    col.create_index([("categories",         ASCENDING)])
+    col.create_index([("importance",         ASCENDING)])
+    col.create_index([("flag_status",        ASCENDING)])
+    col.create_index([
+        ("subject",   TEXT),
+        ("body_text", TEXT),
+        ("to",        TEXT),
+        ("cc",        TEXT),
+    ], name="text_search", default_language="none")
+    print("  Indexy hotovy.")
+
+
+# ─── MAIN ─────────────────────────────────────────────────────────────────────
+
+def main():
+    ap = argparse.ArgumentParser(description=f"parse_emails v{SCRIPT_VERSION}")
+    ap.add_argument("--msgs-dir",      default=str(MSGS_DIR),
+                    help="Cesta k .msg souborum")
+    ap.add_argument("--limit",         type=int, default=0,
+                    help="Zpracovat max N souboru (0 = vse)")
+    ap.add_argument("--skip-existing", action="store_true",
+                    help="Preskocit soubory ktere jiz jsou v MongoDB (pokracovani)")
+    ap.add_argument("--no-indexes",    action="store_true",
+                    help="Nevytvorit indexy na konci")
+    args = ap.parse_args()
+
+    msgs_dir = Path(args.msgs_dir)
+    start    = datetime.now()
+
+    print(f"=== parse_emails v{SCRIPT_VERSION} ===")
+    print(f"Start:   {start.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Zdroj:   {msgs_dir}")
+    print(f"MongoDB: {MONGO_URI} -> {MONGO_DB}.{MONGO_COL}")
+
+    # MongoDB
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
+    try:
+        client.admin.command("ping")
+        print("  MongoDB OK")
+    except Exception as e:
+        print(f"  CHYBA: MongoDB neni dostupna -- {e}")
+        sys.exit(1)
+
+    col = client[MONGO_DB][MONGO_COL]
+
+    # Skip existing — nacti seznam uz importovanych souboru
+    existing: set = set()
+    if args.skip_existing:
+        print("  Nacitam existujici zaznamy z MongoDB...")
+        existing = set(col.distinct("filename"))
+        print(f"  {len(existing)} jiz importovano")
+
+    # Scan
+    print(f"\nSkenuji {msgs_dir} ...")
+    all_files = sorted(msgs_dir.glob("*.msg"))
+    if args.limit:
+        all_files = all_files[:args.limit]
+
+    to_process = [f for f in all_files if f.name not in existing]
+    skipped    = len(all_files) - len(to_process)
+    total      = len(to_process)
+
+    print(f"  Celkem .msg:    {len(all_files)}")
+    print(f"  Preskoceno:     {skipped}")
+    print(f"  Ke zpracovani:  {total}\n")
+
+    if total == 0:
+        print("Neni co importovat.")
+        client.close()
+        return
+
+    batch     = []
+    ok_count  = 0
+    err_count = 0
+
+    def flush():
+        if not batch:
+            return
+        try:
+            col.bulk_write(batch, ordered=False)
+        except Exception as e:
+            logging.error("bulk_write: %s", e)
+            print(f"  CHYBA bulk_write: {e}")
+        batch.clear()
+
+    for i, msg_path in enumerate(to_process, 1):
+        doc = extract_message(msg_path)
+
+        if doc is None:
+            err_count += 1
+        else:
+            batch.append(UpdateOne({"_id": doc["_id"]}, {"$set": doc}, upsert=True))
+            ok_count += 1
+
+        if len(batch) >= BATCH_SIZE:
+            flush()
+
+        # Výpis každého emailu
+        status = "ERR " if doc is None else "OK  "
+        subject_str = (doc.get("subject") or "")[:60] if doc else "?"
+        sender_str  = (doc.get("sender", {}).get("email") or "")[:40] if doc else "?"
+        print(f"  {i:>6}/{total}  {status}  {subject_str:<60}  {sender_str}")
+
+        if i % 500 == 0:
+            elapsed = (datetime.now() - start).total_seconds()
+            rate    = i / elapsed if elapsed > 0 else 0
+            eta_s   = int((total - i) / rate) if rate > 0 else 0
+            print(f"  {'─'*80}")
+            print(f"  Průběh: ok={ok_count}  err={err_count}  "
+                  f"{rate:.1f} msg/s  ETA {eta_s//3600}h{(eta_s%3600)//60}m")
+            print(f"  {'─'*80}")
+
+    flush()
+
+    elapsed_total = (datetime.now() - start).total_seconds()
+    print(f"\n{'='*52}")
+    print(f"Vysledek:  ok={ok_count}  |  skip={skipped}  |  err={err_count}")
+    print(f"Celkovy cas: {int(elapsed_total//3600)}h {int((elapsed_total%3600)//60)}m {int(elapsed_total%60)}s")
+    print(f"Dokumentu v kolekci: {col.count_documents({})}")
+
+    if not args.no_indexes:
+        print()
+        create_indexes(col)
+
+    print(f"\nKonec: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    if err_count:
+        print(f"Chyby logovany do: {LOG_FILE}")
+
+    client.close()
+
+
+# Run the import only when the file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,122 @@
+# python-runner — Docker kontejner na Tower
+
+## Základní info
+
+| Parametr       | Hodnota                                      |
+|----------------|----------------------------------------------|
+| Název          | python-runner                                |
+| Image          | python-runner (vlastní)                      |
+| Status         | running (unless-stopped)                     |
+| Python         | 3.12.13                                      |
+| Spouštěcí cmd  | `tail -f /dev/null` — container jen běží, skripty se spouštějí ručně |
+| Working dir    | `/scripts`                                   |
+| Vytvořen       | 2026-06-02                                   |
+
+---
+
+## Tower — SSH přístup
+
+| Parametr | Hodnota          |
+|----------|------------------|
+| Host     | tower / 192.168.1.76 |
+| Port     | 22               |
+| User     | root             |
+| Heslo    | 7309208104       |
+
+**Připojení přes Python (paramiko)** — Docker CLI není lokálně dostupný:
+
+```python
+import paramiko
+c = paramiko.SSHClient()
+c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+c.connect('192.168.1.76', username='root', password='7309208104')
+_, out, _ = c.exec_command('...')
+print(out.read().decode())
+c.close()
+```
+
+---
+
+## Volume mounty
+
+| Host (Unraid)         | Kontejner         | Popis                        |
+|-----------------------|-------------------|------------------------------|
+| `/mnt/user/Scripts`   | `/scripts`        | Skripty, logy — working dir  |
+| `/mnt/user/JNJEMAILS` | `/mnt/JNJEMAILS`  | .msg soubory emailů (JNJ)    |
+
+---
+
+## Spouštění skriptů
+
+```bash
+# Interaktivně (vidíš výstup):
+docker exec -it python-runner python /scripts/parse_emails_tower_v1.1.py --limit 50 --no-indexes
+
+# Na pozadí (log do souboru):
+docker exec -d python-runner bash -c \
+  "python /scripts/parse_emails_tower_v1.1.py > /scripts/parse_emails.log 2>&1"
+
+# Pokračování po přerušení (skip hotových):
+docker exec -d python-runner bash -c \
+  "python /scripts/parse_emails_tower_v1.1.py --skip-existing > /scripts/parse_emails.log 2>&1"
+
+# Sledování průběhu:
+docker exec -it python-runner tail -f /scripts/parse_emails.log
+```
+
+---
+
+## Aktuální skripty v /scripts
+
+| Soubor                        | Popis                                          |
+|-------------------------------|------------------------------------------------|
+| `parse_emails_tower_v1.1.py`  | Import .msg → MongoDB (db: emaily, kolekce: vbuzalka@its.jnj.com) |
+| `parse_emails_tower_v1.1.md`  | Dokumentace ke skriptu                         |
+| `parse_emails.log`            | Log průběhu importu                            |
+| `parse_emails_errors.log`     | Log chyb (soubory které selhaly)               |
+
+Lokální protějšek: `EmailsImport/parse_emails_v1.0.py` — identický kód, liší se jen cestou
+(`\\tower\JNJEMAILS` SMB vs. `/mnt/JNJEMAILS` lokální mount) a verzí hlavičky.
+
+---
+
+## Nainstalované Python balíčky
+
+```
+extract-msg        0.55.0
+pymongo            4.17.0
+python-dateutil    2.9.0.post0
+cryptography       48.0.0
+beautifulsoup4     4.13.5
+oletools           0.60.2
+msoffcrypto-tool   6.0.0
+olefile            0.47
+RTFDE              0.1.2.2
+compressed-rtf     1.0.7
+lark               1.3.1
+pcodedmp           1.2.6
+tzlocal            5.3.1
+six                1.17.0
+pip                25.0.1
+```
+
+---
+
+## Přidání nového balíčku
+
+```bash
+docker exec python-runner pip install <balicek>
+```
+
+> Pozor: instalace se ztratí při recreate kontejneru — je třeba přidat do Dockerfile nebo do setup skriptu.
+
+---
+
+## Logika parse_emails (oba skripty)
+
+- Čte všechny `.msg` soubory z MSGS_DIR
+- Extrahuje: předmět, odesílatel, příjemci (To/CC/BCC), tělo (text+HTML), přílohy, internet headers, všechny raw MAPI properties
+- Ukládá do MongoDB: `emaily` → `vbuzalka@its.jnj.com`
+- `_id` = Internet Message-ID (nebo `filename:<stem>` jako fallback)
+- Upsert → bezpečné opakování, `--skip-existing` pro pokračování
+- Indexy: received_at, sent_at, sender.email, filename (unique), full-text (subject+body+to+cc)