Files
2026-06-10 20:16:38 +02:00

586 lines
21 KiB
Python

# app.py | v2.0 | 2026-06-08
# FastAPI server pro příjem .msg a .db souborů, upload do Dropboxu a import do Graph API.
# Endpointy: /upload (.msg → /msgs + Graph import), /upload-db (.db → /msgs/db),
# /upload-file (→ Dropbox /!!!Days/Downloads Z230),
# /message-delete, /message-update (sync: smazání, přečtení, přesun složky),
# /mirror-plan (diff manifestu z JNJ vůči schránce → smaže přebytky, vrátí to_add),
# /status (seznam souborů k odeslání na JNJ — jména zašifrována Fernetem),
# /item/{enc_filename} (stažení souboru — enc_filename je Fernet token).
from fastapi import FastAPI, UploadFile, File, Form, Header, HTTPException, Response
from pydantic import BaseModel
import shutil
import base64
import hashlib
import logging
from pathlib import Path
from typing import Optional
import os
import dropbox
import msal
import requests as http_requests
import extract_msg
from dateutil import parser as dtparser
from datetime import timezone
from dotenv import load_dotenv
from cryptography.fernet import Fernet
# Load environment variables from the .env file that sits next to this module.
load_dotenv(Path(__file__).parent / ".env")
app = FastAPI()
log = logging.getLogger("msgreceiver")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
# Shared bearer token: every endpoint compares the Authorization header to
# "Bearer <TOKEN>".
# NOTE(review): this secret is hard-coded in source — consider loading it from
# the environment and rotating it.
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
# Encryption key derived from TOKEN: SHA-256 digest, urlsafe-base64 encoded
# (Fernet = AES-128 CBC + HMAC).
_FERNET = Fernet(base64.urlsafe_b64encode(hashlib.sha256(TOKEN.encode()).digest()))
# Local storage directories for uploaded .msg and .db files; created at import time.
SAVE_DIR = Path("/msgs")
DB_DIR = Path("/msgs/db")
SAVE_DIR.mkdir(parents=True, exist_ok=True)
DB_DIR.mkdir(parents=True, exist_ok=True)
# Dropbox OAuth settings from the environment; each defaults to "" when unset.
DROPBOX_APP_KEY = os.getenv("DROPBOX_APP_KEY", "")
DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET", "")
# NOTE(review): the variable read here is DROPBOX_APP_REFRESH_TOKEN (with "_APP_") —
# confirm this matches the name used in .env.
DROPBOX_REFRESH_TOKEN = os.getenv("DROPBOX_APP_REFRESH_TOKEN", "")
# --- Graph API config ---
# NOTE(review): tenant/client IDs and the client secret are hard-coded — the
# secret in particular should presumably come from the environment.
GRAPH_TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
GRAPH_CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
GRAPH_MAILBOX = "vladimir.buzalka@buzalka.cz"
GRAPH_ROOT_FOLDER = "JNJ" # subfolder under Inbox — root for imported emails
# Dropbox folder listed by /status and served (then archived) by /item.
DROPBOX_UPLOAD_TO_JNJ = "/!!!Days/Downloads Z230/UploadToJNJ"
GRAPH_URL = "https://graph.microsoft.com/v1.0"
# Cache: folder path → Graph folder ID
_folder_id_cache: dict[str, str] = {}
# Most recently acquired Graph access token; None until the first acquisition.
_graph_token: Optional[str] = None
def _get_graph_token() -> str:
    """Acquire a new Graph access token via client credentials and cache it.

    The token is stored in the module-level ``_graph_token`` and returned.

    Raises:
        RuntimeError: if the MSAL response carries no ``access_token``.
    """
    global _graph_token
    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
    client = msal.ConfidentialClientApplication(
        GRAPH_CLIENT_ID,
        authority=authority_url,
        client_credential=GRAPH_CLIENT_SECRET,
    )
    auth_result = client.acquire_token_for_client(
        scopes=["https://graph.microsoft.com/.default"]
    )
    if "access_token" in auth_result:
        _graph_token = auth_result["access_token"]
        return _graph_token
    raise RuntimeError(f"Graph auth failed: {auth_result}")
def _graph_headers() -> dict:
    """Build the JSON request headers for Graph, acquiring a token if none is cached."""
    bearer = _graph_token if _graph_token else _get_graph_token()
    return {
        "Authorization": f"Bearer {bearer}",
        "Content-Type": "application/json",
    }
def _find_child_folder(children_url: str, name: str) -> Optional[str]:
    """Return the ID of the child folder named *name* (case-insensitive), or None.

    Follows ``@odata.nextLink`` so that every page of the listing is searched,
    not just the first one.

    Raises:
        RuntimeError: if a listing request does not return HTTP 200.
    """
    wanted = name.lower()
    url = f"{children_url}?$top=100"
    while url:
        r = _retry_graph(http_requests.get, url, _graph_headers, timeout=15)
        if r.status_code != 200:
            raise RuntimeError(
                f"Cannot list child folders [{r.status_code}]: {r.text[:200]}"
            )
        data = r.json()
        for f in data.get("value", []):
            if f["displayName"].lower() == wanted:
                return f["id"]
        url = data.get("@odata.nextLink")
    return None


def _ensure_folder(path_parts: list[str]) -> str:
    """Ensure folder hierarchy exists under Inbox, return leaf folder ID.

    Each level is looked up by display name (case-insensitive) and created
    when missing. Resolved IDs are memoised in ``_folder_id_cache`` under the
    "/"-joined path prefix. An empty *path_parts* returns ``"Inbox"``.

    Raises:
        RuntimeError: if a folder can be neither found nor created.
    """
    cache_key = "/".join(path_parts)
    if cache_key in _folder_id_cache:
        return _folder_id_cache[cache_key]
    parent_id = "Inbox"
    for i, part in enumerate(path_parts):
        partial_key = "/".join(path_parts[: i + 1])
        if partial_key in _folder_id_cache:
            parent_id = _folder_id_cache[partial_key]
            continue
        # The same URL lists the children (GET) and creates a child (POST).
        children_url = (
            f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"
        )
        found = _find_child_folder(children_url, part)
        if not found:
            cr = _retry_graph(
                http_requests.post,
                children_url,
                _graph_headers,
                json={"displayName": part},
                timeout=15,
            )
            if cr.status_code in (200, 201):
                found = cr.json()["id"]
            elif cr.status_code == 409:
                # Already exists (race condition) — re-fetch
                found = _find_child_folder(children_url, part)
            if not found:
                raise RuntimeError(f"Cannot create folder '{part}': {cr.text}")
        _folder_id_cache[partial_key] = found
        parent_id = found
    return parent_id
def _map_jnj_folder(folder: str) -> list[str]:
    """Translate a source folder path into Graph folder parts under the JNJ root.

    The first path segment is the mailbox name and is dropped; when it
    contains "online archive" (any case) an "Online Archive" level is inserted.

    '/vbuzalka@its.jnj.com/Inbox/TMP' → ['JNJ', 'Inbox', 'TMP']
    '/Online Archive - vbuzalka@its.jnj.com/Inbox' → ['JNJ', 'Online Archive', 'Inbox']
    """
    segments = [seg for seg in folder.split("/") if seg]
    mapped = [GRAPH_ROOT_FOLDER]
    if not segments:
        return mapped
    mailbox_name, *subfolders = segments
    if "online archive" in mailbox_name.lower():
        mapped.append("Online Archive")
    mapped.extend(subfolders)
    return mapped
def _norm_mid(mid: str) -> str:
    """Normalise an Internet Message-ID for comparison.

    Strips surrounding whitespace, then angle brackets, then whitespace again.
    A falsy input yields an empty string.
    """
    if not mid:
        return ""
    trimmed = mid.strip()
    unbracketed = trimmed.strip("<>")
    return unbracketed.strip()
def _iter_graph_collection(url: str, timeout: int):
    """Yield every item of a paged Graph collection, following ``@odata.nextLink``.

    Raises:
        RuntimeError: if any page request does not return HTTP 200. A silently
            truncated listing would corrupt the mirror diff built on top of it.
    """
    while url:
        r = _retry_graph(http_requests.get, url, _graph_headers, timeout=timeout)
        if r.status_code != 200:
            raise RuntimeError(f"Graph list failed [{r.status_code}]: {r.text[:200]}")
        data = r.json()
        yield from data.get("value", [])
        url = data.get("@odata.nextLink")


def _enumerate_jnj_mailbox(cutoff_iso: str) -> dict[str, str]:
    """Return {normalised internetMessageId: graph_id} for the JNJ folder tree.

    Covers the JNJ root folder and all of its subfolders, restricted to
    messages with ``receivedDateTime >= cutoff_iso``. This is the "what is
    already in the mailbox" side of the mirror diff. Messages older than the
    cutoff are never loaded, so the mirror never touches them.

    Raises:
        RuntimeError: if any Graph listing request fails.
    """
    jnj_id = _ensure_folder([GRAPH_ROOT_FOLDER])
    # BFS over the JNJ root and all subfolders; the list grows while it is walked.
    all_folders = [jnj_id]
    i = 0
    while i < len(all_folders):
        fid = all_folders[i]
        i += 1
        children_url = (
            f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{fid}/childFolders?$top=100"
        )
        for f in _iter_graph_collection(children_url, timeout=20):
            all_folders.append(f["id"])
    # Collect message IDs from every folder, filtered to the cutoff window.
    result: dict[str, str] = {}
    cutoff_enc = cutoff_iso.replace(":", "%3A")
    for fid in all_folders:
        messages_url = (
            f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{fid}/messages"
            f"?$filter=receivedDateTime ge {cutoff_enc}"
            f"&$select=id,internetMessageId&$top=200"
        )
        for m in _iter_graph_collection(messages_url, timeout=30):
            mid = _norm_mid(m.get("internetMessageId", ""))
            if mid:
                result[mid] = m["id"]
    return result
def _make_recipient(addr: str) -> dict:
    """Convert an address string into a Graph recipient object.

    For ``Name <email>`` input, the name is the text before the first "<"
    (trimmed, surrounding double quotes removed) and the address is the text
    between the first "<" and the first ">". Any other input is used verbatim
    as both name and address.
    """
    open_at = addr.find("<")
    close_at = addr.find(">")
    if open_at == -1 or close_at == -1:
        display_name = addr
        address = addr
    else:
        display_name = addr[:open_at].strip().strip('"')
        address = addr[open_at + 1 : close_at].strip()
    return {"emailAddress": {"name": display_name, "address": address}}
def _safe_msg_attr(msg, attr: str, default=None):
    """Read ``msg.<attr>``, returning *default* when the read fails or is empty.

    extract_msg properties can raise on non-standard encodings (cp1252 etc.),
    so each optional field is read defensively.
    """
    try:
        value = getattr(msg, attr, None)
    except Exception:
        return default
    return value or default


def _import_msg_to_graph(msg_path: Path, folder: str) -> Optional[str]:
    """Parse .msg and import into Graph API mailbox. Returns message ID or None.

    Args:
        msg_path: Path of the .msg file on disk.
        folder: Source folder path; mapped through ``_map_jnj_folder`` and
            created on demand through ``_ensure_folder``.

    Returns:
        The ``id`` from the Graph response on success, otherwise None. All
        errors are logged and swallowed, so this function never raises.
    """
    try:
        msg = extract_msg.Message(str(msg_path))
        try:
            subject = msg.subject or "(no subject)"
            body_html = _safe_msg_attr(msg, "htmlBody")
            if isinstance(body_html, bytes):
                body_html = body_html.decode("utf-8", errors="replace")
            body_text = _safe_msg_attr(msg, "body", "")
            sender_email = _safe_msg_attr(msg, "sender", "")
            sender_name = _safe_msg_attr(msg, "senderName", sender_email)
            to_raw = _safe_msg_attr(msg, "to", "")
            cc_raw = _safe_msg_attr(msg, "cc", "")
            date_raw = _safe_msg_attr(msg, "date")
            # Only attachments that have both data and a long filename are kept.
            att_list = [
                {
                    "@odata.type": "#microsoft.graph.fileAttachment",
                    "name": att.longFilename,
                    "contentType": getattr(att, "mimetype", None) or "application/octet-stream",
                    "contentBytes": base64.b64encode(att.data).decode(),
                }
                for att in msg.attachments
                if att.data and att.longFilename
            ]
        finally:
            # Always release the file, even when reading a field or attachment fails.
            msg.close()

        to_list = [a.strip() for a in to_raw.split(";") if a.strip()]
        cc_list = [a.strip() for a in cc_raw.split(";") if a.strip()]

        # Map folder and ensure it exists
        folder_parts = _map_jnj_folder(folder)
        folder_id = _ensure_folder(folder_parts)

        # NOTE(review): 0x0E07 is presumably PR_MESSAGE_FLAGS, with value 1 keeping
        # the created item from being treated as a draft — confirm.
        ext_props = [{"id": "Integer 0x0E07", "value": "1"}]
        dt_str = None
        if date_raw:
            try:
                dt = dtparser.parse(str(date_raw))
                dt_str = dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
            except Exception:
                dt_str = None
        if dt_str:
            # PR_MESSAGE_DELIVERY_TIME (0x0E06) — the only way to set
            # receivedDateTime through the Graph API (the field itself is read-only).
            ext_props.append({"id": "SystemTime 0x0E06", "value": dt_str})

        payload = {
            "subject": subject,
            "body": {
                "contentType": "HTML" if body_html else "Text",
                "content": body_html or body_text,
            },
            "from": _make_recipient(f"{sender_name} <{sender_email}>"),
            "toRecipients": [_make_recipient(a) for a in to_list],
            "ccRecipients": [_make_recipient(a) for a in cc_list],
            "isRead": True,
            "singleValueExtendedProperties": ext_props,
        }
        if dt_str:
            payload["sentDateTime"] = dt_str
        if att_list:
            payload["attachments"] = att_list

        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
        # _retry_graph refreshes the token once on 401, like every other Graph call here.
        r = _retry_graph(http_requests.post, url, _graph_headers, json=payload, timeout=30)
        if r.status_code in (200, 201):
            msg_id = r.json().get("id", "")
            log.info("Graph OK: %s → %s", subject[:60], "/".join(folder_parts))
            return msg_id
        log.error("Graph FAIL [%d]: %s | %s", r.status_code, subject[:60], r.text[:200])
        return None
    except Exception as e:
        log.error("Graph import error for %s: %s", msg_path.name, e)
        return None
@app.post("/upload")
async def upload_msg(
    file: UploadFile = File(...),
    authorization: str = Header(None),
    folder: str = Form(""),
):
    """Store an uploaded .msg/.emsg file in SAVE_DIR and optionally import it to Graph.

    ``.emsg`` uploads are Fernet-decrypted and stored under the matching
    ``.msg`` name. If the target file already exists nothing is written or
    imported and ``{"status": "exists"}`` is returned. When *folder* is
    non-empty the saved file is imported into the mailbox; ``graph_id`` is
    None when no import was requested or the import failed.

    Raises:
        HTTPException: 401 on a bad token, 400 on a wrong extension or an
            undecryptable payload.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    # Keep only the last path component so a crafted filename such as
    # "../../x.msg" cannot be written outside SAVE_DIR.
    upload_name = (file.filename or "").replace("\\", "/").rsplit("/", 1)[-1]
    is_encrypted = upload_name.endswith(".emsg")
    if not upload_name.endswith(".msg") and not is_encrypted:
        raise HTTPException(status_code=400, detail="Only .msg or .emsg files accepted")

    # The file is always stored as .msg
    msg_filename = upload_name[:-5] + ".msg" if is_encrypted else upload_name
    dest = SAVE_DIR / msg_filename
    if dest.exists():
        return {"status": "exists", "file": msg_filename}

    content = await file.read()
    if is_encrypted:
        try:
            content = _FERNET.decrypt(content)
        except Exception as exc:
            # A corrupt or wrongly-keyed payload is a client error, not a server fault.
            raise HTTPException(status_code=400, detail="Invalid encrypted payload") from exc
    dest.write_bytes(content)

    # Import to Graph API if folder was provided by client
    # NOTE(review): if the import fails the file stays on disk, so a repeated
    # upload returns "exists" and never retries the import — confirm this is intended.
    graph_id = None
    if folder:
        graph_id = _import_msg_to_graph(dest, folder)
    return {
        "status": "saved",
        "file": msg_filename,
        "graph_id": graph_id,
    }
@app.post("/upload-db")
async def upload_db(
    file: UploadFile = File(...),
    authorization: str = Header(None)
):
    """Replace the contents of DB_DIR with the uploaded .db file.

    The upload is first written to a temporary ``.part`` file. Only after it
    has been fully received are the existing ``*.db`` files removed and the
    new file moved into place, so a failed upload leaves the old database intact.

    Raises:
        HTTPException: 401 on a bad token, 400 when the name does not end in ``.db``.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    # Keep only the last path component so the name cannot escape DB_DIR.
    db_name = (file.filename or "").replace("\\", "/").rsplit("/", 1)[-1]
    if not db_name.endswith(".db"):
        raise HTTPException(status_code=400, detail="Only .db files accepted")

    dest = DB_DIR / db_name
    # The ".part" suffix keeps the temp file out of the "*.db" glob below.
    tmp = DB_DIR / (db_name + ".part")
    try:
        with tmp.open("wb") as f:
            shutil.copyfileobj(file.file, f)
    except Exception:
        tmp.unlink(missing_ok=True)
        raise

    for old in DB_DIR.glob("*.db"):
        old.unlink()
    tmp.replace(dest)
    return {"status": "saved", "file": db_name}
class MessageDeleteRequest(BaseModel):
    # JSON body of POST /message-delete.
    # graph_id: Graph message ID; message_delete interpolates it into the DELETE URL.
    graph_id: str
class MessageUpdateRequest(BaseModel):
    # JSON body of POST /message-update.
    # Graph ID of the message to change.
    graph_id: str
    # New read state; None (the default) leaves isRead untouched.
    is_read: Optional[bool] = None
    # Source folder path; when non-empty the message is moved to the mapped folder.
    folder: Optional[str] = None
def _retry_graph(method, url, headers_fn, **kwargs):
    """Issue a Graph request; on HTTP 401 refresh the token and repeat it once.

    Args:
        method: Callable performing the request (e.g. ``http_requests.get``).
        url: Request URL.
        headers_fn: Zero-argument callable producing the request headers; it is
            called again after the token refresh.
        **kwargs: Passed through to *method* unchanged.

    Returns:
        The response of the first attempt, or of the second if the first was a 401.
    """
    response = method(url, headers=headers_fn(), **kwargs)
    if response.status_code != 401:
        return response
    _get_graph_token()
    return method(url, headers=headers_fn(), **kwargs)
@app.post("/message-delete")
async def message_delete(req: MessageDeleteRequest, authorization: str = Header(None)):
    """Delete one message from the mailbox by its Graph ID.

    Raises:
        HTTPException: 401 on a bad token; 500 when Graph answers with
            anything other than 200/204.
    """
    expected_auth = f"Bearer {TOKEN}"
    if authorization != expected_auth:
        raise HTTPException(status_code=401, detail="Unauthorized")

    target_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{req.graph_id}"
    response = _retry_graph(http_requests.delete, target_url, _graph_headers, timeout=15)
    if response.status_code not in (200, 204):
        raise HTTPException(
            status_code=500,
            detail=f"Graph DELETE failed: {response.status_code} {response.text[:200]}",
        )
    log.info("Graph DELETE OK: %s", req.graph_id)
    return {"status": "deleted"}
@app.post("/message-update")
async def message_update(req: MessageUpdateRequest, authorization: str = Header(None)):
    """Move a message and/or change its read flag.

    The move runs first because it yields a new Graph ID, which is then used
    for the read-flag update. A failed move is logged and reported as
    ``moved: False``; the read-flag update is still attempted with the old ID.

    Returns:
        Dict with ``status``, optional ``moved`` / ``read_updated`` flags, and
        the (possibly new) ``graph_id``.

    Raises:
        HTTPException: 401 on a bad token.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    message_id = req.graph_id
    outcome: dict = {"status": "ok"}
    messages_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages"

    if req.folder:
        target_parts = _map_jnj_folder(req.folder)
        target_id = _ensure_folder(target_parts)
        move_resp = _retry_graph(
            http_requests.post,
            f"{messages_url}/{message_id}/move",
            _graph_headers,
            json={"destinationId": target_id},
            timeout=15,
        )
        if move_resp.status_code in (200, 201):
            # Graph hands back the moved copy; keep its ID for the next step.
            message_id = move_resp.json().get("id", message_id)
            outcome["moved"] = True
            log.info("Graph MOVE OK: %s%s", req.graph_id, "/".join(target_parts))
        else:
            log.error("Graph MOVE FAIL [%d]: %s", move_resp.status_code, move_resp.text[:200])
            outcome["moved"] = False

    if req.is_read is not None:
        patch_resp = _retry_graph(
            http_requests.patch,
            f"{messages_url}/{message_id}",
            _graph_headers,
            json={"isRead": req.is_read},
            timeout=15,
        )
        read_ok = patch_resp.status_code in (200, 201)
        outcome["read_updated"] = read_ok
        if not read_ok:
            log.error("Graph PATCH isRead FAIL [%d]: %s", patch_resp.status_code, patch_resp.text[:200])

    outcome["graph_id"] = message_id
    return outcome
class MirrorPlanRequest(BaseModel):
    # JSON body of POST /mirror-plan.
    # mirror_plan reads only the "message_id" key of each manifest entry.
    manifest: list[dict] # [{"message_id": ..., "folder": ..., "is_read": ...}]
    # Lower bound handed to the receivedDateTime filter when enumerating the mailbox.
    cutoff: str # ISO8601 UTC, e.g. "2026-05-09T00:00:00Z"
@app.post("/mirror-plan")
async def mirror_plan(req: MirrorPlanRequest, authorization: str = Header(None)):
    """Diff the client's message manifest against the mailbox and reconcile.

    - deletes mailbox messages that are absent from the manifest
    - returns ``to_add``: the message IDs missing from the mailbox (the client
      then uploads them through /upload)

    Only messages inside the cutoff window are enumerated, so older archive
    mail is never deleted.

    Raises:
        HTTPException: 401 on a bad token.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    # normalised id → original message_id (echoed back to the client unchanged)
    manifest_map: dict[str, str] = {
        normalized: entry["message_id"]
        for entry in req.manifest
        if (normalized := _norm_mid(entry.get("message_id", "")))
    }

    mailbox = _enumerate_jnj_mailbox(req.cutoff)  # {norm_mid: graph_id}

    to_add = [original for key, original in manifest_map.items() if key not in mailbox]
    stale_graph_ids = [gid for key, gid in mailbox.items() if key not in manifest_map]

    deleted = 0
    for gid in stale_graph_ids:
        response = _retry_graph(
            http_requests.delete,
            f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{gid}",
            _graph_headers,
            timeout=15,
        )
        if response.status_code not in (200, 204):
            log.error("mirror delete FAIL [%d]: %s", response.status_code, response.text[:150])
            continue
        deleted += 1

    manifest_count = len(manifest_map)
    mailbox_count = len(mailbox)
    log.info(
        "mirror-plan: manifest=%d mailbox=%d → add=%d delete=%d",
        manifest_count, mailbox_count, len(to_add), deleted,
    )
    return {
        "to_add": to_add,
        "deleted": deleted,
        "manifest_count": manifest_count,
        "mailbox_count": mailbox_count,
    }
@app.post("/upload-file")
async def upload_file(
    file: UploadFile = File(...),
    authorization: str = Header(None),
):
    """Upload a file to Dropbox under "/!!!Days/Downloads Z230/".

    A name ending in ``.enc`` marks a Fernet-encrypted payload: it is
    decrypted and stored without the suffix. An existing Dropbox file of the
    same name is overwritten.

    Raises:
        HTTPException: 401 on a bad token, 500 when Dropbox is not
            configured, 400 on a missing filename or undecryptable payload.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    if not DROPBOX_REFRESH_TOKEN:
        raise HTTPException(status_code=500, detail="Dropbox not configured")

    upload_name = file.filename or ""
    is_encrypted = upload_name.endswith(".enc")
    orig_filename = upload_name[:-4] if is_encrypted else upload_name
    if not orig_filename:
        # Without a name the Dropbox target path would end in "/".
        raise HTTPException(status_code=400, detail="Missing filename")

    raw = await file.read()
    if is_encrypted:
        try:
            file_content = _FERNET.decrypt(raw)
        except Exception as exc:
            # A corrupt or wrongly-keyed payload is a client error, not a server fault.
            raise HTTPException(status_code=400, detail="Invalid encrypted payload") from exc
    else:
        file_content = raw

    dbx = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    dropbox_path = f"/!!!Days/Downloads Z230/{orig_filename}"
    dbx.files_upload(file_content, dropbox_path, mode=dropbox.files.WriteMode.overwrite)
    return {"status": "uploaded", "file": orig_filename, "dropbox_path": dropbox_path}
@app.get("/status")
async def pending_files(authorization: str = Header(None)):
    """List the files waiting in the Dropbox UploadToJNJ folder.

    Only file entries are reported (subfolders are skipped). Each name is
    returned as a Fernet token that /item/{token} accepts. If the listing
    fails, the error is logged and an empty list is returned.

    Raises:
        HTTPException: 401 on a bad token.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    dbx = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    files: list[str] = []
    try:
        result = dbx.files_list_folder(DROPBOX_UPLOAD_TO_JNJ)
        while True:
            files.extend(
                e.name for e in result.entries
                if isinstance(e, dropbox.files.FileMetadata)
            )
            # Large folders are returned in several pages; keep going until done.
            if not result.has_more:
                break
            result = dbx.files_list_folder_continue(result.cursor)
    except Exception as exc:
        # Best effort: report nothing pending, but leave a trace of the reason.
        log.warning("pending-files: listing %s failed: %s", DROPBOX_UPLOAD_TO_JNJ, exc)
        files = []
    log.info("pending-files: %d souboru", len(files))
    # File names are encrypted — the client sees only an opaque token in the URL
    # (bypass Zscaler).
    encrypted_names = [_FERNET.encrypt(name.encode()).decode() for name in files]
    return {"files": encrypted_names}
@app.get("/item/{filename:path}")
async def download_file(filename: str, authorization: str = Header(None)):
    """Return one pending Dropbox file, Fernet-encrypted, then archive it.

    Args:
        filename: Fernet token holding the encrypted original file name, as
            issued by /status.

    After a successful download the Dropbox file is moved into the
    ``##Trash`` subfolder. A failed move is only logged; the content is still
    returned.

    Raises:
        HTTPException: 401 on a bad token, 400 on an invalid filename token,
            404 when the file cannot be downloaded from Dropbox.
    """
    # Local import: urllib.parse is not among the file-level imports.
    from urllib.parse import quote

    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    # filename is a Fernet token (the encrypted original file name)
    try:
        orig_filename = _FERNET.decrypt(filename.encode()).decode()
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid filename token")
    dbx = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    dropbox_path = f"{DROPBOX_UPLOAD_TO_JNJ}/{orig_filename}"
    try:
        _, response = dbx.files_download(dropbox_path)
        raw = response.content
    except Exception as e:
        log.error("download-file: nelze stáhnout %s: %s", filename, e)
        raise HTTPException(status_code=404, detail=f"Soubor nenalezen: {filename}")
    encrypted = _FERNET.encrypt(raw)

    # Header values must be latin-1 encodable, so a name with other characters
    # (e.g. Czech diacritics) cannot be embedded verbatim. Send an ASCII
    # fallback plus the exact name as an RFC 5987 ``filename*`` parameter.
    download_name = f"{orig_filename}.enc"
    ascii_name = (
        download_name.encode("ascii", "replace").decode("ascii")
        .replace("\\", "_")
        .replace('"', "_")
    )
    disposition = (
        f'attachment; filename="{ascii_name}"; '
        f"filename*=UTF-8''{quote(download_name, safe='')}"
    )
    # Build the response BEFORE archiving, so a failure here cannot leave the
    # file moved away without ever having been delivered.
    result = Response(
        content=encrypted,
        media_type="application/octet-stream",
        headers={"Content-Disposition": disposition},
    )

    # Archive the delivered file in the ##Trash subfolder (the log messages
    # below call it "Sent").
    sent_path = f"{DROPBOX_UPLOAD_TO_JNJ}/##Trash/{orig_filename}"
    try:
        dbx.files_move_v2(dropbox_path, sent_path, autorename=True)
        log.info("download-file: %s přesunut do Sent", orig_filename)
    except Exception as e:
        log.warning("download-file: nelze přesunout %s do Sent: %s", orig_filename, e)
    return result