Files
janssen/EmailsImport/DockerCustomApp/app.py
T
administrator 66475d48d2 file_receive: přejmenování endpointů /pending-files,/download-file → /status,/item
Pokus obejít blokaci JNJ web-proxy, která zařezává GET na "mluvící" názvy
(403 Forbidden + přepis URL na ?_sm_nck=1). POST /upload prochází, GET ne.
Neutrální názvy /status a /item, metoda zůstává GET — izoluje vliv názvu URL.

- klient janssenpc_file_receive.py: PENDING_URL/DOWNLOAD_URL na /status,/item
- server DockerCustomApp/app.py: srovnáno s živou verzí z kontejneru
  (odstraněn drift) + routy přejmenovány, nasazeno na Unraid msgreceiver

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 05:34:51 +02:00

472 lines
16 KiB
Python

# app.py | v1.7 | 2026-06-05
# FastAPI server that receives .msg and .db files, uploads files to Dropbox and imports mail via the Graph API.
# Endpoints: /upload (.msg/.emsg → /msgs + Graph import), /upload-db (.db → /msgs/db),
# /upload-file (→ Dropbox /!!!Days/Downloads Z230),
# /message-delete, /message-update (sync: delete, read flag, folder move),
# /status (list of files waiting to be sent to JNJ), /item/{filename} (download one of those files).
from fastapi import FastAPI, UploadFile, File, Form, Header, HTTPException, Response
from pydantic import BaseModel
import shutil
import base64
import hashlib
import logging
from pathlib import Path
from typing import Optional
import os
import dropbox
import msal
import requests as http_requests
import extract_msg
from dateutil import parser as dtparser
from datetime import timezone
from dotenv import load_dotenv
from cryptography.fernet import Fernet
# Load settings from the .env file located next to this module.
load_dotenv(Path(__file__).parent / ".env")
app = FastAPI()
log = logging.getLogger("msgreceiver")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
# Shared bearer token every endpoint compares against the Authorization header.
# It can be supplied via MSGRECEIVER_TOKEN; the literal is kept only as a
# backward-compatible default.
# NOTE(review): the default is committed to source control — rotate it and
# provide the real value through the environment.
TOKEN = os.getenv("MSGRECEIVER_TOKEN") or "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
# Encryption key derived from TOKEN (Fernet = AES-128 CBC + HMAC).
_FERNET = Fernet(base64.urlsafe_b64encode(hashlib.sha256(TOKEN.encode()).digest()))
# Local storage: uploaded .msg files and the uploaded .db file.
SAVE_DIR = Path("/msgs")
DB_DIR = Path("/msgs/db")
SAVE_DIR.mkdir(parents=True, exist_ok=True)
DB_DIR.mkdir(parents=True, exist_ok=True)
# Dropbox OAuth settings, read from the environment (empty string when unset).
DROPBOX_APP_KEY = os.getenv("DROPBOX_APP_KEY", "")
DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET", "")
DROPBOX_REFRESH_TOKEN = os.getenv("DROPBOX_APP_REFRESH_TOKEN", "")
# --- Graph API config ---
GRAPH_TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
GRAPH_CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
# Client secret can be supplied via MSGRECEIVER_GRAPH_CLIENT_SECRET.
# NOTE(review): same concern as TOKEN — the committed default should be rotated.
GRAPH_CLIENT_SECRET = (
    os.getenv("MSGRECEIVER_GRAPH_CLIENT_SECRET")
    or "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
)
GRAPH_MAILBOX = "vladimir.buzalka@buzalka.cz"
GRAPH_ROOT_FOLDER = "JNJ"  # subfolder under Inbox — root for imported emails
# Dropbox folder listed by /status and served by /item.
DROPBOX_UPLOAD_TO_JNJ = "/!!!Days/Downloads Z230/UploadToJNJ"
GRAPH_URL = "https://graph.microsoft.com/v1.0"
# Cache: folder path → Graph folder ID
_folder_id_cache: dict[str, str] = {}
# Most recently acquired Graph access token (None until first use).
_graph_token: Optional[str] = None
def _get_graph_token() -> str:
    """Acquire a new app-only Graph access token and cache it in ``_graph_token``.

    Returns:
        The access token string.

    Raises:
        RuntimeError: if the MSAL response contains no ``access_token``.
    """
    global _graph_token
    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
    client = msal.ConfidentialClientApplication(
        GRAPH_CLIENT_ID,
        authority=authority_url,
        client_credential=GRAPH_CLIENT_SECRET,
    )
    response = client.acquire_token_for_client(
        scopes=["https://graph.microsoft.com/.default"]
    )
    if "access_token" in response:
        _graph_token = response["access_token"]
        return _graph_token
    raise RuntimeError(f"Graph auth failed: {response}")
def _graph_headers() -> dict:
token = _graph_token or _get_graph_token()
return {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
def _find_child_folder(children_url: str, name: str) -> Optional[str]:
    """Return the ID of the child folder named *name* (case-insensitive), or None.

    Follows ``@odata.nextLink`` so that folders beyond the first result page
    are found as well.

    Raises:
        RuntimeError: if Graph answers the listing with a non-200 status.
    """
    wanted = name.lower()
    url: Optional[str] = children_url
    while url:
        r = _retry_graph(http_requests.get, url, _graph_headers, timeout=15)
        if r.status_code != 200:
            raise RuntimeError(f"Cannot list folders [{r.status_code}]: {r.text[:200]}")
        page = r.json()
        for f in page.get("value", []):
            if f["displayName"].lower() == wanted:
                return f["id"]
        url = page.get("@odata.nextLink")
    return None


def _ensure_folder(path_parts: list[str]) -> str:
    """Ensure folder hierarchy exists under Inbox, return leaf folder ID.

    Every resolved prefix of *path_parts* is stored in ``_folder_id_cache``,
    so later calls only hit Graph for levels not seen before. An empty
    *path_parts* yields the literal ``"Inbox"``.

    Raises:
        RuntimeError: if a level can neither be found nor created.
    """
    cache_key = "/".join(path_parts)
    if cache_key in _folder_id_cache:
        return _folder_id_cache[cache_key]
    parent_id = "Inbox"
    for i, part in enumerate(path_parts):
        partial_key = "/".join(path_parts[: i + 1])
        if partial_key in _folder_id_cache:
            parent_id = _folder_id_cache[partial_key]
            continue
        # "Inbox" is used directly as the folder ID for the first level.
        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"
        found = _find_child_folder(url, part)
        if not found:
            # Create folder
            cr = _retry_graph(
                http_requests.post, url, _graph_headers,
                json={"displayName": part}, timeout=15,
            )
            if cr.status_code in (200, 201):
                found = cr.json()["id"]
            elif cr.status_code == 409:
                # Already exists (race condition) — re-fetch
                found = _find_child_folder(url, part)
            if not found:
                raise RuntimeError(f"Cannot create folder '{part}': {cr.text}")
        _folder_id_cache[partial_key] = found
        parent_id = found
    return parent_id
def _map_jnj_folder(folder: str) -> list[str]:
"""Map JNJ folder path to Graph folder parts under JNJ root.
'/vbuzalka@its.jnj.com/Inbox/TMP' → ['JNJ', 'Inbox', 'TMP']
'/Online Archive - vbuzalka@its.jnj.com/Inbox' → ['JNJ', 'Online Archive', 'Inbox']
"""
parts = [p for p in folder.split("/") if p]
if not parts:
return [GRAPH_ROOT_FOLDER]
# First part is mailbox name — strip it but detect Online Archive
mailbox = parts[0]
rest = parts[1:]
prefix = [GRAPH_ROOT_FOLDER]
if "online archive" in mailbox.lower():
prefix.append("Online Archive")
return prefix + rest if rest else prefix
def _make_recipient(addr: str) -> dict:
if "<" in addr and ">" in addr:
name = addr[: addr.index("<")].strip().strip('"')
email = addr[addr.index("<") + 1 : addr.index(">")].strip()
else:
name = addr
email = addr
return {"emailAddress": {"name": name, "address": email}}
def _safe_msg_attr(msg, attr: str, default=None):
    """Return ``msg.<attr>``, or *default* when it is falsy or reading it raises.

    extract_msg can fail on individual properties (e.g. non-standard
    encodings such as cp1252); one bad property must not abort the import.
    """
    try:
        return getattr(msg, attr) or default
    except Exception:
        return default


def _import_msg_to_graph(msg_path: Path, folder: str) -> Optional[str]:
    """Parse .msg and import into Graph API mailbox. Returns message ID or None.

    Args:
        msg_path: path of the saved .msg file.
        folder: JNJ folder path sent by the client; mapped with
            ``_map_jnj_folder`` and created on demand via ``_ensure_folder``.

    Returns:
        The ``id`` from the Graph response on HTTP 200/201, otherwise None.
        Every exception is logged and turned into None.
    """
    try:
        msg = extract_msg.Message(str(msg_path))
        try:
            subject = msg.subject or "(no subject)"
            body_html = _safe_msg_attr(msg, "htmlBody")
            if isinstance(body_html, bytes):
                body_html = body_html.decode("utf-8", errors="replace")
            body_text = _safe_msg_attr(msg, "body", "")
            sender_email = _safe_msg_attr(msg, "sender", "")
            sender_name = _safe_msg_attr(msg, "senderName", sender_email)
            to_raw = _safe_msg_attr(msg, "to", "")
            cc_raw = _safe_msg_attr(msg, "cc", "")
            date_raw = _safe_msg_attr(msg, "date")
            # Only attachments that have both data and a long filename are kept.
            att_list = [
                {
                    "@odata.type": "#microsoft.graph.fileAttachment",
                    "name": att.longFilename,
                    "contentType": getattr(att, "mimetype", None) or "application/octet-stream",
                    "contentBytes": base64.b64encode(att.data).decode(),
                }
                for att in msg.attachments
                if att.data and att.longFilename
            ]
        finally:
            # Close the .msg file even when reading a property fails.
            msg.close()
        to_list = [a.strip() for a in to_raw.split(";") if a.strip()]
        cc_list = [a.strip() for a in cc_raw.split(";") if a.strip()]
        # Map folder and ensure it exists
        folder_parts = _map_jnj_folder(folder)
        folder_id = _ensure_folder(folder_parts)
        payload = {
            "subject": subject,
            "body": {
                "contentType": "HTML" if body_html else "Text",
                "content": body_html or body_text,
            },
            "from": _make_recipient(f"{sender_name} <{sender_email}>"),
            "toRecipients": [_make_recipient(a) for a in to_list],
            "ccRecipients": [_make_recipient(a) for a in cc_list],
            "isRead": True,
            # NOTE(review): presumably sets the MAPI message-flags property so
            # the item is not stored as a draft — confirm.
            "singleValueExtendedProperties": [
                {"id": "Integer 0x0E07", "value": "1"}
            ],
        }
        if date_raw:
            try:
                dt = dtparser.parse(str(date_raw))
                payload["receivedDateTime"] = dt.astimezone(timezone.utc).strftime(
                    "%Y-%m-%dT%H:%M:%SZ"
                )
            except Exception:
                # An unparseable date is not fatal; the field is simply omitted.
                pass
        if att_list:
            payload["attachments"] = att_list
        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
        # _retry_graph refreshes the token once on HTTP 401.
        r = _retry_graph(http_requests.post, url, _graph_headers, json=payload, timeout=30)
        if r.status_code in (200, 201):
            msg_id = r.json().get("id", "")
            log.info("Graph OK: %s -> %s", subject[:60], "/".join(folder_parts))
            return msg_id
        log.error("Graph FAIL [%d]: %s | %s", r.status_code, subject[:60], r.text[:200])
        return None
    except Exception as e:
        log.error("Graph import error for %s: %s", msg_path.name, e)
        return None
@app.post("/upload")
async def upload_msg(
    file: UploadFile = File(...),
    authorization: str = Header(None),
    folder: str = Form(""),
):
    """Store an uploaded .msg/.emsg file and optionally import it into Graph.

    ``.emsg`` uploads are Fernet-decrypted and always saved as ``.msg``.
    When *folder* is given, the saved file is imported into that mailbox
    folder and the resulting Graph message ID is returned as ``graph_id``.

    Raises:
        HTTPException: 401 on a wrong bearer token, 400 on an unsupported
            extension or an undecryptable payload.
    """
    from cryptography.fernet import InvalidToken

    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    # Keep only the final path component so a crafted filename cannot
    # escape SAVE_DIR.
    upload_name = Path(file.filename or "").name
    is_encrypted = upload_name.endswith(".emsg")
    if not upload_name.endswith(".msg") and not is_encrypted:
        raise HTTPException(status_code=400, detail="Only .msg or .emsg files accepted")
    # Always stored as .msg
    msg_filename = upload_name[:-5] + ".msg" if is_encrypted else upload_name
    dest = SAVE_DIR / msg_filename
    if dest.exists():
        return {"status": "exists", "file": msg_filename}
    content = await file.read()
    if is_encrypted:
        try:
            content = _FERNET.decrypt(content)
        except InvalidToken:
            raise HTTPException(status_code=400, detail="Decryption failed") from None
    with dest.open("wb") as f:
        f.write(content)
    # Import to Graph API if folder was provided by client
    graph_id = None
    if folder:
        graph_id = _import_msg_to_graph(dest, folder)
    return {
        "status": "saved",
        "file": msg_filename,
        "graph_id": graph_id,
    }
@app.post("/upload-db")
async def upload_db(
    file: UploadFile = File(...),
    authorization: str = Header(None)
):
    """Replace the stored database with the uploaded .db file.

    The upload is first written under a temporary name; only after it has
    been written completely are the existing ``*.db`` files removed and the
    new file moved into place, so a failed upload leaves the old database
    intact.

    Raises:
        HTTPException: 401 on a wrong bearer token, 400 when the file name
            does not end in ``.db``.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    # Keep only the final path component so a crafted filename cannot
    # escape DB_DIR.
    db_name = Path(file.filename or "").name
    if not db_name.endswith(".db"):
        raise HTTPException(status_code=400, detail="Only .db files accepted")
    dest = DB_DIR / db_name
    # The ".part" suffix keeps the temporary file out of the "*.db" glob below.
    tmp = DB_DIR / (db_name + ".part")
    with tmp.open("wb") as f:
        shutil.copyfileobj(file.file, f)
    for old in DB_DIR.glob("*.db"):
        old.unlink()
    tmp.replace(dest)
    return {"status": "saved", "file": db_name}
class MessageDeleteRequest(BaseModel):
    """JSON body accepted by POST /message-delete."""

    # Graph ID of the message to delete.
    graph_id: str
class MessageUpdateRequest(BaseModel):
    """JSON body accepted by POST /message-update."""

    # Graph ID of the message to update.
    graph_id: str
    # New read state; None leaves the read flag untouched.
    is_read: Optional[bool] = None
    # JNJ folder path to move the message to; None/empty means no move.
    folder: Optional[str] = None
def _retry_graph(method, url, headers_fn, **kwargs):
"""Call Graph API, refresh token once on 401."""
headers = headers_fn()
r = method(url, headers=headers, **kwargs)
if r.status_code == 401:
_get_graph_token()
headers = headers_fn()
r = method(url, headers=headers, **kwargs)
return r
@app.post("/message-delete")
async def message_delete(req: MessageDeleteRequest, authorization: str = Header(None)):
    """Delete the Graph message identified by ``req.graph_id``.

    Raises:
        HTTPException: 401 on a wrong bearer token, 500 when Graph does not
            answer the DELETE with 200 or 204.
    """
    expected = f"Bearer {TOKEN}"
    if authorization != expected:
        raise HTTPException(status_code=401, detail="Unauthorized")
    target = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{req.graph_id}"
    resp = _retry_graph(http_requests.delete, target, _graph_headers, timeout=15)
    if resp.status_code not in (200, 204):
        raise HTTPException(
            status_code=500,
            detail=f"Graph DELETE failed: {resp.status_code} {resp.text[:200]}",
        )
    log.info("Graph DELETE OK: %s", req.graph_id)
    return {"status": "deleted"}
@app.post("/message-update")
async def message_update(req: MessageUpdateRequest, authorization: str = Header(None)):
    """Move a Graph message and/or change its read flag.

    Returns:
        A dict with ``status``, the (possibly new) ``graph_id``, plus
        ``moved`` when a folder was requested and ``read_updated`` when
        ``is_read`` was supplied.

    Raises:
        HTTPException: 401 on a wrong bearer token.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    current_graph_id = req.graph_id
    result: dict = {"status": "ok"}
    # Move first — returns new graph_id which we use for subsequent read-status update
    if req.folder:
        folder_parts = _map_jnj_folder(req.folder)
        folder_id = _ensure_folder(folder_parts)
        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{current_graph_id}/move"
        r = _retry_graph(http_requests.post, url, _graph_headers,
                         json={"destinationId": folder_id}, timeout=15)
        if r.status_code in (200, 201):
            current_graph_id = r.json().get("id", current_graph_id)
            result["moved"] = True
            log.info("Graph MOVE OK: %s -> %s", req.graph_id, "/".join(folder_parts))
        else:
            log.error("Graph MOVE FAIL [%d]: %s", r.status_code, r.text[:200])
            result["moved"] = False
    if req.is_read is not None:
        # Uses the post-move ID when the move succeeded, the original ID otherwise.
        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{current_graph_id}"
        r = _retry_graph(http_requests.patch, url, _graph_headers,
                         json={"isRead": req.is_read}, timeout=15)
        result["read_updated"] = r.status_code in (200, 201)
        if not result["read_updated"]:
            log.error("Graph PATCH isRead FAIL [%d]: %s", r.status_code, r.text[:200])
    result["graph_id"] = current_graph_id
    return result
@app.post("/upload-file")
async def upload_file(
    file: UploadFile = File(...),
    authorization: str = Header(None),
):
    """Upload a file to the Dropbox folder ``/!!!Days/Downloads Z230``.

    A ``.enc`` upload is Fernet-decrypted and stored without that suffix.
    An existing Dropbox file of the same name is overwritten.

    Raises:
        HTTPException: 401 on a wrong bearer token, 500 when no Dropbox
            refresh token is configured, 400 on a missing file name or an
            undecryptable payload.
    """
    from cryptography.fernet import InvalidToken

    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    if not DROPBOX_REFRESH_TOKEN:
        raise HTTPException(status_code=500, detail="Dropbox not configured")
    # Keep only the final path component so the upload always lands
    # directly in the target folder.
    upload_name = Path(file.filename or "").name
    is_encrypted = upload_name.endswith(".enc")
    orig_filename = upload_name[:-4] if is_encrypted else upload_name
    if not orig_filename:
        raise HTTPException(status_code=400, detail="Missing filename")
    raw = await file.read()
    if is_encrypted:
        try:
            file_content = _FERNET.decrypt(raw)
        except InvalidToken:
            raise HTTPException(status_code=400, detail="Decryption failed") from None
    else:
        file_content = raw
    dbx = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    dropbox_path = f"/!!!Days/Downloads Z230/{orig_filename}"
    dbx.files_upload(file_content, dropbox_path, mode=dropbox.files.WriteMode.overwrite)
    return {"status": "uploaded", "file": orig_filename, "dropbox_path": dropbox_path}
@app.get("/status")
async def pending_files(authorization: str = Header(None)):
    """List the names of files waiting in the Dropbox ``UploadToJNJ`` folder.

    Only file entries are reported (sub-folders are skipped). The listing is
    best-effort: when Dropbox fails the error is logged and an empty list is
    returned.

    Raises:
        HTTPException: 401 on a wrong bearer token.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    dbx = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    files: list[str] = []
    try:
        result = dbx.files_list_folder(DROPBOX_UPLOAD_TO_JNJ)
        while True:
            files.extend(
                e.name for e in result.entries
                if isinstance(e, dropbox.files.FileMetadata)
            )
            if not result.has_more:
                break
            # Large folders are returned in pages; keep reading until done.
            result = dbx.files_list_folder_continue(result.cursor)
    except Exception as e:
        log.warning("pending-files: listing %s failed: %s", DROPBOX_UPLOAD_TO_JNJ, e)
        files = []
    log.info("pending-files: %d souboru", len(files))
    return {"files": files}
def _attachment_disposition(name: str) -> str:
    """Build a ``Content-Disposition: attachment`` value for *name*.

    Names that are latin-1 encodable and contain no double quote or control
    character are emitted as a plain ``filename="..."``. Any other name gets
    an ASCII fallback plus a percent-encoded ``filename*=UTF-8''...``
    parameter, so the header can always be encoded and cannot be broken by
    quotes or line breaks.
    """
    from urllib.parse import quote

    try:
        name.encode("latin-1")
        header_safe = '"' not in name and all(ch >= " " and ch != "\x7f" for ch in name)
    except UnicodeEncodeError:
        header_safe = False
    if header_safe:
        return f'attachment; filename="{name}"'
    ascii_name = "".join(
        ch if " " <= ch < "\x7f" and ch not in '"\\' else "_" for ch in name
    )
    return f"attachment; filename=\"{ascii_name}\"; filename*=UTF-8''{quote(name, safe='')}"


@app.get("/item/{filename:path}")
async def download_file(filename: str, authorization: str = Header(None)):
    """Return one file from the Dropbox ``UploadToJNJ`` folder, Fernet-encrypted.

    After a successful download the file is moved into the ``##Trash``
    sub-folder (auto-renamed on conflict); a failed move is only logged.

    Raises:
        HTTPException: 401 on a wrong bearer token, 404 when the file cannot
            be downloaded from Dropbox.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    dbx = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    dropbox_path = f"{DROPBOX_UPLOAD_TO_JNJ}/{filename}"
    try:
        _, response = dbx.files_download(dropbox_path)
        raw = response.content
    except Exception as e:
        log.error("download-file: nelze stáhnout %s: %s", filename, e)
        raise HTTPException(status_code=404, detail=f"Soubor nenalezen: {filename}")
    encrypted = _FERNET.encrypt(raw)
    # Build the header before moving the file, so a problem with the name
    # cannot leave the file moved away without having been delivered.
    disposition = _attachment_disposition(f"{filename}.enc")
    # Move the delivered file out of the pending folder (log messages call
    # the target "Sent"; the actual sub-folder is ##Trash).
    sent_path = f"{DROPBOX_UPLOAD_TO_JNJ}/##Trash/{filename}"
    try:
        dbx.files_move_v2(dropbox_path, sent_path, autorename=True)
        log.info("download-file: %s přesunut do Sent", filename)
    except Exception as e:
        log.warning("download-file: nelze přesunout %s do Sent: %s", filename, e)
    return Response(
        content=encrypted,
        media_type="application/octet-stream",
        headers={"Content-Disposition": disposition},
    )