Initial commit — clean history (removed large test files, browser profiles, Medidata/Clario downloads)

This commit is contained in:
2026-06-01 15:36:31 +02:00
commit bb604e593e
1304 changed files with 116480 additions and 0 deletions
+412
View File
@@ -0,0 +1,412 @@
# app.py | v1.6 | 2026-06-01
# FastAPI server pro příjem .msg a .db souborů, upload do Dropboxu a import do Graph API.
# Endpointy: /upload (.msg → /msgs + Graph import), /upload-db (.db → /msgs/db),
# /upload-dropbox (→ Dropbox /!!!Days/Downloads Z230),
# /message-delete, /message-update (sync: smazání, přečtení, přesun složky).
from fastapi import FastAPI, UploadFile, File, Form, Header, HTTPException
from pydantic import BaseModel
import shutil
import base64
import hashlib
import logging
from pathlib import Path
from typing import Optional
import os
import dropbox
import msal
import requests as http_requests
import extract_msg
from dateutil import parser as dtparser
from datetime import timezone
from dotenv import load_dotenv
from cryptography.fernet import Fernet
load_dotenv(Path(__file__).parent / ".env")
app = FastAPI()
log = logging.getLogger("msgreceiver")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
# Šifrovací klíč odvozený z TOKENu (Fernet = AES-128 CBC + HMAC)
_FERNET = Fernet(base64.urlsafe_b64encode(hashlib.sha256(TOKEN.encode()).digest()))
SAVE_DIR = Path("/msgs")
DB_DIR = Path("/msgs/db")
SAVE_DIR.mkdir(parents=True, exist_ok=True)
DB_DIR.mkdir(parents=True, exist_ok=True)
DROPBOX_APP_KEY = os.getenv("DROPBOX_APP_KEY", "")
DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET", "")
DROPBOX_REFRESH_TOKEN = os.getenv("DROPBOX_APP_REFRESH_TOKEN", "")
# --- Graph API config ---
GRAPH_TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
GRAPH_CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
GRAPH_MAILBOX = "vladimir.buzalka@buzalka.cz"
GRAPH_ROOT_FOLDER = "JNJ" # subfolder under Inbox — root for imported emails
GRAPH_URL = "https://graph.microsoft.com/v1.0"
# Cache: folder path → Graph folder ID
_folder_id_cache: dict[str, str] = {}
_graph_token: Optional[str] = None
def _get_graph_token() -> str:
global _graph_token
msalapp = msal.ConfidentialClientApplication(
GRAPH_CLIENT_ID,
authority=f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}",
client_credential=GRAPH_CLIENT_SECRET,
)
result = msalapp.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
if "access_token" not in result:
raise RuntimeError(f"Graph auth failed: {result}")
_graph_token = result["access_token"]
return _graph_token
def _graph_headers() -> dict:
token = _graph_token or _get_graph_token()
return {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
def _ensure_folder(path_parts: list[str]) -> str:
"""Ensure folder hierarchy exists under Inbox, return leaf folder ID."""
cache_key = "/".join(path_parts)
if cache_key in _folder_id_cache:
return _folder_id_cache[cache_key]
headers = _graph_headers()
parent_id = "Inbox"
for i, part in enumerate(path_parts):
partial_key = "/".join(path_parts[: i + 1])
if partial_key in _folder_id_cache:
parent_id = _folder_id_cache[partial_key]
continue
# List children of parent
if parent_id == "Inbox":
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/Inbox/childFolders"
else:
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"
r = http_requests.get(url, headers=headers, timeout=15)
if r.status_code == 401:
_get_graph_token()
headers = _graph_headers()
r = http_requests.get(url, headers=headers, timeout=15)
found = None
for f in r.json().get("value", []):
if f["displayName"].lower() == part.lower():
found = f["id"]
break
if not found:
# Create folder
cr = http_requests.post(url, headers=headers, json={"displayName": part}, timeout=15)
if cr.status_code in (200, 201):
found = cr.json()["id"]
elif cr.status_code == 409:
# Already exists (race condition) — re-fetch
r2 = http_requests.get(url, headers=headers, timeout=15)
for f in r2.json().get("value", []):
if f["displayName"].lower() == part.lower():
found = f["id"]
break
if not found:
raise RuntimeError(f"Cannot create folder '{part}': {cr.text}")
_folder_id_cache[partial_key] = found
parent_id = found
return parent_id
def _map_jnj_folder(folder: str) -> list[str]:
"""Map JNJ folder path to Graph folder parts under JNJ root.
'/vbuzalka@its.jnj.com/Inbox/TMP' → ['JNJ', 'Inbox', 'TMP']
'/Online Archive - vbuzalka@its.jnj.com/Inbox' → ['JNJ', 'Online Archive', 'Inbox']
"""
parts = [p for p in folder.split("/") if p]
if not parts:
return [GRAPH_ROOT_FOLDER]
# First part is mailbox name — strip it but detect Online Archive
mailbox = parts[0]
rest = parts[1:]
prefix = [GRAPH_ROOT_FOLDER]
if "online archive" in mailbox.lower():
prefix.append("Online Archive")
return prefix + rest if rest else prefix
def _make_recipient(addr: str) -> dict:
if "<" in addr and ">" in addr:
name = addr[: addr.index("<")].strip().strip('"')
email = addr[addr.index("<") + 1 : addr.index(">")].strip()
else:
name = addr
email = addr
return {"emailAddress": {"name": name, "address": email}}
def _import_msg_to_graph(msg_path: Path, folder: str) -> Optional[str]:
"""Parse .msg and import into Graph API mailbox. Returns message ID or None."""
try:
msg = extract_msg.Message(str(msg_path))
subject = msg.subject or "(no subject)"
# Čtení těla — extract_msg může selhat na nestandartním kódování (cp1252 apod.)
try:
body_html = msg.htmlBody
if isinstance(body_html, bytes):
body_html = body_html.decode("utf-8", errors="replace")
except Exception:
body_html = None
try:
body_text = msg.body or ""
except Exception:
body_text = ""
try:
sender_email = msg.sender or ""
except Exception:
sender_email = ""
try:
sender_name = getattr(msg, "senderName", None) or sender_email
except Exception:
sender_name = sender_email
try:
to_raw = msg.to or ""
except Exception:
to_raw = ""
try:
cc_raw = msg.cc or ""
except Exception:
cc_raw = ""
try:
date_raw = msg.date
except Exception:
date_raw = None
att_list = []
for att in msg.attachments:
if att.data and att.longFilename:
att_list.append({
"@odata.type": "#microsoft.graph.fileAttachment",
"name": att.longFilename,
"contentType": getattr(att, "mimetype", None) or "application/octet-stream",
"contentBytes": base64.b64encode(att.data).decode(),
})
msg.close()
to_list = [a.strip() for a in to_raw.split(";") if a.strip()]
cc_list = [a.strip() for a in cc_raw.split(";") if a.strip()]
# Map folder and ensure it exists
folder_parts = _map_jnj_folder(folder)
folder_id = _ensure_folder(folder_parts)
payload = {
"subject": subject,
"body": {
"contentType": "HTML" if body_html else "Text",
"content": body_html or body_text,
},
"from": _make_recipient(f"{sender_name} <{sender_email}>"),
"toRecipients": [_make_recipient(a) for a in to_list],
"ccRecipients": [_make_recipient(a) for a in cc_list],
"isRead": True,
"singleValueExtendedProperties": [
{"id": "Integer 0x0E07", "value": "1"}
],
}
if date_raw:
try:
dt = dtparser.parse(str(date_raw))
payload["receivedDateTime"] = dt.astimezone(timezone.utc).strftime(
"%Y-%m-%dT%H:%M:%SZ"
)
except Exception:
pass
if att_list:
payload["attachments"] = att_list
headers = _graph_headers()
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
r = http_requests.post(url, headers=headers, json=payload, timeout=30)
if r.status_code == 401:
_get_graph_token()
headers = _graph_headers()
r = http_requests.post(url, headers=headers, json=payload, timeout=30)
if r.status_code in (200, 201):
msg_id = r.json().get("id", "")
log.info("Graph OK: %s%s", subject[:60], "/".join(folder_parts))
return msg_id
else:
log.error("Graph FAIL [%d]: %s | %s", r.status_code, subject[:60], r.text[:200])
return None
except Exception as e:
log.error("Graph import error for %s: %s", msg_path.name, e)
return None
@app.post("/upload")
async def upload_msg(
file: UploadFile = File(...),
authorization: str = Header(None),
folder: str = Form(""),
):
if authorization != f"Bearer {TOKEN}":
raise HTTPException(status_code=401, detail="Unauthorized")
is_encrypted = file.filename.endswith(".emsg")
if not file.filename.endswith(".msg") and not is_encrypted:
raise HTTPException(status_code=400, detail="Only .msg or .emsg files accepted")
# Ukládáme vždy jako .msg
msg_filename = file.filename[:-5] + ".msg" if is_encrypted else file.filename
dest = SAVE_DIR / msg_filename
if dest.exists():
return {"status": "exists", "file": msg_filename}
content = await file.read()
if is_encrypted:
content = _FERNET.decrypt(content)
with dest.open("wb") as f:
f.write(content)
# Import to Graph API if folder was provided by client
graph_id = None
if folder:
graph_id = _import_msg_to_graph(dest, folder)
return {
"status": "saved",
"file": msg_filename,
"graph_id": graph_id,
}
@app.post("/upload-db")
async def upload_db(
file: UploadFile = File(...),
authorization: str = Header(None)
):
if authorization != f"Bearer {TOKEN}":
raise HTTPException(status_code=401, detail="Unauthorized")
if not file.filename.endswith(".db"):
raise HTTPException(status_code=400, detail="Only .db files accepted")
for old in DB_DIR.glob("*.db"):
old.unlink()
dest = DB_DIR / file.filename
with dest.open("wb") as f:
shutil.copyfileobj(file.file, f)
return {"status": "saved", "file": file.filename}
class MessageDeleteRequest(BaseModel):
graph_id: str
class MessageUpdateRequest(BaseModel):
graph_id: str
is_read: Optional[bool] = None
folder: Optional[str] = None
def _retry_graph(method, url, headers_fn, **kwargs):
"""Call Graph API, refresh token once on 401."""
headers = headers_fn()
r = method(url, headers=headers, **kwargs)
if r.status_code == 401:
_get_graph_token()
headers = headers_fn()
r = method(url, headers=headers, **kwargs)
return r
@app.post("/message-delete")
async def message_delete(req: MessageDeleteRequest, authorization: str = Header(None)):
if authorization != f"Bearer {TOKEN}":
raise HTTPException(status_code=401, detail="Unauthorized")
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{req.graph_id}"
r = _retry_graph(http_requests.delete, url, _graph_headers, timeout=15)
if r.status_code in (200, 204):
log.info("Graph DELETE OK: %s", req.graph_id)
return {"status": "deleted"}
raise HTTPException(status_code=500, detail=f"Graph DELETE failed: {r.status_code} {r.text[:200]}")
@app.post("/message-update")
async def message_update(req: MessageUpdateRequest, authorization: str = Header(None)):
if authorization != f"Bearer {TOKEN}":
raise HTTPException(status_code=401, detail="Unauthorized")
current_graph_id = req.graph_id
result: dict = {"status": "ok"}
# Move first — returns new graph_id which we use for subsequent read-status update
if req.folder:
folder_parts = _map_jnj_folder(req.folder)
folder_id = _ensure_folder(folder_parts)
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{current_graph_id}/move"
r = _retry_graph(http_requests.post, url, _graph_headers,
json={"destinationId": folder_id}, timeout=15)
if r.status_code in (200, 201):
current_graph_id = r.json().get("id", current_graph_id)
result["moved"] = True
log.info("Graph MOVE OK: %s%s", req.graph_id, "/".join(folder_parts))
else:
log.error("Graph MOVE FAIL [%d]: %s", r.status_code, r.text[:200])
result["moved"] = False
if req.is_read is not None:
url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{current_graph_id}"
r = _retry_graph(http_requests.patch, url, _graph_headers,
json={"isRead": req.is_read}, timeout=15)
result["read_updated"] = r.status_code in (200, 201)
if not result["read_updated"]:
log.error("Graph PATCH isRead FAIL [%d]: %s", r.status_code, r.text[:200])
result["graph_id"] = current_graph_id
return result
@app.post("/upload-dropbox")
async def upload_dropbox(
file: UploadFile = File(...),
authorization: str = Header(None),
):
if authorization != f"Bearer {TOKEN}":
raise HTTPException(status_code=401, detail="Unauthorized")
if not DROPBOX_REFRESH_TOKEN:
raise HTTPException(status_code=500, detail="Dropbox not configured")
content = await file.read()
dbx = dropbox.Dropbox(
app_key=DROPBOX_APP_KEY,
app_secret=DROPBOX_APP_SECRET,
oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
)
dropbox_path = f"/!!!Days/Downloads Z230/{file.filename}"
dbx.files_upload(content, dropbox_path, mode=dropbox.files.WriteMode.overwrite)
return {"status": "uploaded", "file": file.filename, "dropbox_path": dropbox_path}