notebook
@@ -7,6 +7,8 @@
|
|||||||
## Kopírování souborů z Windows
|
## Kopírování souborů z Windows
|
||||||
Všechny soubory z `U:\janssen\EmailsImport\DockerCustomApp\` nakopírovat do `\\tower\appdata\msgreceiver\`.
|
Všechny soubory z `U:\janssen\EmailsImport\DockerCustomApp\` nakopírovat do `\\tower\appdata\msgreceiver\`.
|
||||||
|
|
||||||
|
**DŮLEŽITÉ:** Po každé změně `app.py` je nutný rebuild a restart kontejneru (viz níže). Bez toho běží stará verze.
|
||||||
|
|
||||||
## Build & restart (SSH)
|
## Build & restart (SSH)
|
||||||
```bash
|
```bash
|
||||||
# Připojení: ssh root@192.168.1.76, heslo: 7309208104
|
# Připojení: ssh root@192.168.1.76, heslo: 7309208104
|
||||||
@@ -26,6 +28,18 @@ docker run -d --name msgreceiver \
|
|||||||
## Kontejner
|
## Kontejner
|
||||||
- Port: 8765
|
- Port: 8765
|
||||||
- Restart policy: unless-stopped
|
- Restart policy: unless-stopped
|
||||||
- Endpointy: `/upload` (msg), `/upload-db` (db), `/upload-dropbox` (soubory do Dropboxu)
|
- Endpointy:
|
||||||
|
- `/upload` (msg + volitelný `folder` → uloží na disk + import do Graph API)
|
||||||
|
- `/upload-db` (db → /msgs/db, maže staré)
|
||||||
|
- `/upload-dropbox` (soubory do Dropboxu)
|
||||||
- Auth: Bearer token v app.py
|
- Auth: Bearer token v app.py
|
||||||
- Dropbox credentials: v `.env` uvnitř image
|
- Dropbox credentials: v `.env` uvnitř image
|
||||||
|
- Graph API credentials: přímo v app.py (Mail.ReadWrite + Mail.Send, tenant TrialHelp s.r.o.)
|
||||||
|
|
||||||
|
## Graph import
|
||||||
|
Při uploadu .msg s parametrem `folder` (plná cesta z JNJ Outlooku) server:
|
||||||
|
1. Uloží .msg na disk
|
||||||
|
2. Parsuje .msg a importuje do schránky `vladimir.buzalka@buzalka.cz` do `Inbox/JNJ/...`
|
||||||
|
3. Složky se vytvářejí automaticky, mapování: `/vbuzalka@its.jnj.com/X` → `JNJ/X`, `/Online Archive.../X` → `JNJ/Online Archive/X`
|
||||||
|
|
||||||
|
Klient v1.4 (`janssenpc_email_send_new_v1.4.py`) posílá `folder` automaticky.
|
||||||
|
|||||||
@@ -1,17 +1,28 @@
|
|||||||
# app.py | v1.0 | 2026-05-29
|
# app.py | v1.3 | 2026-05-29
|
||||||
# FastAPI server pro příjem .msg a .db souborů a upload do Dropboxu.
|
# FastAPI server pro příjem .msg a .db souborů, upload do Dropboxu a import do Graph API.
|
||||||
# Endpointy: /upload (.msg → /msgs), /upload-db (.db → /msgs/db), /upload-dropbox (→ Dropbox /!!!Days/Downloads Z230).
|
# Endpointy: /upload (.msg → /msgs + Graph import), /upload-db (.db → /msgs/db),
|
||||||
|
# /upload-dropbox (→ Dropbox /!!!Days/Downloads Z230).
|
||||||
|
|
||||||
from fastapi import FastAPI, UploadFile, File, Header, HTTPException
|
from fastapi import FastAPI, UploadFile, File, Form, Header, HTTPException
|
||||||
import shutil
|
import shutil
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
import os
|
import os
|
||||||
import dropbox
|
import dropbox
|
||||||
|
import msal
|
||||||
|
import requests as http_requests
|
||||||
|
import extract_msg
|
||||||
|
from dateutil import parser as dtparser
|
||||||
|
from datetime import timezone
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
load_dotenv(Path(__file__).parent / ".env")
|
load_dotenv(Path(__file__).parent / ".env")
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
log = logging.getLogger("msgreceiver")
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||||
|
|
||||||
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
|
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
|
||||||
SAVE_DIR = Path("/msgs")
|
SAVE_DIR = Path("/msgs")
|
||||||
@@ -24,11 +35,213 @@ DROPBOX_APP_KEY = os.getenv("DROPBOX_APP_KEY", "")
|
|||||||
DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET", "")
|
DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET", "")
|
||||||
DROPBOX_REFRESH_TOKEN = os.getenv("DROPBOX_APP_REFRESH_TOKEN", "")
|
DROPBOX_REFRESH_TOKEN = os.getenv("DROPBOX_APP_REFRESH_TOKEN", "")
|
||||||
|
|
||||||
|
# --- Graph API config ---
# SECURITY: credentials should not live in source control. Each value can now be
# supplied through the environment / .env (loaded above via load_dotenv), the same
# way the Dropbox credentials are. The literals remain only as backward-compatible
# defaults. TODO: rotate the client secret and drop the hard-coded fallbacks.
GRAPH_TENANT_ID = os.getenv("GRAPH_TENANT_ID", "7d269944-37a4-43a1-8140-c7517dc426e9")
GRAPH_CLIENT_ID = os.getenv("GRAPH_CLIENT_ID", "4b222bfd-78c9-4239-a53f-43006b3ed07f")
GRAPH_CLIENT_SECRET = os.getenv("GRAPH_CLIENT_SECRET", "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk")
GRAPH_MAILBOX = os.getenv("GRAPH_MAILBOX", "vladimir.buzalka@buzalka.cz")
GRAPH_ROOT_FOLDER = "JNJ"  # subfolder under Inbox — root for imported emails
GRAPH_URL = "https://graph.microsoft.com/v1.0"
|
||||||
|
|
||||||
|
# Cache: folder path → Graph folder ID
|
||||||
|
_folder_id_cache: dict[str, str] = {}
|
||||||
|
_graph_token: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_graph_token() -> str:
    """Acquire a new app-only Graph access token and store it in ``_graph_token``.

    Runs the MSAL client-credentials flow with the module-level GRAPH_* settings
    and the ``https://graph.microsoft.com/.default`` scope.

    Returns:
        The access token string (also cached in the module global).

    Raises:
        RuntimeError: if the MSAL result carries no ``access_token`` key.
    """
    global _graph_token

    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
    client = msal.ConfidentialClientApplication(
        GRAPH_CLIENT_ID,
        authority=authority_url,
        client_credential=GRAPH_CLIENT_SECRET,
    )
    outcome = client.acquire_token_for_client(
        scopes=["https://graph.microsoft.com/.default"]
    )

    if "access_token" in outcome:
        _graph_token = outcome["access_token"]
        return _graph_token

    raise RuntimeError(f"Graph auth failed: {outcome}")
|
||||||
|
|
||||||
|
|
||||||
|
def _graph_headers() -> dict:
    """Return JSON request headers with a bearer token.

    The cached ``_graph_token`` is reused when set; otherwise a token is
    acquired through ``_get_graph_token()``.
    """
    bearer = _graph_token if _graph_token else _get_graph_token()
    return {
        "Authorization": f"Bearer {bearer}",
        "Content-Type": "application/json",
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _graph_call(send, url: str, **kwargs):
    """Issue one Graph request via *send* (``http_requests.get``/``post``).

    On HTTP 401 the token is refreshed and the request is retried once.
    """
    resp = send(url, headers=_graph_headers(), timeout=15, **kwargs)
    if resp.status_code == 401:
        _get_graph_token()
        resp = send(url, headers=_graph_headers(), timeout=15, **kwargs)
    return resp


def _find_child_folder(url: str, name: str) -> Optional[str]:
    """Return the ID of the child folder called *name* (case-insensitive), or None.

    The childFolders listing is paged, so every page is followed through
    ``@odata.nextLink``; reading only the first page would miss folders in
    parents with many children.

    Raises:
        RuntimeError: if the listing request does not return HTTP 200.
    """
    wanted = name.lower()
    page_url: Optional[str] = url
    params: Optional[dict] = {"$top": 100}
    while page_url:
        resp = _graph_call(http_requests.get, page_url, params=params)
        if resp.status_code != 200:
            raise RuntimeError(
                f"Cannot list folders [{resp.status_code}]: {resp.text[:200]}"
            )
        data = resp.json()
        for entry in data.get("value", []):
            if entry.get("displayName", "").lower() == wanted:
                return entry["id"]
        page_url = data.get("@odata.nextLink")
        params = None  # the nextLink URL already carries its own query string
    return None


def _ensure_folder(path_parts: list[str]) -> str:
    """Ensure folder hierarchy exists under Inbox, return leaf folder ID.

    Walks *path_parts* from the Inbox downwards, looking each level up in
    ``_folder_id_cache`` first, then in the mailbox, and creating it when it is
    missing. Every resolved level is cached under its joined path.

    Args:
        path_parts: Folder names from the top level under Inbox to the leaf.

    Returns:
        The Graph folder ID of the leaf (the literal ``"Inbox"`` for an empty list).

    Raises:
        RuntimeError: if a level can neither be listed, found nor created.
    """
    cache_key = "/".join(path_parts)
    if cache_key in _folder_id_cache:
        return _folder_id_cache[cache_key]

    parent_id = "Inbox"

    for depth, part in enumerate(path_parts, start=1):
        partial_key = "/".join(path_parts[:depth])
        if partial_key in _folder_id_cache:
            parent_id = _folder_id_cache[partial_key]
            continue

        # "Inbox" is a well-known folder name, so one URL shape serves both the
        # root level and nested levels addressed by ID.
        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"

        found = _find_child_folder(url, part)
        if not found:
            created = _graph_call(http_requests.post, url, json={"displayName": part})
            if created.status_code in (200, 201):
                found = created.json()["id"]
            elif created.status_code == 409:
                # Already exists (race condition) — re-fetch
                found = _find_child_folder(url, part)
            if not found:
                raise RuntimeError(f"Cannot create folder '{part}': {created.text}")

        _folder_id_cache[partial_key] = found
        parent_id = found

    return parent_id
|
||||||
|
|
||||||
|
|
||||||
|
def _map_jnj_folder(folder: str) -> list[str]:
    """Translate a JNJ Outlook folder path into folder names under the JNJ root.

    The first path segment (the mailbox name) is dropped; when it contains
    "online archive" an extra "Online Archive" level is inserted instead.

    '/vbuzalka@its.jnj.com/Inbox/TMP' → ['JNJ', 'Inbox', 'TMP']
    '/Online Archive - vbuzalka@its.jnj.com/Inbox' → ['JNJ', 'Online Archive', 'Inbox']
    """
    segments = list(filter(None, folder.split("/")))
    mapped = [GRAPH_ROOT_FOLDER]
    if not segments:
        return mapped

    mailbox_name, *subfolders = segments
    if "online archive" in mailbox_name.lower():
        mapped.append("Online Archive")

    mapped.extend(subfolders)
    return mapped
|
||||||
|
|
||||||
|
|
||||||
|
def _make_recipient(addr: str) -> dict:
    """Build a Graph ``recipient`` object from ``Name <email>`` or a bare address.

    The address is taken from the LAST ``<...>`` pair, so nested input such as
    ``Name <Name <a@b.cz>>`` still yields ``a@b.cz``. The display name is the
    text before the first ``<``; when that is empty the address is used as the
    name. Input without a bracket pair is used for both name and address.

    Args:
        addr: Raw recipient string.

    Returns:
        ``{"emailAddress": {"name": ..., "address": ...}}``
    """
    text = addr.strip()
    open_at = text.rfind("<")
    # Only accept a '>' that closes the bracket we found, never an earlier one.
    close_at = text.find(">", open_at + 1) if open_at != -1 else -1

    if open_at != -1 and close_at != -1:
        email = text[open_at + 1 : close_at].strip()
        name = text[: text.find("<")].strip().strip('"') or email
    else:
        name = email = text

    return {"emailAddress": {"name": name, "address": email}}
|
||||||
|
|
||||||
|
|
||||||
|
def _import_msg_to_graph(msg_path: Path, folder: str) -> Optional[str]:
    """Parse .msg and import into Graph API mailbox. Returns message ID or None.

    Args:
        msg_path: Path of the saved ``.msg`` file.
        folder: Source folder path as sent by the client; mapped through
            ``_map_jnj_folder`` and created on demand via ``_ensure_folder``.

    Returns:
        The ID of the created Graph message, or ``None`` when anything fails.
        Failures are logged and never raised to the caller.
    """
    try:
        msg = extract_msg.Message(str(msg_path))
        try:
            # Read everything needed while the file is open; the finally clause
            # guarantees the handle is released even if a property read fails.
            subject = msg.subject or "(no subject)"
            body_html = msg.htmlBody
            if isinstance(body_html, bytes):
                body_html = body_html.decode("utf-8", errors="replace")
            body_text = msg.body or ""

            sender_email = msg.sender or ""
            sender_name = getattr(msg, "senderName", None) or sender_email
            to_raw = msg.to or ""
            cc_raw = msg.cc or ""
            date_raw = msg.date

            att_list = []
            for att in msg.attachments:
                data = att.data
                if not (data and att.longFilename):
                    continue
                if not isinstance(data, (bytes, bytearray)):
                    # NOTE(review): presumably an embedded message rather than raw
                    # bytes; it cannot be base64-encoded as a fileAttachment, so
                    # skip it instead of failing the whole import.
                    log.warning(
                        "Skipping non-binary attachment %s in %s",
                        att.longFilename, msg_path.name,
                    )
                    continue
                att_list.append({
                    "@odata.type": "#microsoft.graph.fileAttachment",
                    "name": att.longFilename,
                    "contentType": getattr(att, "mimetype", None) or "application/octet-stream",
                    "contentBytes": base64.b64encode(data).decode(),
                })
        finally:
            msg.close()

        to_list = [a.strip() for a in to_raw.split(";") if a.strip()]
        cc_list = [a.strip() for a in cc_raw.split(";") if a.strip()]

        # Map folder and ensure it exists
        folder_parts = _map_jnj_folder(folder)
        folder_id = _ensure_folder(folder_parts)

        payload = {
            "subject": subject,
            "body": {
                "contentType": "HTML" if body_html else "Text",
                "content": body_html or body_text,
            },
            "from": _make_recipient(f"{sender_name} <{sender_email}>"),
            "toRecipients": [_make_recipient(a) for a in to_list],
            "ccRecipients": [_make_recipient(a) for a in cc_list],
            "isRead": True,
            # Extended property 0x0E07 set to 1 so the item is not stored as a draft.
            "singleValueExtendedProperties": [
                {"id": "Integer 0x0E07", "value": "1"}
            ],
        }

        if date_raw:
            try:
                dt = dtparser.parse(str(date_raw))
                # A naive datetime is interpreted by astimezone() as local time.
                payload["receivedDateTime"] = dt.astimezone(timezone.utc).strftime(
                    "%Y-%m-%dT%H:%M:%SZ"
                )
            except Exception as date_err:
                # Best effort: import without receivedDateTime, but leave a trace.
                log.warning(
                    "Cannot parse date %r for %s: %s", date_raw, msg_path.name, date_err
                )

        if att_list:
            payload["attachments"] = att_list

        headers = _graph_headers()
        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
        r = http_requests.post(url, headers=headers, json=payload, timeout=30)

        if r.status_code == 401:
            # Token expired: refresh once and retry.
            _get_graph_token()
            headers = _graph_headers()
            r = http_requests.post(url, headers=headers, json=payload, timeout=30)

        if r.status_code in (200, 201):
            msg_id = r.json().get("id", "")
            log.info("Graph OK: %s → %s", subject[:60], "/".join(folder_parts))
            return msg_id

        log.error("Graph FAIL [%d]: %s | %s", r.status_code, subject[:60], r.text[:200])
        return None

    except Exception as e:
        log.error("Graph import error for %s: %s", msg_path.name, e)
        return None
|
||||||
|
|
||||||
|
|
||||||
@app.post("/upload")
|
@app.post("/upload")
|
||||||
async def upload_msg(
|
async def upload_msg(
|
||||||
file: UploadFile = File(...),
|
file: UploadFile = File(...),
|
||||||
authorization: str = Header(None)
|
authorization: str = Header(None),
|
||||||
|
folder: str = Form(""),
|
||||||
):
|
):
|
||||||
if authorization != f"Bearer {TOKEN}":
|
if authorization != f"Bearer {TOKEN}":
|
||||||
raise HTTPException(status_code=401, detail="Unauthorized")
|
raise HTTPException(status_code=401, detail="Unauthorized")
|
||||||
@@ -39,7 +252,17 @@ async def upload_msg(
|
|||||||
return {"status": "exists", "file": file.filename}
|
return {"status": "exists", "file": file.filename}
|
||||||
with dest.open("wb") as f:
|
with dest.open("wb") as f:
|
||||||
shutil.copyfileobj(file.file, f)
|
shutil.copyfileobj(file.file, f)
|
||||||
return {"status": "saved", "file": file.filename}
|
|
||||||
|
# Import to Graph API if folder was provided by client
|
||||||
|
graph_id = None
|
||||||
|
if folder:
|
||||||
|
graph_id = _import_msg_to_graph(dest, folder)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"status": "saved",
|
||||||
|
"file": file.filename,
|
||||||
|
"graph_imported": graph_id is not None,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@app.post("/upload-db")
|
@app.post("/upload-db")
|
||||||
|
|||||||
@@ -3,3 +3,7 @@ uvicorn
|
|||||||
python-multipart
|
python-multipart
|
||||||
dropbox
|
dropbox
|
||||||
python-dotenv
|
python-dotenv
|
||||||
|
msal
|
||||||
|
requests
|
||||||
|
extract-msg
|
||||||
|
python-dateutil
|
||||||
@@ -0,0 +1,180 @@
|
|||||||
|
# test_import_msg.py — pokusný import .msg do schránky přes Graph API
|
||||||
|
# Parsuje .msg soubor a vytvoří zprávu v Inbox cílové schránky.
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import msal
|
||||||
|
import requests
|
||||||
|
import extract_msg
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# === CONFIG ===
|
||||||
|
TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
|
||||||
|
CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
|
||||||
|
CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
|
||||||
|
MAILBOX = "vladimir.buzalka@buzalka.cz"
|
||||||
|
|
||||||
|
AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
|
||||||
|
SCOPE = ["https://graph.microsoft.com/.default"]
|
||||||
|
GRAPH_URL = "https://graph.microsoft.com/v1.0"
|
||||||
|
TARGET_FOLDER = "JNJ" # subfolder under Inbox
|
||||||
|
|
||||||
|
# === MSG FILE ===
|
||||||
|
MSG_PATH = Path(__file__).parent / "FC130007ACFE5DCB0000.msg"
|
||||||
|
|
||||||
|
|
||||||
|
def get_token():
    """Return an app-only Graph access token obtained via MSAL client credentials.

    Raises:
        RuntimeError: if the MSAL response holds no ``access_token``.
    """
    client = msal.ConfidentialClientApplication(
        CLIENT_ID,
        authority=AUTHORITY,
        client_credential=CLIENT_SECRET,
    )
    response = client.acquire_token_for_client(scopes=SCOPE)
    if "access_token" in response:
        return response["access_token"]
    raise RuntimeError(f"Auth failed: {response}")
|
||||||
|
|
||||||
|
|
||||||
|
def parse_msg(path):
    """Parse .msg file and return dict with message properties.

    Args:
        path: Location of the ``.msg`` file.

    Returns:
        Dict with keys ``subject``, ``body_html``, ``body_text``,
        ``sender_email``, ``sender_name``, ``to``, ``cc`` (lists split on ";"),
        ``received`` (``str(date)`` or None) and ``attachments`` (Graph
        fileAttachment dicts with base64 content).
    """
    msg = extract_msg.Message(str(path))
    try:
        # Read all properties before closing; finally releases the file even
        # when a property read raises.
        subject = msg.subject or "(no subject)"
        body_html = msg.htmlBody
        if isinstance(body_html, bytes):
            body_html = body_html.decode("utf-8", errors="replace")
        body_text = msg.body or ""

        sender_email = msg.sender or ""
        sender_name = getattr(msg, "senderName", None) or sender_email

        to_raw = msg.to or ""
        cc_raw = msg.cc or ""
        date_raw = msg.date

        att_list = []
        for att in msg.attachments:
            data = att.data
            # NOTE(review): non-bytes data (presumably an embedded message) cannot
            # be base64-encoded, so such attachments are skipped — confirm.
            if data and att.longFilename and isinstance(data, (bytes, bytearray)):
                att_list.append({
                    "@odata.type": "#microsoft.graph.fileAttachment",
                    "name": att.longFilename,
                    "contentType": getattr(att, "mimetype", None) or "application/octet-stream",
                    "contentBytes": base64.b64encode(data).decode(),
                })
    finally:
        msg.close()

    # Process after close
    to_list = [a.strip() for a in to_raw.split(";") if a.strip()]
    cc_list = [a.strip() for a in cc_raw.split(";") if a.strip()]
    received = str(date_raw) if date_raw else None

    return {
        "subject": subject,
        "body_html": body_html,
        "body_text": body_text,
        "sender_email": sender_email,
        "sender_name": sender_name,
        "to": to_list,
        "cc": cc_list,
        "received": received,
        "attachments": att_list,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def make_recipient(addr):
    """Create Graph API recipient object from email address.

    Accepts ``Name <email>`` or a bare address. The address comes from the LAST
    ``<...>`` pair so nested input (``Name <Name <a@b.cz>>``) still resolves;
    the name is the text before the first ``<``, falling back to the address
    when empty. Input without a bracket pair is used for both fields.
    """
    text = addr.strip()
    open_at = text.rfind("<")
    # Only a '>' located after the chosen '<' closes the address.
    close_at = text.find(">", open_at + 1) if open_at != -1 else -1

    if open_at != -1 and close_at != -1:
        email = text[open_at + 1 : close_at].strip()
        name = text[: text.find("<")].strip().strip('"') or email
    else:
        name = email = text

    return {"emailAddress": {"name": name, "address": email}}
|
||||||
|
|
||||||
|
|
||||||
|
def import_msg(msg_path):
    """Parse one .msg file and create it as a message in Inbox/TARGET_FOLDER.

    Progress is printed to stdout. The target folder is looked up among the
    Inbox child folders and created when missing.

    Args:
        msg_path: Path of the ``.msg`` file to import.

    Returns:
        The JSON dict of the created Graph message, or ``None`` when the folder
        lookup/creation or the message POST fails.
    """
    token = get_token()
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }

    print(f"Parsing: {msg_path}")
    data = parse_msg(msg_path)
    print(f" Subject: {data['subject']}")
    print(f" From: {data['sender_name']} <{data['sender_email']}>")
    print(f" To: {data['to']}")
    print(f" Date: {data['received']}")
    print(f" Attachments: {len(data['attachments'])}")

    # 1. Create message in mailFolder (Inbox)
    payload = {
        "subject": data["subject"],
        "body": {
            "contentType": "HTML" if data["body_html"] else "Text",
            "content": data["body_html"] or data["body_text"],
        },
        "from": make_recipient(
            f"{data['sender_name']} <{data['sender_email']}>"
        ),
        "toRecipients": [make_recipient(a) for a in data["to"]],
        "ccRecipients": [make_recipient(a) for a in data["cc"]],
        "isRead": True,
        # PR_MESSAGE_FLAGS (0x0E07) = 1 → read, NOT draft (without MSGFLAG_UNSENT=0x08)
        "singleValueExtendedProperties": [
            {
                "id": "Integer 0x0E07",
                "value": "1",
            }
        ],
    }

    if data["received"]:
        # Graph API expects ISO 8601 UTC format
        from datetime import timezone
        try:
            from dateutil import parser as dtparser
            dt = dtparser.parse(data["received"])
            payload["receivedDateTime"] = dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        except Exception as e:
            print(f" Warning: cannot parse date '{data['received']}': {e}")

    if data["attachments"]:
        payload["attachments"] = data["attachments"]

    # Find target folder (Inbox/JNJ). The listing is paged, so request a larger
    # page than the default to make it likely the target is on the first page.
    folder_url = f"{GRAPH_URL}/users/{MAILBOX}/mailFolders/Inbox/childFolders"
    r_folders = requests.get(
        folder_url, headers=headers, params={"$top": 100}, timeout=15
    )
    if r_folders.status_code != 200:
        print(f" FAILED [{r_folders.status_code}]: {r_folders.text[:500]}")
        return None

    folder_id = None
    for f in r_folders.json().get("value", []):
        if f["displayName"].lower() == TARGET_FOLDER.lower():
            folder_id = f["id"]
            break

    if not folder_id:
        # Create the folder if it doesn't exist
        r_create = requests.post(
            folder_url, headers=headers,
            json={"displayName": TARGET_FOLDER}, timeout=15
        )
        if r_create.status_code not in (200, 201):
            # Without this check a failed creation surfaces as a KeyError on "id".
            print(f" FAILED [{r_create.status_code}]: {r_create.text[:500]}")
            return None
        folder_id = r_create.json()["id"]
        print(f" Created folder '{TARGET_FOLDER}'")

    url = f"{GRAPH_URL}/users/{MAILBOX}/mailFolders/{folder_id}/messages"
    print(f"\nPOST -> Inbox/{TARGET_FOLDER}")

    r = requests.post(url, headers=headers, json=payload, timeout=30)

    if r.status_code in (200, 201):
        msg_id = r.json().get("id", "?")
        print(f" OK! Message created, id={msg_id[:40]}...")
        return r.json()

    print(f" FAILED [{r.status_code}]: {r.text[:500]}")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Optional first CLI argument overrides the bundled sample message.
    cli_args = sys.argv[1:]
    target = cli_args[0] if cli_args else MSG_PATH
    import_msg(Path(target))
|
||||||
@@ -0,0 +1,233 @@
|
|||||||
|
"""
|
||||||
|
janssenpc_email_send_new v1.4
|
||||||
|
Verze: 1.4.1
|
||||||
|
Datum: 2026-05-29
|
||||||
|
Popis: Prochází složky Inbox, Deleted Items a Sent Items v Outlooku (MAPI),
|
||||||
|
ukládá emailové zprávy jako .msg soubory a uploaduje je na https://msgs.buzalka.cz.
|
||||||
|
Zaznamenává zpracované zprávy do SQLite DB (jnjemails.db) a DB uploaduje na server
|
||||||
|
jednou za 24 hodin (ne při každém běhu). Podporuje pokračování od posledního
|
||||||
|
zpracovaného emailu (resume). Folder cesta obsahuje celé jméno schránky
|
||||||
|
(např. /vbuzalka@its.jnj.com/Inbox). Chyby se logují do jnjemails_errors.log.
|
||||||
|
"""
|
||||||
|
import win32com.client
|
||||||
|
import requests
|
||||||
|
import sqlite3
|
||||||
|
import urllib3
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import tempfile
|
||||||
|
import io
|
||||||
|
|
||||||
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
|
||||||
|
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
|
||||||
|
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
|
||||||
|
DB_UPLOAD_MARKER = r"C:\Users\vbuzalka\SQLITE\jnjemails_last_db_upload.txt"
|
||||||
|
DB_UPLOAD_INTERVAL_H = 24
|
||||||
|
LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails_errors.log"
|
||||||
|
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
|
||||||
|
|
||||||
|
# olFolderInbox=6, olFolderDeletedItems=3, olFolderSentMail=5
|
||||||
|
FOLDERS_TO_PROCESS = [6, 3, 5]
|
||||||
|
|
||||||
|
UPLOAD_LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails_uploads.log"
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
filename=LOG_PATH,
|
||||||
|
level=logging.ERROR,
|
||||||
|
format="%(asctime)s | %(message)s",
|
||||||
|
datefmt="%Y-%m-%d %H:%M:%S",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Separate upload logger — logs every upload attempt
|
||||||
|
_upload_log = logging.getLogger("uploads")
|
||||||
|
_upload_log.setLevel(logging.DEBUG)
|
||||||
|
_uh = logging.FileHandler(UPLOAD_LOG_PATH, encoding="utf-8")
|
||||||
|
_uh.setFormatter(logging.Formatter("%(asctime)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
|
||||||
|
_upload_log.addHandler(_uh)
|
||||||
|
|
||||||
|
def init_db(conn):
    """Create the ``messages`` table and its unique message_id index if absent, then commit."""
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT NOT NULL,
            subject TEXT,
            sender TEXT,
            received_at TEXT,
            folder TEXT,
            source TEXT,
            uploaded_at TEXT DEFAULT (datetime('now'))
        )
    """
    create_index_sql = (
        "CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)"
    )
    for statement in (create_table_sql, create_index_sql):
        conn.execute(statement)
    conn.commit()
|
||||||
|
|
||||||
|
def is_uploaded(conn, message_id):
    """Return True when a row with this ``message_id`` exists in ``messages``."""
    cursor = conn.execute(
        "SELECT 1 FROM messages WHERE message_id = ? LIMIT 1", (message_id,)
    )
    hit = cursor.fetchone()
    if hit is None:
        return False
    return True
|
||||||
|
|
||||||
|
def save_to_db(conn, message_id, subject, sender, received_at, folder, source):
    """Record one processed message and commit; a conflicting insert is ignored (INSERT OR IGNORE)."""
    row = (message_id, subject, sender, received_at, folder, source)
    conn.execute(
        """
        INSERT OR IGNORE INTO messages (message_id, subject, sender, received_at, folder, source)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        row,
    )
    conn.commit()
|
||||||
|
|
||||||
|
def _db_upload_due() -> bool:
    """Tell whether the DB should be uploaded now.

    True when the marker file is missing, unreadable or unparsable, or when at
    least DB_UPLOAD_INTERVAL_H hours have passed since the timestamp it holds.
    """
    marker_file = Path(DB_UPLOAD_MARKER)
    if marker_file.exists():
        try:
            stamp = marker_file.read_text().strip()
            elapsed = datetime.now() - datetime.fromisoformat(stamp)
            return elapsed.total_seconds() >= DB_UPLOAD_INTERVAL_H * 3600
        except Exception:
            # Any problem with the marker means "upload again".
            return True
    return True
|
||||||
|
|
||||||
|
def _db_upload_mark():
    """Store the current local time (ISO format) in the DB-upload marker file."""
    now_iso = datetime.now().isoformat()
    marker_file = Path(DB_UPLOAD_MARKER)
    marker_file.write_text(now_iso)
|
||||||
|
|
||||||
|
def upload_db(db_path, force=False):
    """Upload the SQLite DB to the server's /upload-db endpoint.

    Skipped unless *force* is true or ``_db_upload_due()`` reports that the
    interval has elapsed. The upload marker is written only after the server
    answers with a success status, so a rejected upload is retried on the next
    call instead of being suppressed for a whole interval.

    Args:
        db_path: Path of the database file to send.
        force: Upload even if the interval has not elapsed.
    """
    if not force and not _db_upload_due():
        return

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"jnjemails_{timestamp}.db"
    with open(db_path, "rb") as f:
        resp = requests.post(
            "https://msgs.buzalka.cz/upload-db",
            headers={"Authorization": f"Bearer {TOKEN}"},
            files={"file": (filename, f, "application/octet-stream")},
            timeout=60,
        )

    if not resp.ok:
        # Do not mark: the server did not accept the file.
        print(f" DB upload FAILED [{resp.status_code}]: {resp.text[:200]}")
        logging.error("DB upload failed | status=%s | body=%s",
                      resp.status_code, resp.text[:200])
        return

    print(f" DB upload: {resp.json()}")
    _db_upload_mark()
|
||||||
|
|
||||||
|
def upload_msg(msg_path, filename, folder=""):
    """POST one .msg file (plus its source folder path) to UPLOAD_URL.

    Both the attempt and the server's JSON reply are written to the upload log.

    Returns:
        The ``status`` field of the JSON response.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` on an error status.
    """
    _upload_log.info("UPLOAD %s | folder=%s", filename, folder)

    auth_header = {"Authorization": f"Bearer {TOKEN}"}
    form_fields = {"folder": folder}
    with open(msg_path, "rb") as handle:
        multipart = {"file": (filename, handle, "application/octet-stream")}
        resp = requests.post(
            UPLOAD_URL,
            headers=auth_header,
            files=multipart,
            data=form_fields,
            timeout=60,
        )

    resp.raise_for_status()
    reply = resp.json()
    _upload_log.info("RESPONSE %s | %s", filename, reply)
    return reply["status"]
|
||||||
|
|
||||||
|
def get_folder_resume_date(conn, folder_path):
    """Return the point in time from which a folder should be re-scanned.

    That is the largest ``received_at`` stored for *folder_path* minus one
    hour, or ``None`` when nothing is stored for the folder.
    """
    cursor = conn.execute(
        "SELECT MAX(received_at) FROM messages WHERE folder = ?",
        (folder_path,),
    )
    result = cursor.fetchone()
    latest = result[0] if result else None
    if not latest:
        return None
    return datetime.fromisoformat(latest) - timedelta(hours=1)
|
||||||
|
|
||||||
|
def _item_attr(item, name):
    """Read an attribute of a mail item for error reporting; '?' if unreadable or empty."""
    try:
        value = getattr(item, name, None)
    except Exception:
        return '?'
    return '?' if value in (None, '') else value


def process_folder(conn, folder, source, folder_path="", counter=None):
    """Upload every not-yet-recorded mail item of *folder*, then recurse into subfolders.

    Args:
        conn: Open SQLite connection holding the ``messages`` table.
        folder: Outlook folder object to process.
        source: Label stored in the ``source`` column for each saved row.
        folder_path: Path of the parent folder; this folder's name is appended.
        counter: One-element list with the running total of transferred items,
            shared across recursive calls. A fresh one is created when omitted.

    Errors on a single item are printed and logged and do not stop the folder;
    an error at folder level is printed and logged, after which subfolders are
    still visited.
    """
    if counter is None:
        counter = [0]

    current_path = f"{folder_path}/{folder.Name}"

    try:
        resume_dt = get_folder_resume_date(conn, current_path)

        items = folder.Items

        if resume_dt:
            resume_str = resume_dt.strftime("%Y/%m/%d %H:%M:%S")
            filter_str = f"@SQL=\"urn:schemas:httpmail:datereceived\" > '{resume_str}'"
            items = folder.Items.Restrict(filter_str)
            print(f"\n Složka: {current_path} | pokračuji od: {resume_str}")
        else:
            print(f"\n Složka: {current_path} | od začátku")

        items.Sort("[ReceivedTime]", False)

        count = 0
        skipped = 0

        for item in items:
            try:
                if not item.MessageClass.upper().startswith("IPM.NOTE"):
                    continue

                try:
                    mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
                except Exception:
                    # Property unavailable; fall back to the EntryID below.
                    # (Not a bare except, so Ctrl+C still interrupts the run.)
                    mid = None

                if not mid:
                    mid = f"entryid:{item.EntryID}"

                if is_uploaded(conn, mid):
                    skipped += 1
                    continue

                with tempfile.TemporaryDirectory() as tmp:
                    safe_name = f"{item.EntryID[-20:]}.msg"
                    tmp_path = Path(tmp) / safe_name
                    item.SaveAs(str(tmp_path), 3)
                    status = upload_msg(tmp_path, safe_name, current_path)

                received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
                save_to_db(conn, mid, item.Subject, item.SenderEmailAddress,
                           received, current_path, source)

                counter[0] += 1
                count += 1

                if counter[0] % 1000 == 0:
                    print(f" → celkem {counter[0]} emailů přeneseno, uploaduji DB...")
                    upload_db(DB_PATH)

                # Subject may be empty/None; never let the progress line fail.
                print(f" {status.upper():6} | {(item.Subject or '')[:60]}")

            except Exception as e:
                # Reading attributes here must not raise again, otherwise the
                # outer handler would abort the rest of the folder.
                subject = _item_attr(item, 'Subject')
                sender = _item_attr(item, 'SenderEmailAddress')
                received = _item_attr(item, 'ReceivedTime')
                print(f" CHYBA | {str(subject)[:40]} | {e}")
                logging.error("folder=%s | sender=%s | received=%s | subject=%s | error=%s",
                              current_path, sender, received, subject, e)

        print(f" → složka hotova: přeneseno {count} | skip {skipped}")

    except Exception as e:
        print(f" CHYBA složka {current_path}: {e}")
        logging.error("folder=%s | CHYBA SLOŽKY | error=%s", current_path, e)

    for subfolder in folder.Folders:
        process_folder(conn, subfolder, source, current_path, counter)
|
||||||
|
|
||||||
|
# --- MAIN ---
Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(DB_PATH)
try:
    init_db(conn)

    outlook = win32com.client.Dispatch("Outlook.Application")
    ns = outlook.GetNamespace("MAPI")

    counter = [0]
    for folder_id in FOLDERS_TO_PROCESS:
        folder = ns.GetDefaultFolder(folder_id)
        mailbox_name = folder.Parent.Name
        print(f"\n=== {folder.Name} ({mailbox_name}) ===")
        process_folder(conn, folder, "mailbox", f"/{mailbox_name}", counter)

    # Final DB upload after the run completes
    print("\nFinální upload DB...")
    upload_db(DB_PATH)
finally:
    # Close the connection even when Outlook or the upload raises.
    conn.close()

print(f"\nHotovo. Chyby logovány do: {LOG_PATH}")
|
||||||
|
After Width: | Height: | Size: 28 KiB |
|
After Width: | Height: | Size: 30 KiB |
|
After Width: | Height: | Size: 30 KiB |
|
After Width: | Height: | Size: 9.6 KiB |
|
After Width: | Height: | Size: 28 KiB |
|
After Width: | Height: | Size: 249 KiB |
|
After Width: | Height: | Size: 16 KiB |
@@ -72,6 +72,15 @@ def wait_load(page, extra_ms=1000):
|
|||||||
|
|
||||||
def dbg(page, label):
    """Print the page's current URL and save a full-page screenshot for debugging.

    The image goes to ``debug_shots/<label>.png`` next to this file. Any failure
    while taking the screenshot is printed and otherwise ignored.
    """
    print(f"[{label}] URL: {page.url}")
    try:
        from pathlib import Path

        shots_dir = Path(__file__).parent / "debug_shots"
        shots_dir.mkdir(exist_ok=True)
        target = shots_dir / f"{label}.png"
        page.screenshot(path=str(target), full_page=True)
        print(f"[{label}] screenshot: {target}")
    except Exception as e:
        print(f"[{label}] screenshot failed: {e}")
|
||||||
|
|
||||||
|
|
||||||
def extract_study_label(study_search: str) -> str:
|
def extract_study_label(study_search: str) -> str:
|
||||||
@@ -178,16 +187,32 @@ def select_role(page):
|
|||||||
print(f" Vybráno: '{txt}'")
|
print(f" Vybráno: '{txt}'")
|
||||||
break
|
break
|
||||||
|
|
||||||
|
clicked = False
|
||||||
for btn_sel in ['input[value="Continue"]', 'input[type="submit"]',
|
for btn_sel in ['input[value="Continue"]', 'input[type="submit"]',
|
||||||
'button:has-text("Continue")', 'button[type="submit"]']:
|
'button:has-text("Continue")', 'button[type="submit"]']:
|
||||||
try:
|
try:
|
||||||
btn = page.query_selector(btn_sel)
|
btn = page.query_selector(btn_sel)
|
||||||
except Exception:
|
except Exception:
|
||||||
break
|
continue
|
||||||
if btn:
|
if btn:
|
||||||
btn.click()
|
try:
|
||||||
wait_load(page, 2000)
|
with page.expect_navigation(timeout=15_000):
|
||||||
break
|
btn.click()
|
||||||
|
clicked = True
|
||||||
|
break
|
||||||
|
except PWTimeout:
|
||||||
|
print(f" Click on {btn_sel} nezpůsobil navigaci, zkouším další...")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not clicked:
|
||||||
|
print(" Fallback: submituji formulář přes JS...")
|
||||||
|
try:
|
||||||
|
with page.expect_navigation(timeout=15_000):
|
||||||
|
page.evaluate("document.forms[0] && document.forms[0].submit()")
|
||||||
|
except PWTimeout:
|
||||||
|
print(" JS submit fallback také neprošel.")
|
||||||
|
|
||||||
|
wait_load(page, 1500)
|
||||||
dbg(page, "after-role")
|
dbg(page, "after-role")
|
||||||
|
|
||||||
|
|
||||||
@@ -404,8 +429,12 @@ def download_datalisting(study: str, forms: list[str], country: str | None = Non
|
|||||||
results = []
|
results = []
|
||||||
|
|
||||||
with sync_playwright() as p:
|
with sync_playwright() as p:
|
||||||
browser = p.chromium.launch(headless=False, slow_mo=200)
|
browser = p.chromium.launch(
|
||||||
ctx_kwargs = {"accept_downloads": True}
|
headless=False,
|
||||||
|
slow_mo=200,
|
||||||
|
args=["--start-maximized"],
|
||||||
|
)
|
||||||
|
ctx_kwargs = {"accept_downloads": True, "no_viewport": True}
|
||||||
|
|
||||||
use_saved = auth_valid()
|
use_saved = auth_valid()
|
||||||
if use_saved:
|
if use_saved:
|
||||||
|
|||||||