This commit is contained in:
2026-05-30 07:33:06 +02:00
parent d7dbb92dd1
commit 9f955a40fe
29 changed files with 1419 additions and 15 deletions
+15 -1
View File
@@ -7,6 +7,8 @@
## Kopírování souborů z Windows ## Kopírování souborů z Windows
Všechny soubory z `U:\janssen\EmailsImport\DockerCustomApp\` nakopírovat do `\\tower\appdata\msgreceiver\`. Všechny soubory z `U:\janssen\EmailsImport\DockerCustomApp\` nakopírovat do `\\tower\appdata\msgreceiver\`.
**DŮLEŽITÉ:** Po každé změně `app.py` je nutný rebuild a restart kontejneru (viz níže). Bez toho běží stará verze.
## Build & restart (SSH) ## Build & restart (SSH)
```bash ```bash
# Připojení: ssh root@192.168.1.76, heslo: 7309208104 # Připojení: ssh root@192.168.1.76, heslo: 7309208104
@@ -26,6 +28,18 @@ docker run -d --name msgreceiver \
## Kontejner ## Kontejner
- Port: 8765 - Port: 8765
- Restart policy: unless-stopped - Restart policy: unless-stopped
- Endpointy: `/upload` (msg), `/upload-db` (db), `/upload-dropbox` (soubory do Dropboxu) - Endpointy:
- `/upload` (msg + volitelný `folder` → uloží na disk + import do Graph API)
- `/upload-db` (db → /msgs/db, maže staré)
- `/upload-dropbox` (soubory do Dropboxu)
- Auth: Bearer token v app.py - Auth: Bearer token v app.py
- Dropbox credentials: v `.env` uvnitř image - Dropbox credentials: v `.env` uvnitř image
- Graph API credentials: přímo v app.py (Mail.ReadWrite + Mail.Send, tenant TrialHelp s.r.o.)
## Graph import
Při uploadu .msg s parametrem `folder` (plná cesta z JNJ Outlooku) server:
1. Uloží .msg na disk
2. Parsuje .msg a importuje do schránky `vladimir.buzalka@buzalka.cz` do `Inbox/JNJ/...`
3. Složky se vytvářejí automaticky, mapování: `/vbuzalka@its.jnj.com/X` → `JNJ/X`, `/Online Archive.../X` → `JNJ/Online Archive/X`
Klient v1.4 (`janssenpc_email_send_new_v1.4.py`) posílá `folder` automaticky.
+229 -6
View File
@@ -1,17 +1,28 @@
# app.py | v1.0 | 2026-05-29 # app.py | v1.3 | 2026-05-29
# FastAPI server pro příjem .msg a .db souborů a upload do Dropboxu. # FastAPI server pro příjem .msg a .db souborů, upload do Dropboxu a import do Graph API.
# Endpointy: /upload (.msg → /msgs), /upload-db (.db → /msgs/db), /upload-dropbox (→ Dropbox /!!!Days/Downloads Z230). # Endpointy: /upload (.msg → /msgs + Graph import), /upload-db (.db → /msgs/db),
# /upload-dropbox (→ Dropbox /!!!Days/Downloads Z230).
from fastapi import FastAPI, UploadFile, File, Header, HTTPException from fastapi import FastAPI, UploadFile, File, Form, Header, HTTPException
import shutil import shutil
import base64
import logging
from pathlib import Path from pathlib import Path
from typing import Optional
import os import os
import dropbox import dropbox
import msal
import requests as http_requests
import extract_msg
from dateutil import parser as dtparser
from datetime import timezone
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv(Path(__file__).parent / ".env") load_dotenv(Path(__file__).parent / ".env")
app = FastAPI() app = FastAPI()
log = logging.getLogger("msgreceiver")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340" TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
SAVE_DIR = Path("/msgs") SAVE_DIR = Path("/msgs")
@@ -24,11 +35,213 @@ DROPBOX_APP_KEY = os.getenv("DROPBOX_APP_KEY", "")
DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET", "") DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET", "")
DROPBOX_REFRESH_TOKEN = os.getenv("DROPBOX_APP_REFRESH_TOKEN", "") DROPBOX_REFRESH_TOKEN = os.getenv("DROPBOX_APP_REFRESH_TOKEN", "")
# --- Graph API config ---
# The tenant, client, secret and mailbox can be overridden through the
# environment, the same way the Dropbox credentials are read. The literals
# are kept only as fallbacks so existing deployments keep working unchanged.
# SECURITY NOTE(review): the client secret is committed to version control —
# rotate it, supply the new value via GRAPH_CLIENT_SECRET, then drop the
# literal fallback.
GRAPH_TENANT_ID = os.getenv("GRAPH_TENANT_ID", "7d269944-37a4-43a1-8140-c7517dc426e9")
GRAPH_CLIENT_ID = os.getenv("GRAPH_CLIENT_ID", "4b222bfd-78c9-4239-a53f-43006b3ed07f")
GRAPH_CLIENT_SECRET = os.getenv("GRAPH_CLIENT_SECRET", "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk")
GRAPH_MAILBOX = os.getenv("GRAPH_MAILBOX", "vladimir.buzalka@buzalka.cz")
GRAPH_ROOT_FOLDER = "JNJ"  # subfolder under Inbox — root for imported emails
GRAPH_URL = "https://graph.microsoft.com/v1.0"

# Cache: folder path → Graph folder ID
_folder_id_cache: dict[str, str] = {}
# Most recently acquired Graph access token; None until the first authentication.
_graph_token: Optional[str] = None
def _get_graph_token() -> str:
    """Acquire a new Graph access token and store it in ``_graph_token``.

    A new MSAL confidential client is built on every call and asked for a
    token for the ``https://graph.microsoft.com/.default`` scope.

    Returns:
        The access token string (also cached in the module global).

    Raises:
        RuntimeError: if the MSAL result contains no ``access_token``.
    """
    global _graph_token
    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
    client = msal.ConfidentialClientApplication(
        GRAPH_CLIENT_ID,
        authority=authority_url,
        client_credential=GRAPH_CLIENT_SECRET,
    )
    auth_result = client.acquire_token_for_client(
        scopes=["https://graph.microsoft.com/.default"]
    )
    if "access_token" in auth_result:
        _graph_token = auth_result["access_token"]
        return _graph_token
    raise RuntimeError(f"Graph auth failed: {auth_result}")
def _graph_headers() -> dict:
    """Return JSON request headers carrying the Graph bearer token.

    Reuses the cached ``_graph_token`` when one is set; otherwise
    authenticates first via ``_get_graph_token()``.
    """
    bearer = _graph_token if _graph_token else _get_graph_token()
    headers = {}
    headers["Authorization"] = f"Bearer {bearer}"
    headers["Content-Type"] = "application/json"
    return headers
def _folder_request(method: str, url: str, **kwargs):
    """Send one Graph request, re-authenticating once if the token is rejected."""
    kwargs.setdefault("timeout", 15)
    response = http_requests.request(method, url, headers=_graph_headers(), **kwargs)
    if response.status_code == 401:
        # Refresh the cached token; _graph_headers() picks up the new value.
        _get_graph_token()
        response = http_requests.request(method, url, headers=_graph_headers(), **kwargs)
    return response


def _find_child_folder_id(url: str, name: str) -> Optional[str]:
    """Return the ID of the child folder called *name* (case-insensitive), or None.

    Follows ``@odata.nextLink`` so that folders beyond the first result page
    are found as well.

    Raises:
        RuntimeError: if Graph answers a listing request with a non-200 status.
    """
    wanted = name.lower()
    page_url: Optional[str] = url
    params: Optional[dict] = {"$top": 100}
    while page_url:
        response = _folder_request("GET", page_url, params=params)
        if response.status_code != 200:
            raise RuntimeError(
                f"Cannot list folders for '{name}': "
                f"[{response.status_code}] {response.text[:200]}"
            )
        page = response.json()
        for entry in page.get("value", []):
            if entry["displayName"].lower() == wanted:
                return entry["id"]
        page_url = page.get("@odata.nextLink")
        params = None  # the nextLink URL already carries the query options
    return None


def _ensure_folder(path_parts: list[str]) -> str:
    """Ensure folder hierarchy exists under Inbox, return leaf folder ID.

    Walks *path_parts* from the Inbox downwards. Each level is looked up in
    ``_folder_id_cache`` first, then searched for among the parent's child
    folders, and created when missing. Every resolved level is cached under
    its "/"-joined partial path.

    Args:
        path_parts: Folder names from the top level (directly under Inbox)
            down to the leaf.

    Returns:
        The Graph ID of the leaf folder, or the well-known name ``"Inbox"``
        when *path_parts* is empty.

    Raises:
        RuntimeError: if a level can neither be listed, found nor created.
    """
    cache_key = "/".join(path_parts)
    if cache_key in _folder_id_cache:
        return _folder_id_cache[cache_key]

    parent_id = "Inbox"
    for depth, part in enumerate(path_parts, start=1):
        partial_key = "/".join(path_parts[:depth])
        folder_id = _folder_id_cache.get(partial_key)
        if folder_id is None:
            # "Inbox" works in the URL just like a folder ID, so a single
            # URL template covers both the first and the deeper levels.
            url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"
            folder_id = _find_child_folder_id(url, part)
            if folder_id is None:
                created = _folder_request("POST", url, json={"displayName": part})
                if created.status_code in (200, 201):
                    folder_id = created.json()["id"]
                elif created.status_code == 409:
                    # Already exists (race condition) — re-fetch
                    folder_id = _find_child_folder_id(url, part)
                if folder_id is None:
                    raise RuntimeError(f"Cannot create folder '{part}': {created.text}")
            _folder_id_cache[partial_key] = folder_id
        parent_id = folder_id
    return parent_id
def _map_jnj_folder(folder: str) -> list[str]:
    """Translate a JNJ Outlook folder path into folder names below the JNJ root.

    The first path segment is the source mailbox name; it is dropped, but if
    it contains "online archive" an extra "Online Archive" level is inserted.

    '/vbuzalka@its.jnj.com/Inbox/TMP' → ['JNJ', 'Inbox', 'TMP']
    '/Online Archive - vbuzalka@its.jnj.com/Inbox' → ['JNJ', 'Online Archive', 'Inbox']
    """
    segments = list(filter(None, folder.split("/")))
    mapped = [GRAPH_ROOT_FOLDER]
    if not segments:
        return mapped
    source_mailbox, *subfolders = segments
    if "online archive" in source_mailbox.lower():
        mapped.append("Online Archive")
    mapped.extend(subfolders)
    return mapped
def _make_recipient(addr: str) -> dict:
    """Build a Graph ``emailAddress`` recipient object from an address string.

    Accepts either ``Name <email>`` (the name may be double-quoted) or a bare
    address. For a bare string the same value is used as both name and
    address. When the display name is empty (``<email>``) the address doubles
    as the name.

    Args:
        addr: One recipient as text.

    Returns:
        ``{"emailAddress": {"name": ..., "address": ...}}``
    """
    start = addr.find("<")
    # Only accept a ">" that closes the "<" found above, so a stray ">"
    # earlier in the string cannot produce an empty address.
    end = addr.find(">", start + 1) if start != -1 else -1
    if start != -1 and end != -1:
        email = addr[start + 1:end].strip()
        name = addr[:start].strip().strip('"') or email
    else:
        name = addr
        email = addr
    return {"emailAddress": {"name": name, "address": email}}
def _read_msg_fields(msg_path: Path) -> dict:
    """Read the fields needed for the Graph payload from a .msg file.

    The file is always closed, including when reading a property raises.
    """
    msg = extract_msg.Message(str(msg_path))
    try:
        body_html = msg.htmlBody
        if isinstance(body_html, bytes):
            body_html = body_html.decode("utf-8", errors="replace")
        sender_email = msg.sender or ""

        attachments = []
        for att in msg.attachments:
            data = att.data
            if not (data and att.longFilename):
                continue
            if not isinstance(data, (bytes, bytearray)):
                # Only raw bytes can be base64-encoded; presumably this is an
                # embedded message object — skip it rather than lose the mail.
                log.warning(
                    "Skipping non-binary attachment '%s' in %s",
                    att.longFilename, msg_path.name,
                )
                continue
            attachments.append({
                "@odata.type": "#microsoft.graph.fileAttachment",
                "name": att.longFilename,
                "contentType": getattr(att, "mimetype", None) or "application/octet-stream",
                "contentBytes": base64.b64encode(data).decode(),
            })

        return {
            "subject": msg.subject or "(no subject)",
            "body_html": body_html,
            "body_text": msg.body or "",
            "sender_email": sender_email,
            "sender_name": getattr(msg, "senderName", None) or sender_email,
            "to": [a.strip() for a in (msg.to or "").split(";") if a.strip()],
            "cc": [a.strip() for a in (msg.cc or "").split(";") if a.strip()],
            "date": msg.date,
            "attachments": attachments,
        }
    finally:
        msg.close()


def _import_msg_to_graph(msg_path: Path, folder: str) -> Optional[str]:
    """Parse .msg and import into Graph API mailbox. Returns message ID or None.

    Args:
        msg_path: Path of the saved .msg file.
        folder: Source folder path sent by the client; mapped with
            ``_map_jnj_folder`` and created on demand with ``_ensure_folder``.

    Returns:
        The ``id`` from the Graph response ("" if the response has none) on
        HTTP 200/201. None on any other status or on any exception. Failures
        are logged, never raised, so the upload endpoint keeps working.
    """
    try:
        fields = _read_msg_fields(msg_path)
        subject = fields["subject"]
        body_html = fields["body_html"]

        # Map folder and ensure it exists
        folder_parts = _map_jnj_folder(folder)
        folder_id = _ensure_folder(folder_parts)

        payload = {
            "subject": subject,
            "body": {
                "contentType": "HTML" if body_html else "Text",
                "content": body_html or fields["body_text"],
            },
            "from": _make_recipient(f"{fields['sender_name']} <{fields['sender_email']}>"),
            "toRecipients": [_make_recipient(a) for a in fields["to"]],
            "ccRecipients": [_make_recipient(a) for a in fields["cc"]],
            "isRead": True,
            # PR_MESSAGE_FLAGS (0x0E07) = 1 → read, not a draft.
            "singleValueExtendedProperties": [
                {"id": "Integer 0x0E07", "value": "1"}
            ],
        }

        date_raw = fields["date"]
        if date_raw:
            try:
                dt = dtparser.parse(str(date_raw))
                payload["receivedDateTime"] = dt.astimezone(timezone.utc).strftime(
                    "%Y-%m-%dT%H:%M:%SZ"
                )
            except Exception as exc:
                # Best effort: import without a received date, but say so.
                log.warning(
                    "Cannot parse date %r of %s: %s", date_raw, msg_path.name, exc
                )

        if fields["attachments"]:
            payload["attachments"] = fields["attachments"]

        headers = _graph_headers()
        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
        r = http_requests.post(url, headers=headers, json=payload, timeout=30)
        if r.status_code == 401:
            # Token rejected — fetch a new one and retry once.
            _get_graph_token()
            headers = _graph_headers()
            r = http_requests.post(url, headers=headers, json=payload, timeout=30)

        if r.status_code in (200, 201):
            msg_id = r.json().get("id", "")
            log.info("Graph OK: %s -> %s", subject[:60], "/".join(folder_parts))
            return msg_id

        log.error("Graph FAIL [%d]: %s | %s", r.status_code, subject[:60], r.text[:200])
        return None
    except Exception as e:
        # log.exception keeps the traceback, which the plain error log dropped.
        log.exception("Graph import error for %s: %s", msg_path.name, e)
        return None
@app.post("/upload") @app.post("/upload")
async def upload_msg( async def upload_msg(
file: UploadFile = File(...), file: UploadFile = File(...),
authorization: str = Header(None) authorization: str = Header(None),
folder: str = Form(""),
): ):
if authorization != f"Bearer {TOKEN}": if authorization != f"Bearer {TOKEN}":
raise HTTPException(status_code=401, detail="Unauthorized") raise HTTPException(status_code=401, detail="Unauthorized")
@@ -39,7 +252,17 @@ async def upload_msg(
return {"status": "exists", "file": file.filename} return {"status": "exists", "file": file.filename}
with dest.open("wb") as f: with dest.open("wb") as f:
shutil.copyfileobj(file.file, f) shutil.copyfileobj(file.file, f)
return {"status": "saved", "file": file.filename}
# Import to Graph API if folder was provided by client
graph_id = None
if folder:
graph_id = _import_msg_to_graph(dest, folder)
return {
"status": "saved",
"file": file.filename,
"graph_imported": graph_id is not None,
}
@app.post("/upload-db") @app.post("/upload-db")
@@ -3,3 +3,7 @@ uvicorn
python-multipart python-multipart
dropbox dropbox
python-dotenv python-dotenv
msal
requests
extract-msg
python-dateutil
@@ -0,0 +1,180 @@
# test_import_msg.py — pokusný import .msg do schránky přes Graph API
# Parsuje .msg soubor a vytvoří zprávu v Inbox cílové schránky.
import base64
import msal
import requests
import extract_msg
import sys
from pathlib import Path
# === CONFIG ===
# Azure AD application credentials and the target mailbox for the trial import.
# NOTE(review): the client secret is hard-coded and committed — consider
# rotating it and reading it from the environment instead.
TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
MAILBOX = "vladimir.buzalka@buzalka.cz"
# Token authority URL and scope passed to MSAL in get_token().
AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
SCOPE = ["https://graph.microsoft.com/.default"]
GRAPH_URL = "https://graph.microsoft.com/v1.0"
TARGET_FOLDER = "JNJ" # subfolder under Inbox
# === MSG FILE ===
# Default .msg file (next to this script) used when no path is given on the command line.
MSG_PATH = Path(__file__).parent / "FC130007ACFE5DCB0000.msg"
def get_token():
    """Request a Graph access token for the configured application.

    Returns:
        The access token string.

    Raises:
        RuntimeError: if the MSAL result contains no ``access_token``.
    """
    client = msal.ConfidentialClientApplication(
        CLIENT_ID,
        authority=AUTHORITY,
        client_credential=CLIENT_SECRET,
    )
    result = client.acquire_token_for_client(scopes=SCOPE)
    if "access_token" in result:
        return result["access_token"]
    raise RuntimeError(f"Auth failed: {result}")
def parse_msg(path):
    """Parse .msg file and return dict with message properties.

    The file is always closed, including when reading a property raises.

    Args:
        path: Location of the .msg file.

    Returns:
        A dict with the keys ``subject``, ``body_html``, ``body_text``,
        ``sender_email``, ``sender_name``, ``to``, ``cc`` (lists of address
        strings), ``received`` (string or None) and ``attachments`` (Graph
        fileAttachment dicts).
    """
    msg = extract_msg.Message(str(path))
    try:
        # Read all properties before closing
        subject = msg.subject or "(no subject)"
        body_html = msg.htmlBody
        if isinstance(body_html, bytes):
            body_html = body_html.decode("utf-8", errors="replace")
        body_text = msg.body or ""
        sender_email = msg.sender or ""
        sender_name = getattr(msg, "senderName", None) or sender_email
        to_raw = msg.to or ""
        cc_raw = msg.cc or ""
        date_raw = msg.date

        att_list = []
        for att in msg.attachments:
            data = att.data
            if not (data and att.longFilename):
                continue
            if not isinstance(data, (bytes, bytearray)):
                # Only raw bytes can be base64-encoded; presumably this is an
                # embedded message object — skip it instead of crashing.
                print(f" Warning: skipping non-binary attachment '{att.longFilename}'")
                continue
            att_list.append({
                "@odata.type": "#microsoft.graph.fileAttachment",
                "name": att.longFilename,
                "contentType": getattr(att, "mimetype", None) or "application/octet-stream",
                "contentBytes": base64.b64encode(data).decode(),
            })
    finally:
        msg.close()

    # Process after close
    to_list = [a.strip() for a in to_raw.split(";") if a.strip()]
    cc_list = [a.strip() for a in cc_raw.split(";") if a.strip()]
    received = str(date_raw) if date_raw else None

    return {
        "subject": subject,
        "body_html": body_html,
        "body_text": body_text,
        "sender_email": sender_email,
        "sender_name": sender_name,
        "to": to_list,
        "cc": cc_list,
        "received": received,
        "attachments": att_list,
    }
def make_recipient(addr):
    """Create Graph API recipient object from email address.

    Accepts either ``Name <email>`` (the name may be double-quoted) or a bare
    address. For a bare string the same value is used as both name and
    address. When the display name is empty (``<email>``) the address doubles
    as the name.
    """
    start = addr.find("<")
    # Only accept a ">" that closes the "<" found above, so a stray ">"
    # earlier in the string cannot produce an empty address.
    end = addr.find(">", start + 1) if start != -1 else -1
    if start != -1 and end != -1:
        email = addr[start + 1:end].strip()
        name = addr[:start].strip().strip('"') or email
    else:
        name = addr
        email = addr
    return {"emailAddress": {"name": name, "address": email}}
def import_msg(msg_path):
    """Parse one .msg file and create it as a message in Inbox/TARGET_FOLDER.

    Args:
        msg_path: Path of the .msg file to import.

    Returns:
        The parsed JSON of the created message on HTTP 200/201. None when the
        folder lookup, the folder creation or the message POST fails (the
        failure is printed).
    """
    token = get_token()
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }

    print(f"Parsing: {msg_path}")
    data = parse_msg(msg_path)
    print(f" Subject: {data['subject']}")
    print(f" From: {data['sender_name']} <{data['sender_email']}>")
    print(f" To: {data['to']}")
    print(f" Date: {data['received']}")
    print(f" Attachments: {len(data['attachments'])}")

    # 1. Create message in mailFolder (Inbox)
    payload = {
        "subject": data["subject"],
        "body": {
            "contentType": "HTML" if data["body_html"] else "Text",
            "content": data["body_html"] or data["body_text"],
        },
        "from": make_recipient(
            f"{data['sender_name']} <{data['sender_email']}>"
        ),
        "toRecipients": [make_recipient(a) for a in data["to"]],
        "ccRecipients": [make_recipient(a) for a in data["cc"]],
        "isRead": True,
        # PR_MESSAGE_FLAGS (0x0E07) = 1 → read, NOT draft (without MSGFLAG_UNSENT=0x08)
        "singleValueExtendedProperties": [
            {
                "id": "Integer 0x0E07",
                "value": "1",
            }
        ],
    }

    if data["received"]:
        # Graph API expects ISO 8601 UTC format
        from datetime import timezone
        try:
            from dateutil import parser as dtparser
            dt = dtparser.parse(data["received"])
            payload["receivedDateTime"] = dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        except Exception as e:
            print(f" Warning: cannot parse date '{data['received']}': {e}")

    if data["attachments"]:
        payload["attachments"] = data["attachments"]

    # Find target folder (Inbox/JNJ); follow paging links so a folder beyond
    # the first result page is still found.
    folder_url = f"{GRAPH_URL}/users/{MAILBOX}/mailFolders/Inbox/childFolders"
    folder_id = None
    page_url = folder_url
    while page_url and not folder_id:
        r_folders = requests.get(page_url, headers=headers, timeout=15)
        if r_folders.status_code != 200:
            print(f" FAILED [{r_folders.status_code}]: {r_folders.text[:500]}")
            return None
        listing = r_folders.json()
        for f in listing.get("value", []):
            if f["displayName"].lower() == TARGET_FOLDER.lower():
                folder_id = f["id"]
                break
        page_url = listing.get("@odata.nextLink")

    if not folder_id:
        # Create the folder if it doesn't exist
        r_create = requests.post(
            folder_url, headers=headers,
            json={"displayName": TARGET_FOLDER}, timeout=15
        )
        if r_create.status_code not in (200, 201):
            # Without this check a failed creation surfaced as a bare KeyError.
            print(f" FAILED [{r_create.status_code}]: {r_create.text[:500]}")
            return None
        folder_id = r_create.json()["id"]
        print(f" Created folder '{TARGET_FOLDER}'")

    url = f"{GRAPH_URL}/users/{MAILBOX}/mailFolders/{folder_id}/messages"

    print(f"\nPOST -> Inbox/{TARGET_FOLDER}")
    r = requests.post(url, headers=headers, json=payload, timeout=30)

    if r.status_code in (200, 201):
        msg_id = r.json().get("id", "?")
        print(f" OK! Message created, id={msg_id[:40]}...")
        return r.json()

    print(f" FAILED [{r.status_code}]: {r.text[:500]}")
    return None
if __name__ == "__main__":
    # Import the file named on the command line, or the default MSG_PATH.
    cli_args = sys.argv[1:]
    target = cli_args[0] if cli_args else MSG_PATH
    import_msg(Path(target))
@@ -0,0 +1,233 @@
"""
janssenpc_email_send_new v1.4
Version: 1.4.1
Date: 2026-05-29
Description: Walks the Inbox, Deleted Items and Sent Items folders in Outlook (MAPI),
saves email messages as .msg files and uploads them to https://msgs.buzalka.cz.
Records processed messages in a SQLite DB (jnjemails.db) and uploads the DB to the
server once every 24 hours (not on every run). Supports resuming from the last
processed email. The folder path contains the full mailbox name
(e.g. /vbuzalka@its.jnj.com/Inbox). Errors are logged to jnjemails_errors.log.
"""
import win32com.client
import requests
import sqlite3
import urllib3
import logging
from pathlib import Path
from datetime import datetime, timedelta
import tempfile
import io
# Silence urllib3's InsecureRequestWarning for the whole process.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Bearer token sent in the Authorization header of every upload.
# NOTE(review): hard-coded and committed — consider loading it from the environment.
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
# Local SQLite database that records which messages were already uploaded.
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
# Marker file holding the ISO timestamp of the last DB upload.
DB_UPLOAD_MARKER = r"C:\Users\vbuzalka\SQLITE\jnjemails_last_db_upload.txt"
# Minimum number of hours between two DB uploads.
DB_UPLOAD_INTERVAL_H = 24
LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails_errors.log"
# MAPI property read from each item as its message ID (see process_folder).
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
# olFolderInbox=6, olFolderDeletedItems=3, olFolderSentMail=5
FOLDERS_TO_PROCESS = [6, 3, 5]
UPLOAD_LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails_uploads.log"
# Root logger: only errors, written to LOG_PATH.
logging.basicConfig(
    filename=LOG_PATH,
    level=logging.ERROR,
    format="%(asctime)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    encoding="utf-8",
)
# Separate upload logger — logs every upload attempt
_upload_log = logging.getLogger("uploads")
_upload_log.setLevel(logging.DEBUG)
# Dedicated file handler so upload records go to UPLOAD_LOG_PATH.
_uh = logging.FileHandler(UPLOAD_LOG_PATH, encoding="utf-8")
_uh.setFormatter(logging.Formatter("%(asctime)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
_upload_log.addHandler(_uh)
def init_db(conn):
    """Create the ``messages`` table and its unique message-id index if missing.

    Safe to call repeatedly: both statements use ``IF NOT EXISTS``. The
    changes are committed before returning.
    """
    ddl_statements = (
        """
    CREATE TABLE IF NOT EXISTS messages (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    message_id TEXT NOT NULL,
    subject TEXT,
    sender TEXT,
    received_at TEXT,
    folder TEXT,
    source TEXT,
    uploaded_at TEXT DEFAULT (datetime('now'))
    )
    """,
        "CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)",
    )
    for statement in ddl_statements:
        conn.execute(statement)
    conn.commit()
def is_uploaded(conn, message_id):
    """Return True when *message_id* is already recorded in ``messages``."""
    cursor = conn.execute(
        "SELECT 1 FROM messages WHERE message_id = ? LIMIT 1", (message_id,)
    )
    match = cursor.fetchone()
    if match is None:
        return False
    return True
def save_to_db(conn, message_id, subject, sender, received_at, folder, source):
    """Record one processed message and commit.

    Uses ``INSERT OR IGNORE``, so a row that conflicts with an existing one
    (e.g. a duplicate ``message_id`` under the unique index) is left as is.
    """
    row = (message_id, subject, sender, received_at, folder, source)
    insert_sql = """
    INSERT OR IGNORE INTO messages (message_id, subject, sender, received_at, folder, source)
    VALUES (?, ?, ?, ?, ?, ?)
    """
    conn.execute(insert_sql, row)
    conn.commit()
def _db_upload_due() -> bool:
    """Tell whether the database should be uploaded now.

    True when the marker file is missing, unreadable or unparsable, or when
    at least ``DB_UPLOAD_INTERVAL_H`` hours have passed since the timestamp
    stored in it.
    """
    marker_file = Path(DB_UPLOAD_MARKER)
    if marker_file.exists():
        try:
            stamp = marker_file.read_text().strip()
            elapsed = datetime.now() - datetime.fromisoformat(stamp)
            return elapsed.total_seconds() >= DB_UPLOAD_INTERVAL_H * 3600
        except Exception:
            # An unreadable or corrupt marker counts as "never uploaded".
            return True
    return True
def _db_upload_mark():
    """Store the current local time (ISO format) in the DB-upload marker file."""
    now_iso = datetime.now().isoformat()
    marker_file = Path(DB_UPLOAD_MARKER)
    marker_file.write_text(now_iso)
def upload_db(db_path, force=False):
    """Upload the SQLite database to the server, at most once per interval.

    Args:
        db_path: Path of the database file to send.
        force: When True, upload even if the interval has not elapsed.

    The upload marker is written only after the server accepted the file.
    On an HTTP error status the failure is printed and logged, and the
    marker is left untouched so the next call tries again.
    """
    if not force and not _db_upload_due():
        return
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"jnjemails_{timestamp}.db"
    with open(db_path, "rb") as f:
        resp = requests.post(
            "https://msgs.buzalka.cz/upload-db",
            headers={"Authorization": f"Bearer {TOKEN}"},
            files={"file": (filename, f, "application/octet-stream")},
            timeout=60
        )
    if not resp.ok:
        # An error answer with a JSON body (e.g. 401) must not count as a
        # successful upload, otherwise retries are suppressed for the interval.
        print(f" DB upload failed [{resp.status_code}]: {resp.text[:200]}")
        logging.error("DB upload failed | status=%s | body=%s",
                      resp.status_code, resp.text[:200])
        return
    print(f" DB upload: {resp.json()}")
    _db_upload_mark()
def upload_msg(msg_path, filename, folder=""):
    """Send one .msg file to the upload endpoint and return the server's status.

    Args:
        msg_path: Path of the local file to send.
        filename: File name reported to the server.
        folder: Source folder path, sent as the ``folder`` form field.

    Returns:
        The ``status`` value from the JSON response.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` on an error status.
    """
    _upload_log.info("UPLOAD %s | folder=%s", filename, folder)
    auth_header = {"Authorization": f"Bearer {TOKEN}"}
    with open(msg_path, "rb") as handle:
        multipart = {"file": (filename, handle, "application/octet-stream")}
        response = requests.post(
            UPLOAD_URL,
            headers=auth_header,
            files=multipart,
            data={"folder": folder},
            timeout=60,
        )
    response.raise_for_status()
    payload = response.json()
    _upload_log.info("RESPONSE %s | %s", filename, payload)
    return payload["status"]
def get_folder_resume_date(conn, folder_path):
    """Return the point in time from which *folder_path* should be re-scanned.

    This is the newest ``received_at`` recorded for the folder minus one
    hour, or None when nothing has been recorded for it yet.
    """
    query = "SELECT MAX(received_at) FROM messages WHERE folder = ?"
    result = conn.execute(query, (folder_path,)).fetchone()
    latest = result[0] if result else None
    if not latest:
        return None
    return datetime.fromisoformat(latest) - timedelta(hours=1)
def process_folder(conn, folder, source, folder_path="", counter=None):
    """Upload every not-yet-recorded mail item of *folder*, then recurse into subfolders.

    Args:
        conn: Open SQLite connection used to look up and record messages.
        folder: Outlook folder object to process.
        source: Label stored in the ``source`` column of each recorded message.
        folder_path: Path of the parent folder; this folder's name is appended.
        counter: One-element list holding the running total of uploaded
            messages, shared across the recursion. Created when omitted.

    Errors on a single item are printed and logged and the loop continues.
    An error on the folder as a whole is printed and logged, and the
    subfolders are still processed.
    """
    if counter is None:
        counter = [0]

    current_path = f"{folder_path}/{folder.Name}"

    try:
        resume_dt = get_folder_resume_date(conn, current_path)
        items = folder.Items

        if resume_dt:
            # Only look at items received after the resume point.
            resume_str = resume_dt.strftime("%Y/%m/%d %H:%M:%S")
            filter_str = f"@SQL=\"urn:schemas:httpmail:datereceived\" > '{resume_str}'"
            items = folder.Items.Restrict(filter_str)
            print(f"\n Složka: {current_path} | pokračuji od: {resume_str}")
        else:
            print(f"\n Složka: {current_path} | od začátku")

        items.Sort("[ReceivedTime]", False)

        count = 0
        skipped = 0
        for item in items:
            try:
                # Skip anything that is not a mail message.
                if not item.MessageClass.upper().startswith("IPM.NOTE"):
                    continue

                try:
                    mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
                except Exception:
                    # Was a bare "except:", which also swallowed
                    # KeyboardInterrupt and SystemExit.
                    mid = None

                if not mid:
                    # No message ID available — fall back to the Outlook EntryID.
                    mid = f"entryid:{item.EntryID}"

                if is_uploaded(conn, mid):
                    skipped += 1
                    continue

                with tempfile.TemporaryDirectory() as tmp:
                    safe_name = f"{item.EntryID[-20:]}.msg"
                    tmp_path = Path(tmp) / safe_name
                    item.SaveAs(str(tmp_path), 3)
                    status = upload_msg(tmp_path, safe_name, current_path)

                received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
                save_to_db(conn, mid, item.Subject, item.SenderEmailAddress,
                           received, current_path, source)

                counter[0] += 1
                count += 1

                if counter[0] % 1000 == 0:
                    print(f" → celkem {counter[0]} emailů přeneseno, uploaduji DB...")
                    upload_db(DB_PATH)

                print(f" {status.upper():6} | {item.Subject[:60]}")

            except Exception as e:
                subject = getattr(item, 'Subject', '?')
                sender = getattr(item, 'SenderEmailAddress', '?')
                received = getattr(item, 'ReceivedTime', '?')
                # str() keeps the handler itself from failing when the
                # subject is not a string (e.g. None).
                print(f" CHYBA | {str(subject)[:40]} | {e}")
                logging.error("folder=%s | sender=%s | received=%s | subject=%s | error=%s",
                              current_path, sender, received, subject, e)

        print(f" → složka hotova: přeneseno {count} | skip {skipped}")

    except Exception as e:
        print(f" CHYBA složka {current_path}: {e}")
        logging.error("folder=%s | CHYBA SLOŽKY | error=%s", current_path, e)

    for subfolder in folder.Folders:
        process_folder(conn, subfolder, source, current_path, counter)
# --- MAIN ---
def main():
    """Process the configured Outlook default folders and upload the DB at the end.

    The SQLite connection is closed even when processing raises.
    """
    Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(DB_PATH)
    try:
        init_db(conn)

        outlook = win32com.client.Dispatch("Outlook.Application")
        ns = outlook.GetNamespace("MAPI")

        counter = [0]

        for folder_id in FOLDERS_TO_PROCESS:
            folder = ns.GetDefaultFolder(folder_id)
            mailbox_name = folder.Parent.Name
            print(f"\n=== {folder.Name} ({mailbox_name}) ===")
            process_folder(conn, folder, "mailbox", f"/{mailbox_name}", counter)

        # Final DB upload once everything is processed
        print("\nFinální upload DB...")
        upload_db(DB_PATH)
    finally:
        conn.close()

    print(f"\nHotovo. Chyby logovány do: {LOG_PATH}")


if __name__ == "__main__":
    main()
+1 -1
View File
File diff suppressed because one or more lines are too long
Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 249 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

+33 -4
View File
@@ -72,6 +72,15 @@ def wait_load(page, extra_ms=1000):
def dbg(page, label): def dbg(page, label):
print(f"[{label}] URL: {page.url}") print(f"[{label}] URL: {page.url}")
try:
from pathlib import Path
shots = Path(__file__).parent / "debug_shots"
shots.mkdir(exist_ok=True)
path = shots / f"{label}.png"
page.screenshot(path=str(path), full_page=True)
print(f"[{label}] screenshot: {path}")
except Exception as e:
print(f"[{label}] screenshot failed: {e}")
def extract_study_label(study_search: str) -> str: def extract_study_label(study_search: str) -> str:
@@ -178,16 +187,32 @@ def select_role(page):
print(f" Vybráno: '{txt}'") print(f" Vybráno: '{txt}'")
break break
clicked = False
for btn_sel in ['input[value="Continue"]', 'input[type="submit"]', for btn_sel in ['input[value="Continue"]', 'input[type="submit"]',
'button:has-text("Continue")', 'button[type="submit"]']: 'button:has-text("Continue")', 'button[type="submit"]']:
try: try:
btn = page.query_selector(btn_sel) btn = page.query_selector(btn_sel)
except Exception: except Exception:
break continue
if btn: if btn:
try:
with page.expect_navigation(timeout=15_000):
btn.click() btn.click()
wait_load(page, 2000) clicked = True
break break
except PWTimeout:
print(f" Click on {btn_sel} nezpůsobil navigaci, zkouším další...")
continue
if not clicked:
print(" Fallback: submituji formulář přes JS...")
try:
with page.expect_navigation(timeout=15_000):
page.evaluate("document.forms[0] && document.forms[0].submit()")
except PWTimeout:
print(" JS submit fallback také neprošel.")
wait_load(page, 1500)
dbg(page, "after-role") dbg(page, "after-role")
@@ -404,8 +429,12 @@ def download_datalisting(study: str, forms: list[str], country: str | None = Non
results = [] results = []
with sync_playwright() as p: with sync_playwright() as p:
browser = p.chromium.launch(headless=False, slow_mo=200) browser = p.chromium.launch(
ctx_kwargs = {"accept_downloads": True} headless=False,
slow_mo=200,
args=["--start-maximized"],
)
ctx_kwargs = {"accept_downloads": True, "no_viewport": True}
use_saved = auth_valid() use_saved = auth_valid()
if use_saved: if use_saved:
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long