z230
This commit is contained in:
@@ -0,0 +1,30 @@
|
|||||||
|
# =====================================================================
# clean_windows_temp_v1.0_2026-06-10.ps1
# Version: 1.0 | Date: 2026-06-10
# Purpose: Empties C:\Windows\Temp and C:\Windows\SoftwareDistribution\Download
#          (the Windows Update cache). Must be run AS ADMINISTRATOR.
#          Stops the Windows Update services, deletes the cache and starts
#          the services again.
# =====================================================================
#Requires -RunAsAdministrator

# Switches shared by every Remove-Item call: recursive, forced, no prompt,
# errors suppressed (locked files are simply skipped).
$removeArgs = @{
    Recurse     = $true
    Force       = $true
    Confirm     = $false
    ErrorAction = 'SilentlyContinue'
}

# Free space on C: before the cleanup, used for the final report.
$freeBefore = (Get-PSDrive C).Free

Write-Host "Mažu C:\Windows\Temp ..."
$tempEntries = Get-ChildItem -Path 'C:\Windows\Temp' -Force -ErrorAction SilentlyContinue
foreach ($entry in $tempEntries) {
    Remove-Item -LiteralPath $entry.FullName @removeArgs
}

Write-Host "Zastavuji službu Windows Update ..."
foreach ($serviceName in 'wuauserv', 'bits') {
    Stop-Service -Name $serviceName -Force -ErrorAction SilentlyContinue
}

Write-Host "Mažu C:\Windows\SoftwareDistribution\Download ..."
$downloadEntries = Get-ChildItem -Path 'C:\Windows\SoftwareDistribution\Download' -Force -ErrorAction SilentlyContinue
foreach ($entry in $downloadEntries) {
    Remove-Item -LiteralPath $entry.FullName @removeArgs
}

Write-Host "Spouštím služby zpět ..."
# Started in the reverse order of the stop sequence above.
foreach ($serviceName in 'bits', 'wuauserv') {
    Start-Service -Name $serviceName -ErrorAction SilentlyContinue
}

$freeAfter = (Get-PSDrive C).Free
$freedGB = ($freeAfter - $freeBefore) / 1GB
$totalFreeGB = $freeAfter / 1GB
Write-Host ("Uvolněno: {0:N2} GB | Volno celkem: {1:N2} GB" -f $freedGB, $totalFreeGB)
Read-Host "Hotovo - Enter pro zavření"
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
# =====================================================================
# clean_windows_temp_v1.1_2026-06-10.ps1
# Version: 1.1 | Date: 2026-06-10
# Purpose: Empties C:\Windows\Temp. Must be run AS ADMINISTRATOR.
#          v1.1: takes ownership (takeown) and grants the built-in
#          Administrators group full control (icacls) before deleting;
#          v1.0 failed on the ACLs of wct*.tmp files owned by the
#          system account.
#          (SoftwareDistribution\Download was already cleaned by v1.0.)
# =====================================================================
#Requires -RunAsAdministrator

$tempDir = 'C:\Windows\Temp'

# Well-known SID of the built-in Administrators group. icacls accepts a
# numeric SID when it is prefixed with '*', which avoids depending on the
# localized group name.
$adminsSid = '*S-1-5-32-544'

$before = (Get-PSDrive C).Free

Write-Host "Prebiram vlastnictvi C:\Windows\Temp (muze trvat par minut) ..."
# NOTE(review): the answer passed to /D is locale-dependent on some Windows
# language versions - confirm that 'Y' is accepted on this machine.
takeown /F $tempDir /R /D Y | Out-Null
if ($LASTEXITCODE -ne 0) {
    Write-Warning "takeown skoncil s chybou (kod $LASTEXITCODE) - nektere soubory mohou zustat."
}

icacls $tempDir /grant "${adminsSid}:(OI)(CI)F" /T /C /Q | Out-Null
if ($LASTEXITCODE -ne 0) {
    Write-Warning "icacls skoncil s chybou (kod $LASTEXITCODE) - nektere soubory mohou zustat."
}

Write-Host "Mazu obsah C:\Windows\Temp ..."
Get-ChildItem $tempDir -Force -ErrorAction SilentlyContinue |
    Remove-Item -Recurse -Force -Confirm:$false -ErrorAction SilentlyContinue

# Total size of whatever could not be deleted (locked or still protected files).
$left = (Get-ChildItem $tempDir -Recurse -Force -File -ErrorAction SilentlyContinue |
    Measure-Object Length -Sum).Sum
$after = (Get-PSDrive C).Free

Write-Host ("Uvolneno: {0:N2} GB | Zbyva v Temp: {1:N2} GB | Volno celkem: {2:N2} GB" -f `
    (($after-$before)/1GB), ($left/1GB), ($after/1GB))
Read-Host "Hotovo - Enter pro zavreni"
|
||||||
@@ -0,0 +1,66 @@
|
|||||||
|
# Analýza disku C — co je možné smazat
|
||||||
|
**Verze:** 1.0
|
||||||
|
**Datum:** 2026-06-10
|
||||||
|
**Stroj:** Z230, Windows 10 LTSC
|
||||||
|
**Rozsah:** C:\Users\vladimir.buzalka.BUZALKA (+ rychlá kontrola systémových temp složek)
|
||||||
|
|
||||||
|
## Souhrn
|
||||||
|
- Disk C: **222 GB celkem, jen 8,4 GB volných**
|
||||||
|
- Profil uživatele: **~91 GB**, z toho **AppData 82 GB**
|
||||||
|
- Bezpečně uvolnitelné ihned: **~33 GB**
|
||||||
|
- Po zvážení (uživatelská data / vyžaduje rozhodnutí): dalších **~15 GB**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. BEZPEČNÉ SMAZAT IHNED (~33 GB)
|
||||||
|
|
||||||
|
| GB | Co | Cesta | Poznámka |
|
||||||
|
|---|---|---|---|
|
||||||
|
| **15,5** | Claude Desktop VM bundle | `AppData\Roaming\Claude\vm_bundles` | claudevm.bundle 13,1 GB + warm 2,4 GB. Image VM pro sandbox/code-execution funkci Claude Desktop. Po smazání se při dalším použití VM funkce znovu stáhne. Největší jednotlivá položka. |
|
||||||
|
| **4,1** | Evernote resource-cache | `AppData\Roaming\Evernote\resource-cache` | Cache příloh, znovu se stáhne ze serveru. |
|
||||||
|
| **3,8** | Windows Temp | `C:\Windows\Temp` | Vyžaduje admin. |
|
||||||
|
| **2,7** | Chrome cache | `AppData\Local\Google\Chrome\User Data\Default` — Service Worker\CacheStorage (1,7), Cache (0,26), Code Cache (0,23), WebStorage CacheStorage (~0,6) | Nejčistší je Chrome → Nastavení → Smazat data prohlížení → „Soubory v mezipaměti". Nemaže hesla/historii. |
|
||||||
|
| **1,9** | Edge cache | `AppData\Local\Microsoft\Edge\User Data\Profile 1` — Service Worker (0,8), Cache (0,27), WebStorage (~0,9) | Stejně přes nastavení Edge. |
|
||||||
|
| **1,9** | Windows Update cache | `C:\Windows\SoftwareDistribution\Download` | Vyžaduje admin, ideálně zastavit službu wuauserv. |
|
||||||
|
| **1,4** | pip cache | `AppData\Local\pip` | `pip cache purge`. Balíčky se při příští instalaci stáhnou znovu. |
|
||||||
|
| **1,0** | Uživatelský Temp | `AppData\Local\Temp` | Smazat obsah (zamčené soubory přeskočit). |
|
||||||
|
| **0,9** | Evernote updatery | `AppData\Local\evernote-client-updater` (0,62) + `Evernote\AutoUpdate` (0,28) | Stažené instalátory starých verzí. |
|
||||||
|
| **0,5** | SquirrelTemp | `AppData\Local\SquirrelTemp` | Zbytky instalátorů Electron aplikací. |
|
||||||
|
|
||||||
|
## 2. PRAVDĚPODOBNĚ SMAZAT — krátká kontrola předem (~10 GB)
|
||||||
|
|
||||||
|
| GB | Co | Cesta | Poznámka |
|
||||||
|
|---|---|---|---|
|
||||||
|
| **5,4** | VirtualStore — „Zákon 4" | `AppData\Local\VirtualStore\Program Files\Zákon 4` | Data legacy aplikace Zákon, která neměla práva zapisovat do Program Files. **Pokud už aplikaci Zákon nepoužíváš, smazat celé.** Pokud používáš, jsou to její živá data — nesahat. |
|
||||||
|
| **1,7** | WSL disk | `AppData\Local\wsl\{4fd62727-…}` | Virtuální disk WSL distribuce. Smazat jen pokud WSL nepoužíváš (`wsl --list` → `wsl --unregister <distro>`). |
|
||||||
|
| **1,6** | JetBrains cache | `AppData\Local\JetBrains` | Cache/indexy PyCharm vč. starých verzí. Bezpečně: smazat podsložky starých verzí, aktuální nechat (jinak se přeindexuje projekt). |
|
||||||
|
| **1,2** | Office SolutionPackages | `AppData\Local\Microsoft\Office\SolutionPackages` | Cache webových doplňků Office, obnoví se. |
|
||||||
|
| **~1,5** | Spotify cache | `AppData\Local\Spotify` (1,95 celkem) | Většina je cache skladeb — vyčistit v aplikaci: Nastavení → Úložiště → Vymazat mezipaměť. |
|
||||||
|
|
||||||
|
## 3. NEMAZAT PŘÍMO — uživatelská/živá data (ale lze zmenšit)
|
||||||
|
|
||||||
|
| GB | Co | Poznámka |
|
||||||
|
|---|---|---|
|
||||||
|
| **5,2** | Outlook OST/NST (`Local\Microsoft\Outlook`) | Aktivní cache 3 schránek (vladimir 2,8 + ordinace 1,6 + michaela 0,7). Lze zmenšit: Outlook → Nastavení účtu → „Pošta k offline použití" zkrátit např. na 6–12 měsíců; soubor se po kompaktaci zmenší. |
|
||||||
|
| **4,3** | Box (`~\Box`) | Synchronizovaná data studií (MDD3003, GLOW, ICONIC CD/UC). V Box Drive lze označit složky jako *online-only* — uvolní místo bez ztráty dat. |
|
||||||
|
| **4,1** | Evernote Databases (`~\Evernote\Databases`) | Lokální databáze poznámek. Smazáním se nic neztratí (re-sync ze serveru), ale první synchronizace bude dlouhá. Nechat, pokud není nouze. |
|
||||||
|
| **1,9** | Snagit DataStore (`Local\TechSmith\Snagit\DataStore`) | **Knihovna pořízených screenshotů** — uživatelská data. Případně promazat staré captures přímo v Snagit editoru (Library). |
|
||||||
|
| **1,6** | Playwright browsery (`Local\ms-playwright`) | Používají je projektové skripty (Covance/Medidata/IWRS downloady). Nemazat celé; max `playwright uninstall --all` a nechat doinstalovat jen aktuální verzi (bývají tam staré buildy). |
|
||||||
|
| **1,5** | OneDrive cache | Spravuje si OneDrive sám. |
|
||||||
|
| **1,3** | ABBYY, **1,1** Amazon, **0,8** Mozilla, … | Drobnosti, nestojí za riziko. |
|
||||||
|
| **5,5** | pagefile.sys | Systémový stránkovací soubor — nesahat. |
|
||||||
|
|
||||||
|
## 4. Doporučený postup (pořadí podle poměru výtěžnost/riziko)
|
||||||
|
|
||||||
|
1. `Roaming\Claude\vm_bundles` → **+15,5 GB** (okamžitě, bez rizika)
|
||||||
|
2. Evernote resource-cache + updatery → **+5 GB**
|
||||||
|
3. Chrome/Edge cache přes nastavení prohlížečů → **+4,5 GB**
|
||||||
|
4. Windows Temp + SoftwareDistribution\Download (admin) → **+5,7 GB**
|
||||||
|
5. `pip cache purge` + Local\Temp + SquirrelTemp → **+2,9 GB**
|
||||||
|
6. Rozhodnout: Zákon 4 (5,4 GB), WSL (1,7 GB), Spotify cache (1,5 GB)
|
||||||
|
7. Dlouhodobě: zkrátit offline období Outlooku, Box online-only
|
||||||
|
|
||||||
|
**Kroky 1–5 dohromady ≈ 33 GB → volné místo by stouplo z 8 GB na ~41 GB.**
|
||||||
|
|
||||||
|
---
|
||||||
|
*Mazání zatím neproběhlo — čeká na potvrzení. Smazané položky lze u kroků 1–5 obnovit automaticky (jde o cache).*
|
||||||
@@ -0,0 +1,585 @@
|
|||||||
|
# app.py | v2.0 | 2026-06-08
# FastAPI server that receives .msg and .db files, uploads to Dropbox and imports into Graph API.
# Endpoints: /upload (.msg → /msgs + Graph import), /upload-db (.db → /msgs/db),
#            /upload-file (→ Dropbox /!!!Days/Downloads Z230),
#            /message-delete, /message-update (sync: delete, read flag, folder move),
#            /mirror-plan (diffs the JNJ manifest against the mailbox → deletes extras, returns to_add),
#            /status (lists files waiting to be sent to JNJ — names encrypted with Fernet),
#            /item/{enc_filename} (file download — enc_filename is a Fernet token).
|
||||||
|
|
||||||
|
from fastapi import FastAPI, UploadFile, File, Form, Header, HTTPException, Response
|
||||||
|
from pydantic import BaseModel
|
||||||
|
import shutil
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
import os
|
||||||
|
import dropbox
|
||||||
|
import msal
|
||||||
|
import requests as http_requests
|
||||||
|
import extract_msg
|
||||||
|
from dateutil import parser as dtparser
|
||||||
|
from datetime import timezone
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
|
|
||||||
|
load_dotenv(Path(__file__).parent / ".env")
|
||||||
|
|
||||||
|
app = FastAPI()
|
||||||
|
log = logging.getLogger("msgreceiver")
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||||
|
|
||||||
|
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
|
||||||
|
# Šifrovací klíč odvozený z TOKENu (Fernet = AES-128 CBC + HMAC)
|
||||||
|
_FERNET = Fernet(base64.urlsafe_b64encode(hashlib.sha256(TOKEN.encode()).digest()))
|
||||||
|
|
||||||
|
SAVE_DIR = Path("/msgs")
|
||||||
|
DB_DIR = Path("/msgs/db")
|
||||||
|
|
||||||
|
SAVE_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
DB_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
DROPBOX_APP_KEY = os.getenv("DROPBOX_APP_KEY", "")
|
||||||
|
DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET", "")
|
||||||
|
DROPBOX_REFRESH_TOKEN = os.getenv("DROPBOX_APP_REFRESH_TOKEN", "")
|
||||||
|
|
||||||
|
# --- Graph API config ---
|
||||||
|
GRAPH_TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
|
||||||
|
GRAPH_CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
|
||||||
|
GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
|
||||||
|
GRAPH_MAILBOX = "vladimir.buzalka@buzalka.cz"
|
||||||
|
GRAPH_ROOT_FOLDER = "JNJ" # subfolder under Inbox — root for imported emails
|
||||||
|
DROPBOX_UPLOAD_TO_JNJ = "/!!!Days/Downloads Z230/UploadToJNJ"
|
||||||
|
GRAPH_URL = "https://graph.microsoft.com/v1.0"
|
||||||
|
|
||||||
|
# Cache: folder path → Graph folder ID
|
||||||
|
_folder_id_cache: dict[str, str] = {}
|
||||||
|
_graph_token: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_graph_token() -> str:
    """Acquire a new Graph access token and store it in ``_graph_token``.

    Builds an MSAL confidential client from the module-level Graph settings
    and requests a token for the ``.default`` Graph scope.

    Returns:
        The access token string (also cached in the module global).

    Raises:
        RuntimeError: if the MSAL response contains no ``access_token``.
    """
    global _graph_token

    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
    client = msal.ConfidentialClientApplication(
        GRAPH_CLIENT_ID,
        authority=authority_url,
        client_credential=GRAPH_CLIENT_SECRET,
    )
    response = client.acquire_token_for_client(
        scopes=["https://graph.microsoft.com/.default"]
    )

    try:
        _graph_token = response["access_token"]
    except KeyError:
        # MSAL reports failures as a dict without "access_token".
        raise RuntimeError(f"Graph auth failed: {response}") from None
    return _graph_token
|
||||||
|
|
||||||
|
|
||||||
|
def _graph_headers() -> dict:
    """Return the headers for a JSON Graph request.

    Uses the cached token when one exists, otherwise acquires a new one via
    ``_get_graph_token``.
    """
    bearer = _graph_token if _graph_token else _get_graph_token()
    return {
        "Authorization": "Bearer {}".format(bearer),
        "Content-Type": "application/json",
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _find_child_folder(children_url: str, name: str) -> Optional[str]:
    """Return the ID of the child folder named `name` (case-insensitive), or None.

    Follows ``@odata.nextLink`` so that every page of the childFolders
    collection is inspected, not only the first one.

    Raises:
        RuntimeError: if Graph answers a listing request with a non-200 status.
    """
    wanted = name.lower()
    page_url: Optional[str] = f"{children_url}?$top=100"
    while page_url:
        r = _retry_graph(http_requests.get, page_url, _graph_headers, timeout=15)
        if r.status_code != 200:
            raise RuntimeError(
                f"Cannot list folders while looking for '{name}': "
                f"{r.status_code} {r.text[:200]}"
            )
        data = r.json()
        for f in data.get("value", []):
            if f["displayName"].lower() == wanted:
                return f["id"]
        page_url = data.get("@odata.nextLink")
    return None


def _ensure_folder(path_parts: list[str]) -> str:
    """Ensure the folder hierarchy exists under Inbox and return the leaf folder ID.

    Each level is looked up in ``_folder_id_cache`` first; on a miss the
    parent's child folders are searched and the folder is created when absent.
    Every resolved prefix is stored in the cache.

    Args:
        path_parts: Folder names from the top level (directly under Inbox)
            down to the leaf.

    Returns:
        The Graph ID of the last folder in `path_parts`; the literal
        ``"Inbox"`` when `path_parts` is empty.

    Raises:
        RuntimeError: if a folder can be neither found nor created.
    """
    cache_key = "/".join(path_parts)
    if cache_key in _folder_id_cache:
        return _folder_id_cache[cache_key]

    parent_id = "Inbox"  # well-known folder name, valid in the URL like an ID

    for depth, part in enumerate(path_parts, start=1):
        partial_key = "/".join(path_parts[:depth])
        if partial_key in _folder_id_cache:
            parent_id = _folder_id_cache[partial_key]
            continue

        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"

        found = _find_child_folder(url, part)
        if not found:
            cr = _retry_graph(
                http_requests.post, url, _graph_headers,
                json={"displayName": part}, timeout=15,
            )
            if cr.status_code in (200, 201):
                found = cr.json()["id"]
            elif cr.status_code == 409:
                # Already exists (created concurrently) — look it up again.
                found = _find_child_folder(url, part)
            if not found:
                raise RuntimeError(f"Cannot create folder '{part}': {cr.text}")

        _folder_id_cache[partial_key] = found
        parent_id = found

    return parent_id
|
||||||
|
|
||||||
|
|
||||||
|
def _map_jnj_folder(folder: str) -> list[str]:
    """Translate a JNJ folder path into folder names under the JNJ root.

    The first path segment is the mailbox name and is dropped; when it
    contains "online archive" (any case) an "Online Archive" level is
    inserted right after the root.

        '/vbuzalka@its.jnj.com/Inbox/TMP' → ['JNJ', 'Inbox', 'TMP']
        '/Online Archive - vbuzalka@its.jnj.com/Inbox' → ['JNJ', 'Online Archive', 'Inbox']

    An empty path maps to just the root folder.
    """
    segments = list(filter(None, folder.split("/")))
    mapped = [GRAPH_ROOT_FOLDER]
    if not segments:
        return mapped

    mailbox_name, *subfolders = segments
    if "online archive" in mailbox_name.lower():
        mapped.append("Online Archive")

    mapped.extend(subfolders)
    return mapped
|
||||||
|
|
||||||
|
|
||||||
|
def _norm_mid(mid: str) -> str:
|
||||||
|
"""Normalizuj Internet Message-ID pro porovnání (osekej <> a whitespace)."""
|
||||||
|
return (mid or "").strip().strip("<>").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_graph_items(url: str, timeout: int):
    """Yield every item of a paged Graph collection, following ``@odata.nextLink``.

    Raises:
        RuntimeError: if any page request returns a non-200 status. Treating
            a failed page as empty would hand the caller an incomplete listing.
    """
    while url:
        r = _retry_graph(http_requests.get, url, _graph_headers, timeout=timeout)
        if r.status_code != 200:
            raise RuntimeError(f"Graph GET failed [{r.status_code}]: {r.text[:200]}")
        data = r.json()
        yield from data.get("value", [])
        url = data.get("@odata.nextLink")


def _enumerate_jnj_mailbox(cutoff_iso: str) -> dict[str, str]:
    """Return {normalized internetMessageId: graph_id} for the JNJ folder tree.

    Covers the JNJ root folder and all of its descendants, restricted to
    messages with ``receivedDateTime >= cutoff_iso``. Serves as the "what is
    already in the mailbox" side of the mirror diff; messages older than the
    cutoff are never loaded, so the mirror never touches them.

    Args:
        cutoff_iso: ISO 8601 timestamp used in the ``$filter`` expression.

    Raises:
        RuntimeError: if any Graph listing request fails. The diff must not
            be computed from a partial snapshot, otherwise existing messages
            would be reported as missing.
    """
    base = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders"
    root_id = _ensure_folder([GRAPH_ROOT_FOLDER])

    # Breadth-first walk: the list grows while the index advances through it.
    all_folders = [root_id]
    idx = 0
    while idx < len(all_folders):
        fid = all_folders[idx]
        idx += 1
        children_url = f"{base}/{fid}/childFolders?$top=100"
        all_folders.extend(f["id"] for f in _iter_graph_items(children_url, 20))

    # Collect message IDs from every folder, filtered to the time window.
    result: dict[str, str] = {}
    cutoff_enc = cutoff_iso.replace(":", "%3A")
    for fid in all_folders:
        messages_url = (
            f"{base}/{fid}/messages"
            f"?$filter=receivedDateTime ge {cutoff_enc}"
            f"&$select=id,internetMessageId&$top=200"
        )
        for m in _iter_graph_items(messages_url, 30):
            mid = _norm_mid(m.get("internetMessageId", ""))
            if mid:
                result[mid] = m["id"]

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _make_recipient(addr: str) -> dict:
|
||||||
|
if "<" in addr and ">" in addr:
|
||||||
|
name = addr[: addr.index("<")].strip().strip('"')
|
||||||
|
email = addr[addr.index("<") + 1 : addr.index(">")].strip()
|
||||||
|
else:
|
||||||
|
name = addr
|
||||||
|
email = addr
|
||||||
|
return {"emailAddress": {"name": name, "address": email}}
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_msg_attr(msg, name: str, default=None):
    """Read attribute `name` from a parsed message, returning `default` if the access raises."""
    try:
        return getattr(msg, name)
    except Exception:
        # Property access on the parsed message can raise (e.g. on unusual
        # encodings); such a field is treated as missing.
        return default


def _import_msg_to_graph(msg_path: Path, folder: str) -> Optional[str]:
    """Parse a .msg file and create the message in the Graph mailbox.

    Args:
        msg_path: Path of the .msg file on disk.
        folder: Source folder path; mapped with ``_map_jnj_folder`` and
            created with ``_ensure_folder`` when missing.

    Returns:
        The Graph ID of the created message, or None on any failure
        (failures are logged, never raised).
    """
    try:
        msg = extract_msg.Message(str(msg_path))
        try:
            subject = msg.subject or "(no subject)"

            body_html = _safe_msg_attr(msg, "htmlBody")
            if isinstance(body_html, bytes):
                body_html = body_html.decode("utf-8", errors="replace")
            body_text = _safe_msg_attr(msg, "body") or ""

            # NOTE(review): `sender` is used as the e-mail address below —
            # confirm it never already has the "Name <address>" form.
            sender_email = _safe_msg_attr(msg, "sender") or ""
            sender_name = _safe_msg_attr(msg, "senderName") or sender_email
            to_raw = _safe_msg_attr(msg, "to") or ""
            cc_raw = _safe_msg_attr(msg, "cc") or ""
            date_raw = _safe_msg_attr(msg, "date")

            att_list = [
                {
                    "@odata.type": "#microsoft.graph.fileAttachment",
                    "name": att.longFilename,
                    "contentType": getattr(att, "mimetype", None) or "application/octet-stream",
                    "contentBytes": base64.b64encode(att.data).decode(),
                }
                for att in msg.attachments
                if att.data and att.longFilename
            ]
        finally:
            # Close the message even when reading a field or attachment fails.
            msg.close()

        to_list = [a.strip() for a in to_raw.split(";") if a.strip()]
        cc_list = [a.strip() for a in cc_raw.split(";") if a.strip()]

        # Map the folder and make sure it exists.
        folder_parts = _map_jnj_folder(folder)
        folder_id = _ensure_folder(folder_parts)

        ext_props = [{"id": "Integer 0x0E07", "value": "1"}]

        dt_str = None
        if date_raw:
            try:
                dt = dtparser.parse(str(date_raw))
                dt_str = dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
            except Exception:
                dt_str = None
        if dt_str:
            # PR_MESSAGE_DELIVERY_TIME (0x0E06) — the only way to set
            # receivedDateTime through Graph (the direct field is read-only).
            ext_props.append({"id": "SystemTime 0x0E06", "value": dt_str})

        payload = {
            "subject": subject,
            "body": {
                "contentType": "HTML" if body_html else "Text",
                "content": body_html or body_text,
            },
            "from": _make_recipient(f"{sender_name} <{sender_email}>"),
            "toRecipients": [_make_recipient(a) for a in to_list],
            "ccRecipients": [_make_recipient(a) for a in cc_list],
            "isRead": True,
            "singleValueExtendedProperties": ext_props,
        }
        if dt_str:
            payload["sentDateTime"] = dt_str
        if att_list:
            payload["attachments"] = att_list

        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
        # _retry_graph refreshes the token once on 401, like the other Graph calls.
        r = _retry_graph(http_requests.post, url, _graph_headers, json=payload, timeout=30)

        if r.status_code in (200, 201):
            msg_id = r.json().get("id", "")
            log.info("Graph OK: %s → %s", subject[:60], "/".join(folder_parts))
            return msg_id

        log.error("Graph FAIL [%d]: %s | %s", r.status_code, subject[:60], r.text[:200])
        return None

    except Exception as e:
        # Best-effort import: the caller treats None as "not imported".
        log.error("Graph import error for %s: %s", msg_path.name, e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/upload")
async def upload_msg(
    file: UploadFile = File(...),
    authorization: str = Header(None),
    folder: str = Form(""),
):
    """Store an uploaded .msg/.emsg file in SAVE_DIR and optionally import it into Graph.

    ``.emsg`` uploads are Fernet-decrypted and stored with a ``.msg`` suffix.
    When `folder` is non-empty the stored file is also imported through
    ``_import_msg_to_graph``.

    Returns:
        ``{"status": "exists", "file": ...}`` when the file is already stored,
        otherwise ``{"status": "saved", "file": ..., "graph_id": ...}``.

    Raises:
        HTTPException: 401 on a bad token, 400 on a wrong extension or an
            encrypted payload that cannot be decrypted.
    """
    from cryptography.fernet import InvalidToken

    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    # The filename is client-controlled: keep only its last path component so
    # that "../x.msg" or an absolute path cannot escape SAVE_DIR.
    client_name = (file.filename or "").replace("\\", "/").rsplit("/", 1)[-1]

    is_encrypted = client_name.endswith(".emsg")
    if not client_name.endswith(".msg") and not is_encrypted:
        raise HTTPException(status_code=400, detail="Only .msg or .emsg files accepted")

    # Always stored as .msg.
    msg_filename = client_name[:-5] + ".msg" if is_encrypted else client_name
    dest = SAVE_DIR / msg_filename
    if dest.exists():
        return {"status": "exists", "file": msg_filename}

    content = await file.read()
    if is_encrypted:
        try:
            content = _FERNET.decrypt(content)
        except InvalidToken:
            raise HTTPException(status_code=400, detail="Invalid encrypted payload") from None

    with dest.open("wb") as f:
        f.write(content)

    # Import into Graph only when the client supplied a folder.
    graph_id = None
    if folder:
        graph_id = _import_msg_to_graph(dest, folder)

    return {
        "status": "saved",
        "file": msg_filename,
        "graph_id": graph_id,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/upload-db")
async def upload_db(
    file: UploadFile = File(...),
    authorization: str = Header(None)
):
    """Replace the stored .db file in DB_DIR with the uploaded one.

    The upload is written to a temporary ``.part`` file first; the previous
    ``*.db`` files are removed only after the new file is completely on disk,
    so a failed upload no longer leaves DB_DIR without a database.

    Raises:
        HTTPException: 401 on a bad token, 400 when the file is not a .db.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    # The filename is client-controlled: keep only its last path component.
    db_name = (file.filename or "").replace("\\", "/").rsplit("/", 1)[-1]
    if not db_name.endswith(".db"):
        raise HTTPException(status_code=400, detail="Only .db files accepted")

    dest = DB_DIR / db_name
    partial = DB_DIR / (db_name + ".part")  # does not match the "*.db" glob
    try:
        with partial.open("wb") as f:
            shutil.copyfileobj(file.file, f)
    except Exception:
        partial.unlink(missing_ok=True)
        raise

    for old in DB_DIR.glob("*.db"):
        old.unlink()
    partial.replace(dest)
    return {"status": "saved", "file": db_name}
|
||||||
|
|
||||||
|
|
||||||
|
# Request body of POST /message-delete.
class MessageDeleteRequest(BaseModel):
    # Graph ID of the message to delete.
    graph_id: str
|
||||||
|
|
||||||
|
|
||||||
|
# Request body of POST /message-update; the optional fields select what to change.
class MessageUpdateRequest(BaseModel):
    # Graph ID of the message to update.
    graph_id: str
    # New read flag; None leaves the read state untouched.
    is_read: Optional[bool] = None
    # Target folder path; None or empty leaves the message where it is.
    folder: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _retry_graph(method, url, headers_fn, **kwargs):
|
||||||
|
"""Call Graph API, refresh token once on 401."""
|
||||||
|
headers = headers_fn()
|
||||||
|
r = method(url, headers=headers, **kwargs)
|
||||||
|
if r.status_code == 401:
|
||||||
|
_get_graph_token()
|
||||||
|
headers = headers_fn()
|
||||||
|
r = method(url, headers=headers, **kwargs)
|
||||||
|
return r
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/message-delete")
async def message_delete(req: MessageDeleteRequest, authorization: str = Header(None)):
    """Delete the message identified by ``req.graph_id`` from the Graph mailbox.

    Raises:
        HTTPException: 401 on a bad token, 500 when Graph does not answer
            the DELETE with 200 or 204.
    """
    expected_auth = f"Bearer {TOKEN}"
    if authorization != expected_auth:
        raise HTTPException(status_code=401, detail="Unauthorized")

    target = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{req.graph_id}"
    resp = _retry_graph(http_requests.delete, target, _graph_headers, timeout=15)

    if resp.status_code not in (200, 204):
        raise HTTPException(
            status_code=500,
            detail=f"Graph DELETE failed: {resp.status_code} {resp.text[:200]}",
        )

    log.info("Graph DELETE OK: %s", req.graph_id)
    return {"status": "deleted"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/message-update")
async def message_update(req: MessageUpdateRequest, authorization: str = Header(None)):
    """Move a message and/or change its read flag in the Graph mailbox.

    The move runs first because it returns a new message ID, which is then
    used for the read-flag update.

    Returns:
        A dict with ``status``, ``moved`` (only when a folder was given),
        ``read_updated`` (only when ``is_read`` was given) and the current
        ``graph_id``.

    Raises:
        HTTPException: 401 on a bad token.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    messages_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages"
    message_id = req.graph_id
    outcome: dict = {"status": "ok"}

    if req.folder:
        target_parts = _map_jnj_folder(req.folder)
        destination_id = _ensure_folder(target_parts)
        move_resp = _retry_graph(
            http_requests.post,
            f"{messages_url}/{message_id}/move",
            _graph_headers,
            json={"destinationId": destination_id},
            timeout=15,
        )
        if move_resp.status_code in (200, 201):
            # Keep the old ID if the response carries none.
            message_id = move_resp.json().get("id", message_id)
            outcome["moved"] = True
            log.info("Graph MOVE OK: %s → %s", req.graph_id, "/".join(target_parts))
        else:
            log.error("Graph MOVE FAIL [%d]: %s", move_resp.status_code, move_resp.text[:200])
            outcome["moved"] = False

    if req.is_read is not None:
        patch_resp = _retry_graph(
            http_requests.patch,
            f"{messages_url}/{message_id}",
            _graph_headers,
            json={"isRead": req.is_read},
            timeout=15,
        )
        read_ok = patch_resp.status_code in (200, 201)
        outcome["read_updated"] = read_ok
        if not read_ok:
            log.error("Graph PATCH isRead FAIL [%d]: %s", patch_resp.status_code, patch_resp.text[:200])

    outcome["graph_id"] = message_id
    return outcome
|
||||||
|
|
||||||
|
|
||||||
|
# Request body of POST /mirror-plan.
class MirrorPlanRequest(BaseModel):
    # One entry per source message: [{"message_id": ..., "folder": ..., "is_read": ...}]
    manifest: list[dict]
    # Start of the mirrored window, ISO8601 UTC, e.g. "2026-05-09T00:00:00Z"
    cutoff: str
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/mirror-plan")
async def mirror_plan(req: MirrorPlanRequest, authorization: str = Header(None)):
    """Compare the manifest of JNJ messages with the current mailbox content.

    - deletes mailbox messages that are not in the manifest (deleted in JNJ
      or dropped out of the window)
    - returns to_add = the message_ids missing from the mailbox (the client
      then uploads them via /upload)

    Deletes ONLY inside the window (cutoff) — the older archive stays untouched.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    # normalized id → original message_id (echoed back to the client)
    manifest_map: dict[str, str] = {}
    for entry in req.manifest:
        if normalized := _norm_mid(entry.get("message_id", "")):
            manifest_map[normalized] = entry["message_id"]

    # {normalized message id: graph id} of what the mailbox holds in the window
    mailbox = _enumerate_jnj_mailbox(req.cutoff)

    to_add = [original for key, original in manifest_map.items() if key not in mailbox]
    stale_ids = [gid for key, gid in mailbox.items() if key not in manifest_map]

    deleted = 0
    for gid in stale_ids:
        resp = _retry_graph(
            http_requests.delete,
            f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{gid}",
            _graph_headers,
            timeout=15,
        )
        if resp.status_code not in (200, 204):
            log.error("mirror delete FAIL [%d]: %s", resp.status_code, resp.text[:150])
            continue
        deleted += 1

    manifest_count = len(manifest_map)
    mailbox_count = len(mailbox)
    log.info(
        "mirror-plan: manifest=%d mailbox=%d → add=%d delete=%d",
        manifest_count, mailbox_count, len(to_add), deleted,
    )
    return {
        "to_add": to_add,
        "deleted": deleted,
        "manifest_count": manifest_count,
        "mailbox_count": mailbox_count,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/upload-file")
async def upload_file(
    file: UploadFile = File(...),
    authorization: str = Header(None),
):
    """Upload a file to the Dropbox folder ``/!!!Days/Downloads Z230``.

    A ``.enc`` upload is Fernet-decrypted and stored without the suffix.
    An existing Dropbox file with the same name is overwritten.

    Raises:
        HTTPException: 401 on a bad token, 500 when Dropbox is not
            configured, 400 on a missing filename or an encrypted payload
            that cannot be decrypted.
    """
    from cryptography.fernet import InvalidToken

    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    if not DROPBOX_REFRESH_TOKEN:
        raise HTTPException(status_code=500, detail="Dropbox not configured")

    # The filename is client-controlled: keep only its last path component so
    # the upload always lands directly in the target folder.
    client_name = (file.filename or "").replace("\\", "/").rsplit("/", 1)[-1]
    is_encrypted = client_name.endswith(".enc")
    orig_filename = client_name[:-4] if is_encrypted else client_name
    if not orig_filename:
        raise HTTPException(status_code=400, detail="Missing filename")

    raw = await file.read()
    if is_encrypted:
        try:
            file_content = _FERNET.decrypt(raw)
        except InvalidToken:
            raise HTTPException(status_code=400, detail="Invalid encrypted payload") from None
    else:
        file_content = raw

    dbx = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    dropbox_path = f"/!!!Days/Downloads Z230/{orig_filename}"
    dbx.files_upload(file_content, dropbox_path, mode=dropbox.files.WriteMode.overwrite)
    return {"status": "uploaded", "file": orig_filename, "dropbox_path": dropbox_path}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/status")
async def pending_files(authorization: str = Header(None)):
    """List the files waiting in the Dropbox UploadToJNJ folder.

    Returns:
        ``{"files": [...]}`` where every entry is the Fernet-encrypted file
        name. When the folder cannot be listed, the failure is logged and an
        empty list is returned.

    Raises:
        HTTPException: 401 on a bad token.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    dbx = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    try:
        page = dbx.files_list_folder(DROPBOX_UPLOAD_TO_JNJ)
        entries = list(page.entries)
        # A single call may return only part of the folder; keep fetching
        # until Dropbox reports that there is nothing more.
        while page.has_more:
            page = dbx.files_list_folder_continue(page.cursor)
            entries.extend(page.entries)
        files = [e.name for e in entries if isinstance(e, dropbox.files.FileMetadata)]
    except Exception as exc:
        # Still best-effort (the client simply sees no pending files), but
        # the reason is no longer lost.
        log.warning("pending-files: cannot list %s: %s", DROPBOX_UPLOAD_TO_JNJ, exc)
        files = []

    log.info("pending-files: %d souboru", len(files))
    # File names are encrypted — the client sees only an opaque token in the URL (Zscaler bypass).
    encrypted_names = [_FERNET.encrypt(name.encode()).decode() for name in files]
    return {"files": encrypted_names}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/item/{filename:path}")
async def download_file(filename: str, authorization: str = Header(None)):
    """Download one queued file from Dropbox, Fernet-encrypted.

    Args:
        filename: Fernet token holding the encrypted original file name.
        authorization: ``Authorization`` header; must be ``Bearer <TOKEN>``.

    Returns:
        A binary ``Response`` whose body is the Fernet-encrypted file content.

    Raises:
        HTTPException: 401 on a bad bearer token, 400 when *filename* is not
            a valid token, 404 when the file cannot be downloaded.
    """
    from urllib.parse import quote  # local import: only this endpoint needs it

    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    # filename is a Fernet token (the encrypted original file name)
    try:
        orig_filename = _FERNET.decrypt(filename.encode()).decode()
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid filename token")
    dbx = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    dropbox_path = f"{DROPBOX_UPLOAD_TO_JNJ}/{orig_filename}"
    try:
        _, response = dbx.files_download(dropbox_path)
        raw = response.content
    except Exception as e:
        log.error("download-file: nelze stáhnout %s: %s", filename, e)
        raise HTTPException(status_code=404, detail=f"Soubor nenalezen: {filename}")

    encrypted = _FERNET.encrypt(raw)

    # HTTP headers are latin-1 only, so a name with other characters would
    # make Response() raise UnicodeEncodeError. Send an ASCII fallback plus
    # the exact name as an RFC 5987 ``filename*`` parameter.
    download_name = f"{orig_filename}.enc"
    ascii_name = (
        download_name.encode("ascii", "replace").decode("ascii")
        .replace("\\", "_")
        .replace('"', "_")
    )
    content_disposition = (
        f'attachment; filename="{ascii_name}"; '
        f"filename*=UTF-8''{quote(download_name, safe='')}"
    )
    reply = Response(
        content=encrypted,
        media_type="application/octet-stream",
        headers={"Content-Disposition": content_disposition},
    )

    # Move the file out of the queue (into ##Trash; the log messages call it
    # "Sent") only after the response has been built, so a failure above
    # cannot remove a file that was never delivered.
    sent_path = f"{DROPBOX_UPLOAD_TO_JNJ}/##Trash/{orig_filename}"
    try:
        dbx.files_move_v2(dropbox_path, sent_path, autorename=True)
        log.info("download-file: %s přesunut do Sent", orig_filename)
    except Exception as e:
        log.warning("download-file: nelze přesunout %s do Sent: %s", orig_filename, e)

    return reply
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
# msgreceiver — deployment instrukce
|
||||||
|
|
||||||
|
## Soubory
|
||||||
|
- Zdrojový skript: `U:\PythonProject\Janssen\EmailsImport\DockerCustomApp\app.py`
|
||||||
|
- Network share: `\\tower\appdata\msgreceiver\app.py`
|
||||||
|
- Unraid cesta: `/mnt/user/appdata/msgreceiver/`
|
||||||
|
|
||||||
|
## Přihlašovací údaje
|
||||||
|
- **Unraid SSH:** `root@192.168.1.76`, heslo: `7309208104`
|
||||||
|
- **Docker kontejner:** `msgreceiver`
|
||||||
|
|
||||||
|
## Postup při nové verzi app.py
|
||||||
|
|
||||||
|
### 1. Zkopírovat app.py na server
|
||||||
|
```powershell
|
||||||
|
Copy-Item "U:\PythonProject\Janssen\EmailsImport\DockerCustomApp\app.py" "\\tower\appdata\msgreceiver\app.py" -Force
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Připojit se přes SSH a přebuildovat Docker (přes Python paramiko)
|
||||||
|
```python
|
||||||
|
import paramiko
|
||||||
|
c = paramiko.SSHClient()
|
||||||
|
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
||||||
|
c.connect('192.168.1.76', username='root', password='7309208104')
|
||||||
|
|
||||||
|
# Build
|
||||||
|
_, stdout, stderr = c.exec_command('docker build -t msgreceiver /mnt/user/appdata/msgreceiver/ 2>&1')
|
||||||
|
print(stdout.read().decode())
|
||||||
|
|
||||||
|
# Restart
|
||||||
|
_, stdout, stderr = c.exec_command('docker restart msgreceiver')
|
||||||
|
print(stdout.read().decode())
|
||||||
|
|
||||||
|
c.close()
|
||||||
|
```
|
||||||
|
|
||||||
|
> Poznámka: `sshpass` není na tomto Windows stroji k dispozici, Windows OpenSSH neumí neinteraktivní heslo — proto vždy použij **paramiko**.
|
||||||
|
|
||||||
|
## Struktura adresáře na serveru
|
||||||
|
```
|
||||||
|
/mnt/user/appdata/msgreceiver/
|
||||||
|
├── Dockerfile
|
||||||
|
├── app.py
|
||||||
|
├── requirements.txt
|
||||||
|
└── .env ← Dropbox credentials
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dropbox konfigurace (.env)
|
||||||
|
Proměnné načítané z `.env`:
|
||||||
|
- `DROPBOX_APP_KEY`
|
||||||
|
- `DROPBOX_APP_SECRET`
|
||||||
|
- `DROPBOX_APP_REFRESH_TOKEN`
|
||||||
|
|
||||||
|
Upload cesta v Dropboxu: `/!!!Days/Downloads Z230/{filename}`
|
||||||
|
|
||||||
|
## API endpointy
|
||||||
|
Bearer token: `13e1bb01-9fd5-44a8-8ce9-4ee27133d340`
|
||||||
|
|
||||||
|
| Endpoint | Přijímá | Chování |
|
||||||
|
|---|---|---|
|
||||||
|
| `POST /upload` | `.msg` / `.emsg` | `.emsg` Fernet dešifruje → uloží `.msg` do `/msgs`, přeskočí pokud existuje; volitelně import do Graphu |
|
||||||
|
| `POST /upload-db` | `.db` / `.db.xz.enc` | **v2.1:** `.db.xz.enc` Fernet dešifruje + lzma rozbalí → plain `.db`; pak smaže staré `.db` v `/msgs/db` a uloží. Plain `.db` bere i nadále (zpětná kompatibilita) |
|
||||||
|
| `POST /upload-file` (dříve `/upload-dropbox`) | cokoliv | Nahraje do Dropboxu (overwrite) |
|
||||||
|
|
||||||
|
> **v2.1 (2026-06-10):** `/upload-db` umí komprimovanou+šifrovanou DB (`.db.xz.enc`)
|
||||||
|
> od `jnj_mailbox_sync >= v1.2`. Staré `.db` se smažou **až po** úspěšném
|
||||||
|
> dešifrování/rozbalení (při chybě zůstane poslední dobrá DB). Vyžaduje `lzma`
|
||||||
|
> (stdlib) — ověřeno v kontejneru. Nasazení = jen restart (app.py je bind-mount),
|
||||||
|
> bez rebuildu.
|
||||||
|
|
||||||
|
> **Pozn. k nasazení:** `app.py` je bind-mountovaný (`/mnt/user/appdata/msgreceiver` → `/app`),
|
||||||
|
> takže pro změnu KÓDU stačí přepsat soubor + `docker restart msgreceiver`.
|
||||||
|
> `docker build` je potřeba jen při změně `requirements.txt`.
|
||||||
@@ -0,0 +1,599 @@
|
|||||||
|
# app.py | v2.1 | 2026-06-10
|
||||||
|
# FastAPI server pro příjem .msg a .db souborů, upload do Dropboxu a import do Graph API.
|
||||||
|
# Endpointy: /upload (.msg/.emsg → /msgs + Graph import),
|
||||||
|
# /upload-db (.db NEBO .db.xz.enc → Fernet desifruj + lzma rozbal → /msgs/db),
|
||||||
|
# /upload-file (→ Dropbox /!!!Days/Downloads Z230; dříve /upload-dropbox),
|
||||||
|
# /message-delete, /message-update (sync: smazání, přečtení, přesun složky),
|
||||||
|
# /mirror-plan (diff manifestu z JNJ vůči schránce → smaže přebytky, vrátí to_add),
|
||||||
|
# /status (seznam souborů k odeslání na JNJ — jména zašifrována Fernetem),
|
||||||
|
# /item/{enc_filename} (stažení souboru — enc_filename je Fernet token).
|
||||||
|
|
||||||
|
from fastapi import FastAPI, UploadFile, File, Form, Header, HTTPException, Response
|
||||||
|
from pydantic import BaseModel
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import lzma
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
import os
|
||||||
|
import dropbox
|
||||||
|
import msal
|
||||||
|
import requests as http_requests
|
||||||
|
import extract_msg
|
||||||
|
from dateutil import parser as dtparser
|
||||||
|
from datetime import timezone
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
|
|
||||||
|
# Load environment variables from the .env file that sits next to this module.
load_dotenv(Path(__file__).parent / ".env")

app = FastAPI()
log = logging.getLogger("msgreceiver")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

# Shared bearer token every endpoint compares the Authorization header with.
# NOTE(review): hard-coded secret committed to source — consider loading it
# from the environment like the Dropbox credentials below.
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
# Encryption key derived from TOKEN (Fernet = AES-128 CBC + HMAC): the SHA-256
# digest of the token, urlsafe-base64 encoded.
_FERNET = Fernet(base64.urlsafe_b64encode(hashlib.sha256(TOKEN.encode()).digest()))

# Target directories for received .msg files and database uploads.
SAVE_DIR = Path("/msgs")
DB_DIR = Path("/msgs/db")

# Create both directories at import time so the endpoints can write right away.
SAVE_DIR.mkdir(parents=True, exist_ok=True)
DB_DIR.mkdir(parents=True, exist_ok=True)

# Dropbox credentials come from the environment; an empty string means "not set".
DROPBOX_APP_KEY = os.getenv("DROPBOX_APP_KEY", "")
DROPBOX_APP_SECRET = os.getenv("DROPBOX_APP_SECRET", "")
DROPBOX_REFRESH_TOKEN = os.getenv("DROPBOX_APP_REFRESH_TOKEN", "")

# --- Graph API config ---
# NOTE(review): the client secret below is hard-coded in source — confirm
# whether it should be moved to .env and rotated.
GRAPH_TENANT_ID = "7d269944-37a4-43a1-8140-c7517dc426e9"
GRAPH_CLIENT_ID = "4b222bfd-78c9-4239-a53f-43006b3ed07f"
GRAPH_CLIENT_SECRET = "Txg8Q~MjhocuopxsJyJBhPmDfMxZ2r5WpTFj1dfk"
GRAPH_MAILBOX = "vladimir.buzalka@buzalka.cz"
GRAPH_ROOT_FOLDER = "JNJ"  # subfolder under Inbox — root for imported emails
# Dropbox folder whose files are offered by /status and served by /item.
DROPBOX_UPLOAD_TO_JNJ = "/!!!Days/Downloads Z230/UploadToJNJ"
GRAPH_URL = "https://graph.microsoft.com/v1.0"

# Cache: folder path → Graph folder ID
_folder_id_cache: dict[str, str] = {}
# Last Graph access token obtained; None until the first token request.
_graph_token: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_graph_token() -> str:
    """Request a fresh Graph access token and remember it in ``_graph_token``.

    Uses the MSAL client-credentials flow with the configured tenant, client
    ID and client secret.

    Raises:
        RuntimeError: when the MSAL response carries no ``access_token``.
    """
    global _graph_token
    authority_url = f"https://login.microsoftonline.com/{GRAPH_TENANT_ID}"
    client = msal.ConfidentialClientApplication(
        GRAPH_CLIENT_ID,
        authority=authority_url,
        client_credential=GRAPH_CLIENT_SECRET,
    )
    token_response = client.acquire_token_for_client(
        scopes=["https://graph.microsoft.com/.default"]
    )
    if "access_token" in token_response:
        _graph_token = token_response["access_token"]
        return _graph_token
    raise RuntimeError(f"Graph auth failed: {token_response}")
|
||||||
|
|
||||||
|
|
||||||
|
def _graph_headers() -> dict:
    """Build the request headers for a Graph call.

    Reuses the cached token when there is one, otherwise requests a new one.
    """
    bearer = _graph_token if _graph_token else _get_graph_token()
    return {
        "Authorization": f"Bearer {bearer}",
        "Content-Type": "application/json",
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _find_child_folder(children_url: str, name: str) -> Optional[str]:
    """Return the ID of the child folder called *name* (case-insensitive), or None.

    Follows ``@odata.nextLink`` so that every page of children is inspected,
    not just the first one.
    """
    wanted = name.lower()
    page_url = f"{children_url}?$top=100"
    while page_url:
        r = _retry_graph(http_requests.get, page_url, _graph_headers, timeout=15)
        data = r.json()
        for f in data.get("value", []):
            if f["displayName"].lower() == wanted:
                return f["id"]
        page_url = data.get("@odata.nextLink")
    return None


def _ensure_folder(path_parts: list[str]) -> str:
    """Ensure folder hierarchy exists under Inbox, return leaf folder ID.

    Each element of *path_parts* is one folder level below Inbox. Missing
    levels are created. Every resolved level is stored in
    ``_folder_id_cache`` under its ``/``-joined path, so later calls skip
    the Graph round trips.

    Args:
        path_parts: Folder names from the top level down to the leaf.

    Returns:
        The Graph ID of the leaf folder (``"Inbox"`` for an empty list).

    Raises:
        RuntimeError: when a folder can neither be found nor created.
    """
    cache_key = "/".join(path_parts)
    if cache_key in _folder_id_cache:
        return _folder_id_cache[cache_key]

    parent_id = "Inbox"

    for i, part in enumerate(path_parts):
        partial_key = "/".join(path_parts[: i + 1])
        if partial_key in _folder_id_cache:
            parent_id = _folder_id_cache[partial_key]
            continue

        # "Inbox" is used as-is in the URL for the first level; deeper levels
        # use the Graph ID of the parent resolved in the previous iteration.
        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{parent_id}/childFolders"

        found = _find_child_folder(url, part)

        if not found:
            # Create folder
            cr = _retry_graph(
                http_requests.post, url, _graph_headers,
                json={"displayName": part}, timeout=15,
            )
            if cr.status_code in (200, 201):
                found = cr.json()["id"]
            elif cr.status_code == 409:
                # Already exists (race condition) — re-fetch
                found = _find_child_folder(url, part)
            if not found:
                raise RuntimeError(f"Cannot create folder '{part}': {cr.text}")

        _folder_id_cache[partial_key] = found
        parent_id = found

    return parent_id
|
||||||
|
|
||||||
|
|
||||||
|
def _map_jnj_folder(folder: str) -> list[str]:
    """Translate a JNJ folder path into Graph folder parts below the JNJ root.

    The first path segment is the mailbox name. It is dropped, except that a
    mailbox containing "online archive" adds an "Online Archive" level.

    '/vbuzalka@its.jnj.com/Inbox/TMP' → ['JNJ', 'Inbox', 'TMP']
    '/Online Archive - vbuzalka@its.jnj.com/Inbox' → ['JNJ', 'Online Archive', 'Inbox']
    """
    segments = list(filter(None, folder.split("/")))
    if not segments:
        return [GRAPH_ROOT_FOLDER]

    # Leading segment is the mailbox; everything after it is the folder path.
    mailbox_name, *subfolders = segments

    mapped = [GRAPH_ROOT_FOLDER]
    if "online archive" in mailbox_name.lower():
        mapped.append("Online Archive")
    mapped.extend(subfolders)
    return mapped
|
||||||
|
|
||||||
|
|
||||||
|
def _norm_mid(mid: str) -> str:
|
||||||
|
"""Normalizuj Internet Message-ID pro porovnání (osekej <> a whitespace)."""
|
||||||
|
return (mid or "").strip().strip("<>").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _enumerate_jnj_mailbox(cutoff_iso: str) -> dict[str, str]:
    """Map normalized internetMessageId → Graph message ID for the JNJ tree.

    Walks the JNJ root folder and all of its sub-folders and collects every
    message whose ``receivedDateTime`` is >= *cutoff_iso*. The result is the
    "what is already in the mailbox" side of the mirror diff. Messages older
    than the cutoff are never fetched, so the mirror never touches them.
    """
    folders_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders"
    root_id = _ensure_folder([GRAPH_ROOT_FOLDER])

    # Breadth-first walk over the JNJ root and every nested sub-folder.
    queue = [root_id]
    folder_ids = []
    while queue:
        current = queue.pop(0)
        folder_ids.append(current)
        page = f"{folders_url}/{current}/childFolders?$top=100"
        while page:
            listing = _retry_graph(http_requests.get, page, _graph_headers, timeout=20).json()
            queue.extend(child["id"] for child in listing.get("value", []))
            page = listing.get("@odata.nextLink")

    # Collect message IDs from every folder, limited to the cutoff window.
    encoded_cutoff = cutoff_iso.replace(":", "%3A")
    query = (
        f"?$filter=receivedDateTime ge {encoded_cutoff}"
        f"&$select=id,internetMessageId&$top=200"
    )
    found: dict[str, str] = {}
    for folder_id in folder_ids:
        page = f"{folders_url}/{folder_id}/messages{query}"
        while page:
            listing = _retry_graph(http_requests.get, page, _graph_headers, timeout=30).json()
            for message in listing.get("value", []):
                key = _norm_mid(message.get("internetMessageId", ""))
                if key:
                    found[key] = message["id"]
            page = listing.get("@odata.nextLink")

    return found
|
||||||
|
|
||||||
|
|
||||||
|
def _make_recipient(addr: str) -> dict:
|
||||||
|
if "<" in addr and ">" in addr:
|
||||||
|
name = addr[: addr.index("<")].strip().strip('"')
|
||||||
|
email = addr[addr.index("<") + 1 : addr.index(">")].strip()
|
||||||
|
else:
|
||||||
|
name = addr
|
||||||
|
email = addr
|
||||||
|
return {"emailAddress": {"name": name, "address": email}}
|
||||||
|
|
||||||
|
|
||||||
|
def _read_msg_field(msg, attr: str, default=None):
    """Return ``msg.<attr>``, or *default* when it is falsy or reading it raises."""
    try:
        return getattr(msg, attr) or default
    except Exception:
        # extract_msg can fail on non-standard encodings (cp1252 and the like)
        return default


def _import_msg_to_graph(msg_path: Path, folder: str) -> Optional[str]:
    """Parse .msg and import into Graph API mailbox. Returns message ID or None.

    Args:
        msg_path: Path of the saved ``.msg`` file.
        folder: JNJ folder path; mapped to a folder below the JNJ root, which
            is created when missing.

    Returns:
        The ``id`` from the Graph response on success. ``None`` when Graph
        rejects the message or any step raises (the error is logged).
    """
    try:
        msg = extract_msg.Message(str(msg_path))
        try:
            subject = msg.subject or "(no subject)"

            body_html = _read_msg_field(msg, "htmlBody")
            if isinstance(body_html, bytes):
                body_html = body_html.decode("utf-8", errors="replace")
            body_text = _read_msg_field(msg, "body", "")

            sender_email = _read_msg_field(msg, "sender", "")
            sender_name = _read_msg_field(msg, "senderName", sender_email)
            to_raw = _read_msg_field(msg, "to", "")
            cc_raw = _read_msg_field(msg, "cc", "")
            date_raw = _read_msg_field(msg, "date")

            # Only attachments that have both data and a long file name are sent.
            att_list = [
                {
                    "@odata.type": "#microsoft.graph.fileAttachment",
                    "name": att.longFilename,
                    "contentType": getattr(att, "mimetype", None) or "application/octet-stream",
                    "contentBytes": base64.b64encode(att.data).decode(),
                }
                for att in msg.attachments
                if att.data and att.longFilename
            ]
        finally:
            # Release the file even when a field or attachment fails to parse.
            msg.close()

        to_list = [a.strip() for a in to_raw.split(";") if a.strip()]
        cc_list = [a.strip() for a in cc_raw.split(";") if a.strip()]

        # Map folder and ensure it exists
        folder_parts = _map_jnj_folder(folder)
        folder_id = _ensure_folder(folder_parts)

        # NOTE(review): 0x0E07 looks like PR_MESSAGE_FLAGS, presumably set so
        # the item is not stored as a draft — confirm.
        ext_props = [{"id": "Integer 0x0E07", "value": "1"}]

        dt_str = None
        if date_raw:
            try:
                parsed = dtparser.parse(str(date_raw))
                dt_str = parsed.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
            except Exception:
                dt_str = None
        if dt_str:
            # PR_MESSAGE_DELIVERY_TIME (0x0E06) — the only way to set
            # receivedDateTime through Graph (the direct field is read-only)
            ext_props.append({"id": "SystemTime 0x0E06", "value": dt_str})

        payload = {
            "subject": subject,
            "body": {
                "contentType": "HTML" if body_html else "Text",
                "content": body_html or body_text,
            },
            "from": _make_recipient(f"{sender_name} <{sender_email}>"),
            "toRecipients": [_make_recipient(a) for a in to_list],
            "ccRecipients": [_make_recipient(a) for a in cc_list],
            "isRead": True,
            "singleValueExtendedProperties": ext_props,
        }

        if dt_str:
            payload["sentDateTime"] = dt_str

        if att_list:
            payload["attachments"] = att_list

        url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/mailFolders/{folder_id}/messages"
        # Refreshes the token and retries once if Graph answers 401.
        r = _retry_graph(http_requests.post, url, _graph_headers, json=payload, timeout=30)

        if r.status_code in (200, 201):
            msg_id = r.json().get("id", "")
            log.info("Graph OK: %s → %s", subject[:60], "/".join(folder_parts))
            return msg_id
        else:
            log.error("Graph FAIL [%d]: %s | %s", r.status_code, subject[:60], r.text[:200])
            return None

    except Exception as e:
        log.error("Graph import error for %s: %s", msg_path.name, e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/upload")
async def upload_msg(
    file: UploadFile = File(...),
    authorization: str = Header(None),
    folder: str = Form(""),
):
    """Receive a ``.msg`` or Fernet-encrypted ``.emsg`` file and store it as ``.msg``.

    The file is written to ``SAVE_DIR``. An upload whose target already
    exists is skipped. When *folder* is given the saved message is also
    imported into the Graph mailbox.

    Returns:
        ``{"status": "exists", ...}`` when the file was already present,
        otherwise ``{"status": "saved", "file": ..., "graph_id": ...}``.

    Raises:
        HTTPException: 401 on a bad bearer token, 400 for other extensions.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    # The name comes from the client: keep only its last path component so it
    # can never point outside SAVE_DIR ("../", absolute paths, backslashes).
    client_name = Path((file.filename or "").replace("\\", "/")).name

    is_encrypted = client_name.endswith(".emsg")
    if not client_name.endswith(".msg") and not is_encrypted:
        raise HTTPException(status_code=400, detail="Only .msg or .emsg files accepted")

    # Always stored as .msg
    msg_filename = client_name[:-5] + ".msg" if is_encrypted else client_name
    dest = SAVE_DIR / msg_filename
    if dest.exists():
        return {"status": "exists", "file": msg_filename}

    content = await file.read()
    if is_encrypted:
        content = _FERNET.decrypt(content)

    with dest.open("wb") as f:
        f.write(content)

    # Import to Graph API if folder was provided by client
    graph_id = None
    if folder:
        graph_id = _import_msg_to_graph(dest, folder)

    return {
        "status": "saved",
        "file": msg_filename,
        "graph_id": graph_id,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/upload-db")
async def upload_db(
    file: UploadFile = File(...),
    authorization: str = Header(None)
):
    """Receive a database file and make it the only ``.db`` in ``DB_DIR``.

    Accepts a plain ``.db`` or a ``.db.xz.enc`` upload. The latter is Fernet-
    decrypted and lzma-decompressed to a plain ``.db``. Older ``*.db`` files
    are removed only once the new content is safely on disk.

    Returns:
        ``{"status": "saved", "file": ..., "bytes": ..., "encrypted": ...}``.

    Raises:
        HTTPException: 401 on a bad bearer token, 400 for other extensions.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    # Client-supplied name: keep only the last path component so the file
    # cannot be written outside DB_DIR.
    fn = Path((file.filename or "").replace("\\", "/")).name
    is_enc = fn.endswith(".db.xz.enc")          # jnj_mailbox_sync >= v1.2
    if not (is_enc or fn.endswith(".db")):
        raise HTTPException(status_code=400, detail="Only .db or .db.xz.enc files accepted")

    content = await file.read()
    if is_enc:
        # Fernet decrypt -> lzma decompress -> plain .db (like .emsg -> .msg in /upload)
        content = lzma.decompress(_FERNET.decrypt(content))
        db_filename = fn[: -len(".xz.enc")]     # jnjemails_<ts>.db
    else:
        db_filename = fn

    dest = DB_DIR / db_filename
    # Write under a temporary name first (not matched by "*.db"): if
    # decryption, decompression or the write fails, the previous DB stays.
    staging = dest.with_name(dest.name + ".part")
    staging.write_bytes(content)

    for old in DB_DIR.glob("*.db"):
        old.unlink()
    staging.replace(dest)
    return {"status": "saved", "file": db_filename, "bytes": len(content), "encrypted": is_enc}
|
||||||
|
|
||||||
|
|
||||||
|
class MessageDeleteRequest(BaseModel):
    # Request body of POST /message-delete.
    # Graph ID of the message to delete.
    graph_id: str
|
||||||
|
|
||||||
|
|
||||||
|
class MessageUpdateRequest(BaseModel):
    # Request body of POST /message-update.
    # Graph ID of the message to update.
    graph_id: str
    # New read state; None leaves the read state untouched.
    is_read: Optional[bool] = None
    # JNJ folder path to move the message to; None/empty means no move.
    folder: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _retry_graph(method, url, headers_fn, **kwargs):
|
||||||
|
"""Call Graph API, refresh token once on 401."""
|
||||||
|
headers = headers_fn()
|
||||||
|
r = method(url, headers=headers, **kwargs)
|
||||||
|
if r.status_code == 401:
|
||||||
|
_get_graph_token()
|
||||||
|
headers = headers_fn()
|
||||||
|
r = method(url, headers=headers, **kwargs)
|
||||||
|
return r
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/message-delete")
async def message_delete(req: MessageDeleteRequest, authorization: str = Header(None)):
    """Delete one message from the Graph mailbox by its Graph ID.

    Raises:
        HTTPException: 401 on a bad bearer token, 500 when Graph does not
            answer 200/204.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    message_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{req.graph_id}"
    reply = _retry_graph(http_requests.delete, message_url, _graph_headers, timeout=15)

    if reply.status_code not in (200, 204):
        raise HTTPException(
            status_code=500,
            detail=f"Graph DELETE failed: {reply.status_code} {reply.text[:200]}",
        )

    log.info("Graph DELETE OK: %s", req.graph_id)
    return {"status": "deleted"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/message-update")
async def message_update(req: MessageUpdateRequest, authorization: str = Header(None)):
    """Move a message to another folder and/or change its read state.

    The move runs first because Graph answers with a new message ID, which is
    then used for the read-state update. Failures of either step are logged
    and reported through the ``moved`` / ``read_updated`` flags.

    Returns:
        A dict with ``status``, optional ``moved`` and ``read_updated`` flags
        and the current ``graph_id``.

    Raises:
        HTTPException: 401 on a bad bearer token.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    messages_url = f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages"
    active_id = req.graph_id
    outcome: dict = {"status": "ok"}

    if req.folder:
        target_parts = _map_jnj_folder(req.folder)
        target_id = _ensure_folder(target_parts)
        move_reply = _retry_graph(
            http_requests.post,
            f"{messages_url}/{active_id}/move",
            _graph_headers,
            json={"destinationId": target_id},
            timeout=15,
        )
        was_moved = move_reply.status_code in (200, 201)
        if was_moved:
            # Keep the old ID if the response unexpectedly carries none.
            active_id = move_reply.json().get("id", active_id)
            log.info("Graph MOVE OK: %s → %s", req.graph_id, "/".join(target_parts))
        else:
            log.error("Graph MOVE FAIL [%d]: %s", move_reply.status_code, move_reply.text[:200])
        outcome["moved"] = was_moved

    if req.is_read is not None:
        patch_reply = _retry_graph(
            http_requests.patch,
            f"{messages_url}/{active_id}",
            _graph_headers,
            json={"isRead": req.is_read},
            timeout=15,
        )
        read_ok = patch_reply.status_code in (200, 201)
        outcome["read_updated"] = read_ok
        if not read_ok:
            log.error("Graph PATCH isRead FAIL [%d]: %s", patch_reply.status_code, patch_reply.text[:200])

    outcome["graph_id"] = active_id
    return outcome
|
||||||
|
|
||||||
|
|
||||||
|
class MirrorPlanRequest(BaseModel):
    # Request body of POST /mirror-plan.
    # Messages present on the JNJ side; only "message_id" is read by /mirror-plan.
    manifest: list[dict]          # [{"message_id": ..., "folder": ..., "is_read": ...}]
    # Start of the mirrored window; older mailbox messages are not enumerated.
    cutoff: str                   # ISO8601 UTC, e.g. "2026-05-09T00:00:00Z"
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/mirror-plan")
async def mirror_plan(req: MirrorPlanRequest, authorization: str = Header(None)):
    """Compare the JNJ message manifest (last 30 days) with the mailbox state.

    - deletes mailbox messages that are not in the manifest (deleted in JNJ or
      dropped out of the window)
    - returns to_add = message_ids missing from the mailbox (the client then
      uploads them through /upload)

    Deletes ONLY inside the window (cutoff) — the older archive stays untouched.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")

    # manifest: normalized id → original message_id (echoed back to the client)
    manifest_map: dict[str, str] = {
        norm: entry["message_id"]
        for entry in req.manifest
        if (norm := _norm_mid(entry.get("message_id", "")))
    }

    mailbox = _enumerate_jnj_mailbox(req.cutoff)     # {norm_mid: graph_id}

    to_add = [original for norm, original in manifest_map.items() if norm not in mailbox]
    stale_ids = [graph_id for norm, graph_id in mailbox.items() if norm not in manifest_map]

    deleted = 0
    for graph_id in stale_ids:
        reply = _retry_graph(
            http_requests.delete,
            f"{GRAPH_URL}/users/{GRAPH_MAILBOX}/messages/{graph_id}",
            _graph_headers,
            timeout=15,
        )
        if reply.status_code not in (200, 204):
            log.error("mirror delete FAIL [%d]: %s", reply.status_code, reply.text[:150])
            continue
        deleted += 1

    manifest_count = len(manifest_map)
    mailbox_count = len(mailbox)
    log.info(
        "mirror-plan: manifest=%d mailbox=%d → add=%d delete=%d",
        manifest_count, mailbox_count, len(to_add), deleted,
    )
    return {
        "to_add": to_add,
        "deleted": deleted,
        "manifest_count": manifest_count,
        "mailbox_count": mailbox_count,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/upload-file")
async def upload_file(
    file: UploadFile = File(...),
    authorization: str = Header(None),
):
    """Upload a received file to Dropbox, overwriting an existing one.

    A name ending in ``.enc`` marks Fernet-encrypted content: it is decrypted
    and stored without the ``.enc`` suffix. Anything else is stored as sent.

    Raises:
        HTTPException: 401 on a bad bearer token, 500 when no Dropbox refresh
            token is configured.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    if not DROPBOX_REFRESH_TOKEN:
        raise HTTPException(status_code=500, detail="Dropbox not configured")

    uploaded_name = file.filename
    if uploaded_name.endswith(".enc"):
        orig_filename = uploaded_name[:-4]
        file_content = _FERNET.decrypt(await file.read())
    else:
        orig_filename = uploaded_name
        file_content = await file.read()

    client = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    dropbox_path = f"/!!!Days/Downloads Z230/{orig_filename}"
    client.files_upload(file_content, dropbox_path, mode=dropbox.files.WriteMode.overwrite)
    return {"status": "uploaded", "file": orig_filename, "dropbox_path": dropbox_path}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/status")
async def pending_files(authorization: str = Header(None)):
    """List the files waiting in the Dropbox upload folder, names encrypted.

    Returns ``{"files": [...]}`` where every entry is the Fernet token of one
    file name found directly in ``DROPBOX_UPLOAD_TO_JNJ``. Sub-folders are
    ignored. Listing is best-effort: if Dropbox cannot be queried the error
    is logged and an empty list is returned.

    Raises:
        HTTPException: 401 when the bearer token does not match.
    """
    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    dbx = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    files = []
    try:
        result = dbx.files_list_folder(DROPBOX_UPLOAD_TO_JNJ)
        while True:
            files.extend(
                e.name for e in result.entries
                if isinstance(e, dropbox.files.FileMetadata)
            )
            # A single call returns only one page; keep following the cursor
            # so large folders are not silently truncated.
            if not result.has_more:
                break
            result = dbx.files_list_folder_continue(result.cursor)
    except Exception as e:
        # Stay best-effort (empty list), but leave a trace of the failure.
        log.warning("pending-files: nelze načíst seznam souborů: %s", e)
        files = []
    log.info("pending-files: %d souboru", len(files))
    # File names are encrypted: the client only ever sees an opaque token in
    # the URL (Zscaler bypass).
    encrypted_names = [_FERNET.encrypt(name.encode()).decode() for name in files]
    return {"files": encrypted_names}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/item/{filename:path}")
async def download_file(filename: str, authorization: str = Header(None)):
    """Download one queued file from Dropbox, Fernet-encrypted.

    Args:
        filename: Fernet token holding the encrypted original file name.
        authorization: ``Authorization`` header; must be ``Bearer <TOKEN>``.

    Returns:
        A binary ``Response`` whose body is the Fernet-encrypted file content.

    Raises:
        HTTPException: 401 on a bad bearer token, 400 when *filename* is not
            a valid token, 404 when the file cannot be downloaded.
    """
    from urllib.parse import quote  # local import: only this endpoint needs it

    if authorization != f"Bearer {TOKEN}":
        raise HTTPException(status_code=401, detail="Unauthorized")
    # filename is a Fernet token (the encrypted original file name)
    try:
        orig_filename = _FERNET.decrypt(filename.encode()).decode()
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid filename token")
    dbx = dropbox.Dropbox(
        app_key=DROPBOX_APP_KEY,
        app_secret=DROPBOX_APP_SECRET,
        oauth2_refresh_token=DROPBOX_REFRESH_TOKEN,
    )
    dropbox_path = f"{DROPBOX_UPLOAD_TO_JNJ}/{orig_filename}"
    try:
        _, response = dbx.files_download(dropbox_path)
        raw = response.content
    except Exception as e:
        log.error("download-file: nelze stáhnout %s: %s", filename, e)
        raise HTTPException(status_code=404, detail=f"Soubor nenalezen: {filename}")

    encrypted = _FERNET.encrypt(raw)

    # HTTP headers are latin-1 only, so a name with other characters would
    # make Response() raise UnicodeEncodeError. Send an ASCII fallback plus
    # the exact name as an RFC 5987 ``filename*`` parameter.
    download_name = f"{orig_filename}.enc"
    ascii_name = (
        download_name.encode("ascii", "replace").decode("ascii")
        .replace("\\", "_")
        .replace('"', "_")
    )
    content_disposition = (
        f'attachment; filename="{ascii_name}"; '
        f"filename*=UTF-8''{quote(download_name, safe='')}"
    )
    reply = Response(
        content=encrypted,
        media_type="application/octet-stream",
        headers={"Content-Disposition": content_disposition},
    )

    # Move the file out of the queue (into ##Trash; the log messages call it
    # "Sent") only after the response has been built, so a failure above
    # cannot remove a file that was never delivered.
    sent_path = f"{DROPBOX_UPLOAD_TO_JNJ}/##Trash/{orig_filename}"
    try:
        dbx.files_move_v2(dropbox_path, sent_path, autorename=True)
        log.info("download-file: %s přesunut do Sent", orig_filename)
    except Exception as e:
        log.warning("download-file: nelze přesunout %s do Sent: %s", orig_filename, e)

    return reply
|
||||||
@@ -58,6 +58,30 @@ Bearer token: `13e1bb01-9fd5-44a8-8ce9-4ee27133d340`
|
|||||||
|
|
||||||
| Endpoint | Přijímá | Chování |
|
| Endpoint | Přijímá | Chování |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| `POST /upload` | `.msg` | Uloží do `/msgs`, přeskočí pokud existuje |
|
| `POST /upload` | `.msg` / `.emsg` | `.emsg` Fernet dešifruje → uloží `.msg` do `/msgs`, přeskočí pokud existuje; volitelně import do Graphu |
|
||||||
| `POST /upload-db` | `.db` | Smaže všechny staré `.db` v `/msgs/db`, uloží novou |
|
| `POST /upload-db` | `.db` / `.db.xz.enc` | **v2.1:** `.db.xz.enc` Fernet dešifruje + lzma rozbalí → plain `.db`; pak smaže staré `.db` v `/msgs/db` a uloží. Plain `.db` bere i nadále (zpětná kompatibilita) |
|
||||||
| `POST /upload-dropbox` | cokoliv | Nahraje do Dropboxu (overwrite) |
|
| `POST /upload-dropbox` | cokoliv | Nahraje do Dropboxu (overwrite) |
|
||||||
|
|
||||||
|
> **v2.1 (2026-06-10):** `/upload-db` umí komprimovanou+šifrovanou DB (`.db.xz.enc`)
|
||||||
|
> od `jnj_mailbox_sync >= v1.2`. Staré `.db` se smažou **až po** úspěšném
|
||||||
|
> dešifrování/rozbalení (při chybě zůstane poslední dobrá DB). Vyžaduje `lzma`
|
||||||
|
> (stdlib) — ověřeno v kontejneru. Nasazení = jen restart (app.py je bind-mount),
|
||||||
|
> bez rebuildu.
|
||||||
|
|
||||||
|
> **v2.3 (2026-06-10):** `/item/{token}` — při `Accept: application/json`
|
||||||
|
> (klient `janssenpc_file_receive >= v1.2`) vrací `{"data": "<fernet_b64>"}`
|
||||||
|
> místo binární přílohy. Důvod: JNJ filtr (Zscaler/SiteMinder) blokoval binární
|
||||||
|
> downloady — zachytil odpověď, replay GET bez auth (401 v logu) a klientovi
|
||||||
|
> vrátil 403 + `?_sm_nck=1`. JSON inspekci příloh nespouští. Bez `Accept`
|
||||||
|
> hlavičky zůstává binární režim (zpětná kompatibilita s v1.1).
|
||||||
|
|
||||||
|
> **v2.2 (2026-06-10):** `/item/{token}` — oprava 500 u souborů s ne-ASCII znaky
|
||||||
|
> ve jméně (např. `▲▲...pdf`): `Content-Disposition` je nyní ASCII fallback +
|
||||||
|
> RFC 5987 `filename*` (HTTP hlavičky jsou latin-1, `▲` shazoval Response na
|
||||||
|
> UnicodeEncodeError). Zároveň se přesun do `##Trash` dělá až PO sestavení
|
||||||
|
> odpovědi — pád už neodstraní soubor z fronty. Klient (`janssenpc_file_receive`)
|
||||||
|
> hlavičku nečte, žádná změna na JNJ straně není potřeba.
|
||||||
|
|
||||||
|
> **Pozn. k nasazení:** `app.py` je bind-mountovaný (`/mnt/user/appdata/msgreceiver` → `/app`),
|
||||||
|
> takže pro změnu KÓDU stačí přepsat soubor + `docker restart msgreceiver`.
|
||||||
|
> `docker build` je potřeba jen při změně `requirements.txt`.
|
||||||
|
|||||||
@@ -1,20 +1,24 @@
|
|||||||
# app.py | v2.0 | 2026-06-08
|
# app.py | v2.3 | 2026-06-10
|
||||||
# FastAPI server pro příjem .msg a .db souborů, upload do Dropboxu a import do Graph API.
|
# FastAPI server pro příjem .msg a .db souborů, upload do Dropboxu a import do Graph API.
|
||||||
# Endpointy: /upload (.msg → /msgs + Graph import), /upload-db (.db → /msgs/db),
|
# Endpointy: /upload (.msg/.emsg → /msgs + Graph import),
|
||||||
|
# /upload-db (.db NEBO .db.xz.enc → Fernet desifruj + lzma rozbal → /msgs/db),
|
||||||
# /upload-dropbox (→ Dropbox /!!!Days/Downloads Z230),
|
# /upload-dropbox (→ Dropbox /!!!Days/Downloads Z230),
|
||||||
# /message-delete, /message-update (sync: smazání, přečtení, přesun složky),
|
# /message-delete, /message-update (sync: smazání, přečtení, přesun složky),
|
||||||
# /mirror-plan (diff manifestu z JNJ vůči schránce → smaže přebytky, vrátí to_add),
|
# /mirror-plan (diff manifestu z JNJ vůči schránce → smaže přebytky, vrátí to_add),
|
||||||
# /status (seznam souborů k odeslání na JNJ — jména zašifrována Fernetem),
|
# /status (seznam souborů k odeslání na JNJ — jména zašifrována Fernetem),
|
||||||
# /item/{enc_filename} (stažení souboru — enc_filename je Fernet token).
|
# /item/{enc_filename} (stažení souboru — enc_filename je Fernet token;
|
||||||
|
# Accept: application/json → {"data": fernet_b64}, jinak binárka).
|
||||||
|
|
||||||
from fastapi import FastAPI, UploadFile, File, Form, Header, HTTPException, Response
|
from fastapi import FastAPI, Request, UploadFile, File, Form, Header, HTTPException, Response
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
import shutil
|
|
||||||
import base64
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
|
import lzma
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from urllib.parse import quote
|
||||||
import os
|
import os
|
||||||
import dropbox
|
import dropbox
|
||||||
import msal
|
import msal
|
||||||
@@ -372,14 +376,27 @@ async def upload_db(
|
|||||||
):
|
):
|
||||||
if authorization != f"Bearer {TOKEN}":
|
if authorization != f"Bearer {TOKEN}":
|
||||||
raise HTTPException(status_code=401, detail="Unauthorized")
|
raise HTTPException(status_code=401, detail="Unauthorized")
|
||||||
if not file.filename.endswith(".db"):
|
|
||||||
raise HTTPException(status_code=400, detail="Only .db files accepted")
|
fn = file.filename or ""
|
||||||
|
is_enc = fn.endswith(".db.xz.enc") # jnj_mailbox_sync >= v1.2
|
||||||
|
if not (is_enc or fn.endswith(".db")):
|
||||||
|
raise HTTPException(status_code=400, detail="Only .db or .db.xz.enc files accepted")
|
||||||
|
|
||||||
|
content = await file.read()
|
||||||
|
if is_enc:
|
||||||
|
# Fernet desifra -> lzma rozbal -> plain .db (jako .emsg -> .msg u /upload)
|
||||||
|
content = lzma.decompress(_FERNET.decrypt(content))
|
||||||
|
db_filename = fn[: -len(".xz.enc")] # jnjemails_<ts>.db
|
||||||
|
else:
|
||||||
|
db_filename = fn
|
||||||
|
|
||||||
|
# Smazat stare AZ po uspesnem desifrovani/rozbaleni — pri chybe stara DB zustane.
|
||||||
for old in DB_DIR.glob("*.db"):
|
for old in DB_DIR.glob("*.db"):
|
||||||
old.unlink()
|
old.unlink()
|
||||||
dest = DB_DIR / file.filename
|
dest = DB_DIR / db_filename
|
||||||
with dest.open("wb") as f:
|
with dest.open("wb") as f:
|
||||||
shutil.copyfileobj(file.file, f)
|
f.write(content)
|
||||||
return {"status": "saved", "file": file.filename}
|
return {"status": "saved", "file": db_filename, "bytes": len(content), "encrypted": is_enc}
|
||||||
|
|
||||||
|
|
||||||
class MessageDeleteRequest(BaseModel):
|
class MessageDeleteRequest(BaseModel):
|
||||||
@@ -547,7 +564,7 @@ async def pending_files(authorization: str = Header(None)):
|
|||||||
|
|
||||||
|
|
||||||
@app.get("/item/{filename:path}")
|
@app.get("/item/{filename:path}")
|
||||||
async def download_file(filename: str, authorization: str = Header(None)):
|
async def download_file(filename: str, request: Request, authorization: str = Header(None)):
|
||||||
if authorization != f"Bearer {TOKEN}":
|
if authorization != f"Bearer {TOKEN}":
|
||||||
raise HTTPException(status_code=401, detail="Unauthorized")
|
raise HTTPException(status_code=401, detail="Unauthorized")
|
||||||
# filename je Fernet token (zašifrované původní jméno souboru)
|
# filename je Fernet token (zašifrované původní jméno souboru)
|
||||||
@@ -570,7 +587,28 @@ async def download_file(filename: str, authorization: str = Header(None)):
|
|||||||
|
|
||||||
encrypted = _FERNET.encrypt(raw)
|
encrypted = _FERNET.encrypt(raw)
|
||||||
|
|
||||||
# Přesun do Sent
|
if "application/json" in (request.headers.get("accept") or ""):
|
||||||
|
# v2.3: klient >= v1.2 — obsah jako JSON, ne binární příloha. Korporátní
|
||||||
|
# filtr (Zscaler/SiteMinder) pak nevidí "stahování souboru" a nespouští
|
||||||
|
# AV sandbox, který binární odpovědi blokoval (403 + ?_sm_nck=1).
|
||||||
|
# Fernet token je sám o sobě urlsafe-base64 text → rovnou do JSON.
|
||||||
|
resp = JSONResponse(content={"data": encrypted.decode()})
|
||||||
|
else:
|
||||||
|
# Starý klient (<= v1.1) — binární odpověď jako dřív.
|
||||||
|
# HTTP hlavičky jsou latin-1 — jméno s ne-ASCII znaky (např. ▲▲) by shodilo
|
||||||
|
# Response na UnicodeEncodeError (500). ASCII fallback + RFC 5987 filename*.
|
||||||
|
# Klient hlavičku stejně nečte (jméno zná z dešifrovaného tokenu).
|
||||||
|
fname = f"{orig_filename}.enc"
|
||||||
|
ascii_fallback = fname.encode("ascii", "ignore").decode().replace('"', "") or "file.enc"
|
||||||
|
resp = Response(
|
||||||
|
content=encrypted,
|
||||||
|
media_type="application/octet-stream",
|
||||||
|
headers={"Content-Disposition":
|
||||||
|
f"attachment; filename=\"{ascii_fallback}\"; filename*=UTF-8''{quote(fname)}"},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Přesun do Sent — až PO úspěšném sestavení odpovědi, aby případný pád
|
||||||
|
# neodstranil soubor z fronty UploadToJNJ dřív, než ho klient dostane.
|
||||||
sent_path = f"{DROPBOX_UPLOAD_TO_JNJ}/##Trash/{orig_filename}"
|
sent_path = f"{DROPBOX_UPLOAD_TO_JNJ}/##Trash/{orig_filename}"
|
||||||
try:
|
try:
|
||||||
dbx.files_move_v2(dropbox_path, sent_path, autorename=True)
|
dbx.files_move_v2(dropbox_path, sent_path, autorename=True)
|
||||||
@@ -578,8 +616,4 @@ async def download_file(filename: str, authorization: str = Header(None)):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.warning("download-file: nelze přesunout %s do Sent: %s", orig_filename, e)
|
log.warning("download-file: nelze přesunout %s do Sent: %s", orig_filename, e)
|
||||||
|
|
||||||
return Response(
|
return resp
|
||||||
content=encrypted,
|
|
||||||
media_type="application/octet-stream",
|
|
||||||
headers={"Content-Disposition": f'attachment; filename="{orig_filename}.enc"'},
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -0,0 +1,580 @@
|
|||||||
|
"""
|
||||||
|
jnj_mailbox_sync v1.1
|
||||||
|
Nazev: jnj_mailbox_sync_v1.1.py
|
||||||
|
Verze: 1.1.0
|
||||||
|
Datum: 2026-06-10
|
||||||
|
Autor: vladimir.buzalka
|
||||||
|
|
||||||
|
Popis:
|
||||||
|
Synchronizace JNJ Outlooku (MAPI) -> osobni schranka + bookkeeping v SQLite.
|
||||||
|
Nasledník inbox_full_sync_v1.1. Nove navic sleduje PRESUN emailu mezi
|
||||||
|
slozkami a priznak "uz neni ve schrance" — BEZ opetovneho prenosu tela.
|
||||||
|
|
||||||
|
Scope: primarni schranka, Inbox + Sent Items + Deleted Items vcetne vsech
|
||||||
|
podsložek. (v1.1: pridano Deleted Items — uzivatel po precteni maily MAZE,
|
||||||
|
takze precteny-smazany mail se ted sleduje jako /Deleted Items misto aby
|
||||||
|
skoncil jako "ghost" s posledni cestou /Inbox.)
|
||||||
|
Online Archive se NEskenuje — firemni pravidla tam presouvaji nejstarsi
|
||||||
|
emaily, ktere uz mame davno stazene. Kdyz email ze skenovane schranky
|
||||||
|
zmizi (presun do nesken. slozky / vyprazdneni Deleted), ponecha se POSLEDNI
|
||||||
|
ZNAMA cesta a nastavi se priznak not_in_mailbox_anymore=1.
|
||||||
|
|
||||||
|
Identita emailu = Internet Message-ID (stabilni pres presuny). EntryID se
|
||||||
|
pri presunu meni — drzime ho jen jako pomocny.
|
||||||
|
|
||||||
|
Sloupce cest v SQLite:
|
||||||
|
folder = cesta pri PRVNIM zachyceni (historie, neprepisuje se)
|
||||||
|
jnj_folder = AKTUALNI ziva cesta (prepisuje se pri presunu)
|
||||||
|
Sloupec updated_at se bumpne pri insertu i kazde zmene — slouzi pro
|
||||||
|
inkrementalni sync na domaci strane (watermark).
|
||||||
|
|
||||||
|
Rezimy (--mode):
|
||||||
|
capture (default) Projde cely Inbox+Sent+Deleted Items, nove emaily ulozi a nahraje
|
||||||
|
(jako inbox_full_sync). Okno --days se IGNORUJE (bere VSE).
|
||||||
|
Detekce "opustilo schranku" se v tomto rezimu NEdela (neskenuje
|
||||||
|
se archiv, takze by to delalo falesne poplachy).
|
||||||
|
update-paths Jen METADATA. Projde okno poslednich --days dni, aktualizuje
|
||||||
|
cesty/precteno znamych emailu a oznaci ty, co ze schranky
|
||||||
|
zmizely. NIC nenahrava (zadny .msg upload).
|
||||||
|
full-update update-paths + navic dorovna chybejici emaily (SaveAs+upload).
|
||||||
|
|
||||||
|
Argumenty:
|
||||||
|
--mode {capture,update-paths,full-update} default capture
|
||||||
|
--days N velikost okna ve dnech (default 30). 0 = cely Inbox+Sent+Deleted Items.
|
||||||
|
--dry-run NIC nezapise/nenahraje, jen vypise co by udelal (+ souhrn).
|
||||||
|
--limit N zpracovat max N polozek (rychly test).
|
||||||
|
--no-db-upload na konci nenahravat SQLite na server.
|
||||||
|
|
||||||
|
Spousteni:
|
||||||
|
# 1) Nejdriv si PRECIST, co by full-update prinesl (NIC nezmeni):
|
||||||
|
python jnj_mailbox_sync_v1.1.py --mode full-update --days 30 --dry-run
|
||||||
|
|
||||||
|
# 2) Pak naostro:
|
||||||
|
python jnj_mailbox_sync_v1.1.py --mode full-update --days 30
|
||||||
|
|
||||||
|
Zavislosti:
|
||||||
|
pywin32, requests, cryptography, sqlite3 (stdlib).
|
||||||
|
Python 3.10+, Windows, Outlook musi byt spusteny a prihlaseny.
|
||||||
|
|
||||||
|
Historie verzi:
|
||||||
|
1.0.0 2026-06-09 Nova generace: rezimy capture/update-paths/full-update,
|
||||||
|
sledovani presunu (jnj_folder), priznak
|
||||||
|
not_in_mailbox_anymore, sloupec updated_at pro
|
||||||
|
inkrementalni sync domu. Nasledník inbox_full_sync_v1.1.
|
||||||
|
1.1.0 2026-06-10 + Deleted Items do SYNC_FOLDERS (olFolderDeletedItems=3).
|
||||||
|
Precteny-smazany mail se ted sleduje jako /Deleted Items;
|
||||||
|
drive ghost s posledni cestou /Inbox. Pri 1. behu se
|
||||||
|
drive zghostovane maily najdou v Deleted -> jnj_folder
|
||||||
|
opraven na /Deleted Items + not_in_mailbox_anymore=0.
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import win32com.client
|
||||||
|
import requests
|
||||||
|
import urllib3
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
|
|
||||||
|
if hasattr(sys.stdout, "reconfigure"):
|
||||||
|
sys.stdout.reconfigure(encoding="utf-8", errors="replace")
|
||||||
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
# ─── KONFIGURACE ──────────────────────────────────────────────────────────────
|
||||||
|
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
|
||||||
|
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
|
||||||
|
DB_UPLOAD_URL = "https://msgs.buzalka.cz/upload-db"
|
||||||
|
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
|
||||||
|
LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnj_mailbox_sync_errors.log"
|
||||||
|
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
|
||||||
|
SCRIPT_NAME = "jnj_mailbox_sync"
|
||||||
|
SCRIPT_VERSION = "1.1.0"
|
||||||
|
|
||||||
|
# olFolderInbox=6, olFolderSentMail=5, olFolderDeletedItems=3
|
||||||
|
SYNC_FOLDERS = [(6, "Inbox"), (5, "Sent Items"), (3, "Deleted Items")]
|
||||||
|
OLSAVE_MSG = 3 # OlSaveAsType.olMSG
|
||||||
|
|
||||||
|
# Sifrovaci klic odvozeny z TOKENu (stejny algoritmus jako server)
|
||||||
|
_FERNET = Fernet(base64.urlsafe_b64encode(hashlib.sha256(TOKEN.encode()).digest()))
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
filename=LOG_PATH,
|
||||||
|
level=logging.ERROR,
|
||||||
|
format="%(asctime)s | %(message)s",
|
||||||
|
datefmt="%Y-%m-%d %H:%M:%S",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
# ─── SQLite ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _add_missing_columns(conn, table, columns):
    """Add every ``(name, ddl)`` column from *columns* that *table* lacks."""
    existing = {row[1] for row in conn.execute(f"PRAGMA table_info({table})")}
    for col, ddl in columns:
        if col not in existing:
            conn.execute(f"ALTER TABLE {table} ADD COLUMN {col} {ddl}")


def init_db(conn):
    """Create the bookkeeping schema and migrate an older database in place.

    Creates the ``messages``, ``runs`` and ``log`` tables and their indexes
    when they do not exist, then adds the columns an older database file may
    still be missing.  Calling it repeatedly on the same database is a no-op.

    Args:
        conn: Open ``sqlite3`` connection to the bookkeeping database.
    """
    conn.execute("""
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT NOT NULL,
            subject TEXT,
            sender TEXT,
            received_at TEXT,
            folder TEXT,
            source TEXT,
            uploaded_at TEXT DEFAULT (datetime('now')),
            entry_id TEXT,
            graph_id TEXT,
            is_read INTEGER DEFAULT 0,
            jnj_folder TEXT,
            not_in_mailbox_anymore INTEGER DEFAULT 0,
            left_mailbox_at TEXT,
            updated_at TEXT
        )
    """)
    conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)")

    conn.execute("""
        CREATE TABLE IF NOT EXISTS runs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            script TEXT NOT NULL,
            version TEXT,
            started_at TEXT NOT NULL,
            finished_at TEXT,
            mode TEXT,
            window_days INTEGER,
            dry_run INTEGER DEFAULT 0,
            found INTEGER DEFAULT 0,
            new_captured INTEGER DEFAULT 0,
            path_updated INTEGER DEFAULT 0,
            read_updated INTEGER DEFAULT 0,
            returned INTEGER DEFAULT 0,
            left_mailbox INTEGER DEFAULT 0,
            skipped INTEGER DEFAULT 0,
            errors INTEGER DEFAULT 0
        )
    """)

    conn.execute("""
        CREATE TABLE IF NOT EXISTS log (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            run_id INTEGER REFERENCES runs(id),
            level TEXT NOT NULL,
            event TEXT NOT NULL,
            subject TEXT,
            folder TEXT,
            graph_id TEXT,
            detail TEXT,
            created_at TEXT DEFAULT (datetime('now'))
        )
    """)
    conn.execute("CREATE INDEX IF NOT EXISTS idx_log_run_id ON log(run_id)")

    # Migration of an existing database: add only the columns that are really
    # missing, so any other failure (locked / read-only file, ...) surfaces
    # instead of being silently swallowed.
    _add_missing_columns(conn, "messages", [
        ("entry_id", "TEXT"), ("graph_id", "TEXT"), ("is_read", "INTEGER DEFAULT 0"),
        ("jnj_folder", "TEXT"), ("not_in_mailbox_anymore", "INTEGER DEFAULT 0"),
        ("left_mailbox_at", "TEXT"), ("updated_at", "TEXT"),
    ])
    _add_missing_columns(conn, "runs", [
        ("mode", "TEXT"), ("window_days", "INTEGER"), ("dry_run", "INTEGER DEFAULT 0"),
        ("found", "INTEGER DEFAULT 0"), ("new_captured", "INTEGER DEFAULT 0"),
        ("path_updated", "INTEGER DEFAULT 0"), ("read_updated", "INTEGER DEFAULT 0"),
        ("returned", "INTEGER DEFAULT 0"), ("left_mailbox", "INTEGER DEFAULT 0"),
    ])

    # Index on a column that may only have been created by the migration above.
    conn.execute("CREATE INDEX IF NOT EXISTS idx_updated_at ON messages(updated_at)")
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def start_run(conn, mode, days, dry):
    """Insert a new row into ``runs`` for this execution and return its id.

    Args:
        conn: Open ``sqlite3`` connection.
        mode: Run mode string stored in the ``mode`` column.
        days: Window size stored in the ``window_days`` column.
        dry: Truthy for a dry run; stored as 1/0 in ``dry_run``.

    Returns:
        The rowid of the inserted ``runs`` row.
    """
    dry_flag = 1 if dry else 0
    params = (SCRIPT_NAME, SCRIPT_VERSION, mode, days, dry_flag)
    cursor = conn.execute(
        """INSERT INTO runs (script, version, started_at, mode, window_days, dry_run)
        VALUES (?, ?, datetime('now'), ?, ?, ?)""",
        params,
    )
    conn.commit()
    return cursor.lastrowid
|
||||||
|
|
||||||
|
|
||||||
|
def finish_run(conn, run_id, stats):
    """Stamp the run row as finished and store its final counters.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the ``runs`` row to update.
        stats: Mapping holding the counters written to the row; keys other
            than the eight stored columns are ignored.
    """
    counter_names = (
        "found", "new_captured", "path_updated", "read_updated",
        "returned", "left_mailbox", "skipped", "errors",
    )
    params = [stats[name] for name in counter_names]
    params.append(run_id)
    conn.execute(
        """UPDATE runs SET finished_at=datetime('now'),
        found=?, new_captured=?, path_updated=?, read_updated=?,
        returned=?, left_mailbox=?, skipped=?, errors=?
        WHERE id=?""",
        params,
    )
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def db_log(conn, run_id, level, event, subject=None, folder=None, graph_id=None, detail=None):
    """Append one event row to the ``log`` table and commit immediately.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the run the event belongs to.
        level: Severity label stored verbatim (callers pass "INFO"/"ERROR").
        event: Short event name.
        subject, folder, graph_id, detail: Optional context columns.
    """
    record = (run_id, level, event, subject, folder, graph_id, detail)
    conn.execute(
        """INSERT INTO log (run_id, level, event, subject, folder, graph_id, detail)
        VALUES (?, ?, ?, ?, ?, ?, ?)""",
        record,
    )
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def info(conn, run_id, event, **kw):
    """Log *event* for *run_id* at INFO level; extra keywords go to ``db_log``."""
    level = "INFO"
    db_log(conn, run_id, level, event, **kw)
|
||||||
|
|
||||||
|
|
||||||
|
def error(conn, run_id, event, **kw):
    """Record an ERROR event in the error log file and in the SQLite log table.

    The module configures ``logging`` to write to ``LOG_PATH`` and ``main``
    tells the user that errors were logged there, so the event is also sent
    through ``logging``; previously only the database row was written.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the run the event belongs to.
        event: Short event name.
        **kw: Optional context forwarded to ``db_log`` (subject, folder,
            graph_id, detail).
    """
    context = " | ".join(f"{key}={value}" for key, value in kw.items() if value is not None)
    # File log first, so the error is kept even if the database write fails.
    logging.error("%s | %s", event, context)
    db_log(conn, run_id, "ERROR", event, **kw)
|
||||||
|
|
||||||
|
|
||||||
|
def db_get(conn, mid):
    """Fetch the tracked state of one message.

    Args:
        conn: Open ``sqlite3`` connection.
        mid: Value of the ``message_id`` column to look up.

    Returns:
        A dict with keys ``message_id``, ``folder``, ``jnj_folder``,
        ``is_read`` and ``not_in_mailbox_anymore``, or ``None`` when no row
        matches.
    """
    fields = ("message_id", "folder", "jnj_folder", "is_read", "not_in_mailbox_anymore")
    found = conn.execute(
        """SELECT message_id, folder, jnj_folder, is_read, not_in_mailbox_anymore
        FROM messages WHERE message_id=?""",
        (mid,),
    ).fetchone()
    if found is None:
        return None
    return dict(zip(fields, found))
|
||||||
|
|
||||||
|
|
||||||
|
def apply_update(conn, mid, changes):
    """Write changed columns for one message and bump ``updated_at``.

    Args:
        conn: Open ``sqlite3`` connection.
        mid: ``message_id`` of the row to update.
        changes: Mapping of column name to new value.  Column names are
            interpolated into the SQL text, so they must come from code,
            never from external input.  ``updated_at`` is refreshed even
            when the mapping is empty.
    """
    assignments = [f"{column}=?" for column in changes]
    assignments.append("updated_at=datetime('now')")
    params = [*changes.values(), mid]
    conn.execute(f"UPDATE messages SET {', '.join(assignments)} WHERE message_id=?", params)
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Outlook / prenos ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def get_mid(item) -> str:
    """Return the identity key used to track *item*.

    Reads the property named by ``PR_INTERNET_MESSAGE_ID`` through the item's
    ``PropertyAccessor``.  When that read fails or yields an empty value, the
    key falls back to ``"entryid:<EntryID>"``.
    """
    try:
        message_id = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
    except Exception:
        message_id = None
    if message_id:
        return message_id
    return f"entryid:{item.EntryID}"
|
||||||
|
|
||||||
|
|
||||||
|
def upload_msg(msg_path, filename, folder=""):
    """Encrypt a saved message file and POST it to ``UPLOAD_URL``.

    Args:
        msg_path: Path of the file to read.
        filename: Name whose stem is reused for the uploaded ``.emsg`` file.
        folder: Value sent in the ``folder`` form field.

    Returns:
        The decoded JSON body of the server response.

    Raises:
        requests.HTTPError: When the response status is not OK; the message
            carries the status, reason and the first 200 characters of the body.
    """
    with open(msg_path, "rb") as source:
        payload = _FERNET.encrypt(source.read())
    upload_name = f"{Path(filename).stem}.emsg"
    auth_headers = {"Authorization": f"Bearer {TOKEN}"}
    form_files = {"file": (upload_name, payload, "application/octet-stream")}
    resp = requests.post(
        UPLOAD_URL,
        headers=auth_headers,
        files=form_files,
        data={"folder": folder},
        timeout=60,
    )
    if resp.ok:
        return resp.json()
    raise requests.HTTPError(f"{resp.status_code} {resp.reason} | {resp.text[:200]}")
|
||||||
|
|
||||||
|
|
||||||
|
def upload_db(db_path):
    """Upload the SQLite file to ``DB_UPLOAD_URL`` (best effort).

    The file is sent under a timestamped name ``jnjemails_<ts>.db``.  Failures
    are printed and never raised, so a failed upload does not abort the run.

    Args:
        db_path: Path of the SQLite file to send.
    """
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"jnjemails_{ts}.db"
    try:
        with open(db_path, "rb") as f:
            resp = requests.post(
                DB_UPLOAD_URL,
                headers={"Authorization": f"Bearer {TOKEN}"},
                files={"file": (filename, f, "application/octet-stream")},
                timeout=120,
            )
        # Check the status first: an error reply with a JSON body (e.g. 401)
        # would otherwise be printed as if the upload had succeeded.
        if not resp.ok:
            print(f" DB upload CHYBA: {resp.status_code} {resp.reason} | {resp.text[:200]}")
            return
        print(f" DB upload: {resp.json()}")
    except Exception as e:
        print(f" DB upload CHYBA: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def capture_new(conn, run_id, item, mid, current, is_read, subject, stats):
    """Capture a message not yet in the database: save, upload, insert.

    The item is saved as a .msg file into a temporary directory, uploaded via
    ``upload_msg`` and then recorded in ``messages`` with ``INSERT OR IGNORE``.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the current run (used for the log row).
        item: Outlook mail item (COM object).
        mid: Identity key stored in ``message_id``.
        current: Folder path stored in both ``folder`` and ``jnj_folder``.
        is_read: 1/0 read flag.
        subject: Subject text.
        stats: Run counters; not touched here.

    Returns:
        True on success.  Errors from saving or uploading propagate to the caller.
    """
    with tempfile.TemporaryDirectory() as workdir:
        msg_name = f"{item.EntryID[-20:]}.msg"
        msg_file = Path(workdir) / msg_name
        item.SaveAs(str(msg_file), OLSAVE_MSG)
        result = upload_msg(msg_file, msg_name, current)
    graph_id = result.get("graph_id")

    # Each attribute is read defensively; a failing read is stored as empty.
    try:
        received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
    except Exception:
        received = None
    try:
        sender = item.SenderEmailAddress or ""
    except Exception:
        sender = ""

    row = (mid, subject, sender, received, current, SCRIPT_NAME,
           item.EntryID, graph_id, is_read, current)
    conn.execute(
        """INSERT OR IGNORE INTO messages
        (message_id, subject, sender, received_at, folder, source,
        entry_id, graph_id, is_read, jnj_folder,
        not_in_mailbox_anymore, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, datetime('now'))""",
        row,
    )
    conn.commit()
    info(conn, run_id, "captured", subject=subject, folder=current, graph_id=graph_id)
    print(f" NEW | {subject[:70]}")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def process_item(conn, run_id, item, current, stats, seen, mode, dry):
    """Reconcile one Outlook mail item with its bookkeeping row.

    An unknown item is captured (modes ``capture`` / ``full-update``) or only
    counted (``update-paths``).  For a known item the folder path, read flag
    and the "left mailbox" flag are compared and changed values are written.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the current run.
        item: Outlook mail item (COM object).
        current: Folder path the item was found in.
        stats: Run counters, updated in place.
        seen: Set collecting the identity keys met during this scan.
        mode: Run mode string.
        dry: When truthy, counters and output are produced but message rows
            are neither inserted nor updated.
    """
    try:
        mid = get_mid(item)
    except Exception:
        return  # identity unreadable -> the item cannot be tracked
    seen.add(mid)
    stats["found"] += 1

    try:
        is_read = 0 if item.UnRead else 1
    except Exception:
        is_read = 0
    subject = str(getattr(item, "Subject", "") or "")

    known = db_get(conn, mid)

    # -- New message (not in the database) ---------------------------------
    if known is None:
        if mode not in ("capture", "full-update"):
            # update-paths: the body is not transferred, so it cannot be added
            stats["new_uncaptured"] += 1
            return
        if dry:
            stats["new_captured"] += 1
            print(f" NEW* | {subject[:70]}")
            return
        try:
            if capture_new(conn, run_id, item, mid, current, is_read, subject, stats):
                stats["new_captured"] += 1
        except Exception as e:
            stats["errors"] += 1
            error(conn, run_id, "capture_error", subject=subject, folder=current, detail=str(e))
            print(f" CHYBA NEW | {subject[:50]} | {e}")
        return

    # -- Known message: collect changes and their printable description ----
    changes = {}
    what = []
    last_path = known.get("jnj_folder") or known.get("folder")
    if last_path != current:
        changes["jnj_folder"] = current
        stats["path_updated"] += 1
        what.append(f"-> {current}")
    if known.get("is_read") != is_read:
        changes["is_read"] = is_read
        stats["read_updated"] += 1
        what.append("precteno" if is_read else "neprecteno")
    if known.get("not_in_mailbox_anymore"):
        # Seen again after being flagged as gone -> clear the flag.
        changes["not_in_mailbox_anymore"] = 0
        changes["left_mailbox_at"] = None
        stats["returned"] += 1
        what.append("vraceno do schranky")

    if not changes:
        stats["skipped"] += 1
        return

    if not dry:
        apply_update(conn, mid, changes)
    marker = "*" if dry else " "
    print(f" UPD{marker} | {subject[:55]} | {', '.join(what)}")
    info(conn, run_id, "path_update", subject=subject, folder=current, detail="; ".join(what))
|
||||||
|
|
||||||
|
|
||||||
|
def walk(conn, run_id, folder, folder_path, cutoff_local, stats, seen, mode, dry, limit):
    """Process the mail items of *folder* and recurse into its subfolders.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the current run.
        folder: Outlook folder (COM object).
        folder_path: Path of the parent; this folder's name is appended to it.
        cutoff_local: Local-time ``datetime``; only items received at or after
            it are visited.  ``None`` visits every item.
        stats: Run counters, updated in place.
        seen: Set collecting identity keys of visited items.
        mode: Run mode string, passed on to ``process_item``.
        dry: Dry-run flag, passed on to ``process_item``.
        limit: Stop once ``stats["found"]`` reaches this value (0 = no limit).
    """
    # Local import: the module header imports only datetime and timedelta.
    from datetime import timezone

    current = f"{folder_path}/{folder.Name}"
    try:
        items = folder.Items
        if cutoff_local is not None:
            # Outlook compares date-times in DASL (@SQL=) filters in UTC, so the
            # local cutoff is converted first; formatting the local time
            # directly shifts the window by the UTC offset.
            cutoff_utc = cutoff_local.astimezone(timezone.utc)
            restrict = ("@SQL=\"urn:schemas:httpmail:datereceived\" >= '%s'"
                        % cutoff_utc.strftime("%Y/%m/%d %H:%M:%S"))
            items = items.Restrict(restrict)
        items.Sort("[ReceivedTime]", True)  # newest first
    except Exception as e:
        print(f" CHYBA slozka {current}: {e}")
        error(conn, run_id, "folder_error", folder=current, detail=str(e))
        return

    n = 0
    for item in items:
        if limit and stats["found"] >= limit:
            break
        try:
            # Only mail items (message class IPM.NOTE*) are processed.
            if not str(getattr(item, "MessageClass", "")).upper().startswith("IPM.NOTE"):
                continue
        except Exception:
            continue
        process_item(conn, run_id, item, current, stats, seen, mode, dry)
        n += 1

    print(f" {current}: {n} polozek")
    info(conn, run_id, "folder_done", folder=current, detail=str(n))

    try:
        subs = list(folder.Folders)
    except Exception:
        subs = []
    for sub in subs:
        if limit and stats["found"] >= limit:
            break
        walk(conn, run_id, sub, current, cutoff_local, stats, seen, mode, dry, limit)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_dt(s):
|
||||||
|
if not s:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
dt = datetime.fromisoformat(s)
|
||||||
|
if dt.tzinfo:
|
||||||
|
dt = dt.astimezone().replace(tzinfo=None)
|
||||||
|
return dt
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def flag_left_mailbox(conn, run_id, cutoff_local, seen, scanned_roots, stats, dry):
    """Flag tracked messages inside the window that this scan did not see.

    A message is flagged (``not_in_mailbox_anymore=1``, ``left_mailbox_at``
    and ``updated_at`` set to now) when all of these hold:

    * its ``message_id`` is not in *seen* and it is not flagged already;
    * its last known path (``jnj_folder``, else ``folder``) equals one of
      *scanned_roots* or lies below it; messages last seen outside the
      scanned folders are not judged, since their absence says nothing;
    * its ``received_at`` parses and is not older than *cutoff_local*.

    The last known path is left untouched.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the current run.
        cutoff_local: Naive local ``datetime`` marking the start of the window.
        seen: Identity keys met during the scan.
        scanned_roots: Folder paths of the scanned root folders.
        stats: Run counters; ``left_mailbox`` is incremented per flagged row,
            in dry-run mode too.
        dry: When truthy, rows are only reported, not updated.
    """
    roots = tuple(scanned_roots)
    # Match on a path-segment boundary: a bare startswith(root) would also
    # accept sibling folders such as "<root>-Old" that were never scanned.
    root_prefixes = tuple(f"{root}/" for root in roots)

    rows = conn.execute(
        """SELECT message_id, received_at, jnj_folder, folder, not_in_mailbox_anymore
        FROM messages""").fetchall()

    to_flag = []
    for mid, received_at, jnjf, fld, flag in rows:
        if mid in seen or flag:
            continue
        path = jnjf or fld or ""
        if path not in roots and not path.startswith(root_prefixes):
            continue  # last known path outside the scanned folders -> not judged
        rec = _parse_dt(received_at)
        if rec is None or rec < cutoff_local:
            continue  # outside the window / unparsable -> not judged
        to_flag.append((mid, path))

    for mid, path in to_flag:
        if not dry:
            conn.execute(
                """UPDATE messages SET not_in_mailbox_anymore=1,
                left_mailbox_at=datetime('now'), updated_at=datetime('now')
                WHERE message_id=?""", (mid,))
        stats["left_mailbox"] += 1
        print(f" GONE{'*' if dry else ' '} | {path}")
    if not dry and to_flag:
        conn.commit()
    info(conn, run_id, "left_mailbox", detail=str(len(to_flag)))
|
||||||
|
|
||||||
|
|
||||||
|
# ─── MAIN ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: scan the Outlook folders and sync the database.

    Parses the arguments, opens the SQLite database, walks every folder in
    ``SYNC_FOLDERS``, optionally flags messages that left the mailbox, prints
    a summary and, unless disabled or in dry-run mode, uploads the database.
    The connection is closed even when the scan raises.
    """
    ap = argparse.ArgumentParser(description=f"jnj_mailbox_sync v{SCRIPT_VERSION}")
    ap.add_argument("--mode", choices=["capture", "update-paths", "full-update"],
                    default="capture")
    ap.add_argument("--days", type=int, default=30,
                    help="Okno ve dnech pro update-paths/full-update (0 = vse)")
    ap.add_argument("--dry-run", action="store_true",
                    help="Nic nezapise/nenahraje, jen vypise co by udelal")
    ap.add_argument("--limit", type=int, default=0, help="Max N polozek (test)")
    ap.add_argument("--no-db-upload", action="store_true")
    args = ap.parse_args()

    mode, dry = args.mode, args.dry_run

    # capture ignores the window (takes everything); other modes use it (0 = all)
    if mode == "capture":
        cutoff_local = None
    else:
        cutoff_local = None if args.days == 0 else (datetime.now() - timedelta(days=args.days))

    win = "vse" if cutoff_local is None else f"{args.days} dni (od {cutoff_local:%Y-%m-%d %H:%M})"
    print(f"=== jnj_mailbox_sync v{SCRIPT_VERSION} ===")
    print(f"Start: {datetime.now():%Y-%m-%d %H:%M:%S}")
    print(f"Rezim: {mode} Okno: {win} {'[DRY-RUN — nic se nemeni]' if dry else ''}")
    print(f"DB: {DB_PATH}")

    conn = sqlite3.connect(DB_PATH)
    try:
        init_db(conn)
        run_id = start_run(conn, mode, args.days, dry)

        outlook = win32com.client.Dispatch("Outlook.Application")
        ns = outlook.GetNamespace("MAPI")

        stats = {"found": 0, "new_captured": 0, "new_uncaptured": 0, "path_updated": 0,
                 "read_updated": 0, "returned": 0, "left_mailbox": 0, "skipped": 0, "errors": 0}
        seen = set()

        scanned_roots = set()
        for fid, label in SYNC_FOLDERS:
            root = ns.GetDefaultFolder(fid)
            mailbox = root.Parent.Name
            scanned_roots.add(f"/{mailbox}/{root.Name}")
            print(f"\n=== {label} ({mailbox}) ===")
            walk(conn, run_id, root, f"/{mailbox}", cutoff_local, stats, seen, mode, dry, args.limit)

        # "Left the mailbox" detection: only windowed modes with a real cutoff,
        # and never with --limit, because a truncated scan leaves `seen` incomplete.
        if mode in ("update-paths", "full-update") and cutoff_local is not None and not (args.limit):
            print("\n--- Kontrola 'opustilo schranku' (v okne, Inbox/Sent/Deleted) ---")
            flag_left_mailbox(conn, run_id, cutoff_local, seen, scanned_roots, stats, dry)
        elif args.limit:
            print("\n(--limit aktivni -> detekce 'opustilo schranku' preskocena)")

        finish_run(conn, run_id, stats)

        # -- Summary ---------------------------------------------------------
        print(f"\n{'='*60}")
        print(f"SOUHRN [{mode}{' / DRY-RUN' if dry else ''}]")
        print(f" Nalezeno ve schrance: {stats['found']}")
        if mode in ("capture", "full-update"):
            lbl = "by se nahralo" if dry else "nahrano"
            print(f" Nove zachyceno ({lbl}): {stats['new_captured']}")
        else:
            print(f" Nove (bez tela, nedorovnano):{stats['new_uncaptured']}")
        print(f" Aktualizovana cesta: {stats['path_updated']}")
        print(f" Zmena precteno/neprecteno: {stats['read_updated']}")
        print(f" Vraceno do schranky: {stats['returned']}")
        print(f" Opustilo schranku (GONE): {stats['left_mailbox']}")
        print(f" Beze zmeny (skip): {stats['skipped']}")
        print(f" Chyby: {stats['errors']}")
        print(f"{'='*60}")

        if dry:
            print("DRY-RUN: SQLite ani server se NEMENILY.")
        elif not args.no_db_upload:
            print("\nUpload SQLite na server...")
            upload_db(DB_PATH)

        print(f"\nKonec: {datetime.now():%Y-%m-%d %H:%M:%S}")
        if stats["errors"]:
            print(f"Chyby logovany do: {LOG_PATH}")
    finally:
        # Release the database handle even when Outlook or the scan failed.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
# jnj_mailbox_sync v1.2.0
|
||||||
|
|
||||||
|
**Soubor:** `jnj_mailbox_sync_v1.2.py`
|
||||||
|
**Datum:** 2026-06-10
|
||||||
|
**Autor:** vladimir.buzalka
|
||||||
|
**Běží:** JNJ stroj (Outlook MAPI), Python z Thonny.
|
||||||
|
|
||||||
|
## Co to je
|
||||||
|
|
||||||
|
Synchronizace JNJ Outlooku (MAPI) → osobní schránka (přes msgreceiver) + bookkeeping
|
||||||
|
v SQLite (`C:\Users\vbuzalka\SQLITE\jnjemails.db`). Sleduje přesuny e-mailů mezi
|
||||||
|
složkami a příznak „už není ve schránce" — bez opětovného přenosu těla.
|
||||||
|
Skenované složky: **Inbox + Sent Items + Deleted Items** (vč. podsložek).
|
||||||
|
|
||||||
|
## Novinka v1.2 — komprimovaný + šifrovaný upload SQLite
|
||||||
|
|
||||||
|
Dřív se ~37 MB SQLite posílalo na `/upload-db` **plain** (jen HTTPS+token).
|
||||||
|
Teď `upload_db()`:
|
||||||
|
|
||||||
|
1. **Komprese na max** — `lzma` (xz), `preset 9 | PRESET_EXTREME` (stdlib).
|
||||||
|
2. **Šifrování** — stávající Fernet (klíč odvozený z TOKENu, `sha256 → urlsafe_b64`).
|
||||||
|
3. Upload jako `jnjemails_<ts>.db.xz.enc`.
|
||||||
|
|
||||||
|
Přijímací **msgreceiver `/upload-db` (app.py ≥ v2.1)** soubor Fernetem dešifruje,
|
||||||
|
lzma rozbalí a uloží plain `.db` do `/msgs/db`. Domácí `jnj_tower_ingest` tím pádem
|
||||||
|
**zůstává beze změny** (čte nejnovější plain `.db` read-only).
|
||||||
|
|
||||||
|
Důvod šifrování: bezpečný průchod přes JNJ proxy (Zscaler/DLP) — stejný vzor jako
|
||||||
|
`.emsg` u jednotlivých `.msg`. Round-trip ověřen (bajt na bajt).
|
||||||
|
|
||||||
|
## Závislost na serveru
|
||||||
|
|
||||||
|
⚠️ Vyžaduje **msgreceiver app.py ≥ v2.1**. Server bere `.db.xz.enc` i starý plain `.db`,
|
||||||
|
takže nasazovací pořadí je **server → JNJ** bez výpadku.
|
||||||
|
|
||||||
|
## Argumenty
|
||||||
|
|
||||||
|
`--mode {capture,update-paths,full-update}` (default capture), `--days N`
|
||||||
|
(0 = celé), `--dry-run`, `--limit N`, `--no-db-upload`.
|
||||||
|
|
||||||
|
## Spouštění (JNJ stroj, plné cesty)
|
||||||
|
|
||||||
|
```
|
||||||
|
"C:\Users\vbuzalka\AppData\Local\Programs\Thonny\python.exe" "c:\Users\vbuzalka\OneDrive - JNJ\##JNJPrenos\Python\jnj_mailbox_sync_v1.2.py" --mode full-update --days 30
|
||||||
|
```
|
||||||
|
|
||||||
|
## Revert
|
||||||
|
|
||||||
|
Stará verze: `Trash/jnj_mailbox_sync_v1.1.py` (plain DB upload). Server zůstává
|
||||||
|
zpětně kompatibilní, takže revert na JNJ straně nevyžaduje zásah na serveru.
|
||||||
|
|
||||||
|
## Historie
|
||||||
|
|
||||||
|
- **1.0.0** — režimy capture/update-paths/full-update, sledování přesunů, updated_at.
|
||||||
|
- **1.1.0** — + Deleted Items do skenovaných složek.
|
||||||
|
- **1.2.0** — upload SQLite komprimován (lzma/xz max) + šifrován (Fernet) → `.db.xz.enc`;
|
||||||
|
vyžaduje msgreceiver app.py ≥ v2.1.
|
||||||
@@ -0,0 +1,604 @@
|
|||||||
|
"""
|
||||||
|
jnj_mailbox_sync v1.2
|
||||||
|
Nazev: jnj_mailbox_sync_v1.2.py
|
||||||
|
Verze: 1.2.0
|
||||||
|
Datum: 2026-06-10
|
||||||
|
Autor: vladimir.buzalka
|
||||||
|
|
||||||
|
Popis:
|
||||||
|
Synchronizace JNJ Outlooku (MAPI) -> osobni schranka + bookkeeping v SQLite.
|
||||||
|
Nasledník inbox_full_sync_v1.1. Nove navic sleduje PRESUN emailu mezi
|
||||||
|
slozkami a priznak "uz neni ve schrance" — BEZ opetovneho prenosu tela.
|
||||||
|
|
||||||
|
Scope: primarni schranka, Inbox + Sent Items + Deleted Items vcetne vsech
|
||||||
|
podsložek. (v1.1: pridano Deleted Items — uzivatel po precteni maily MAZE,
|
||||||
|
takze precteny-smazany mail se ted sleduje jako /Deleted Items misto aby
|
||||||
|
skoncil jako "ghost" s posledni cestou /Inbox.)
|
||||||
|
Online Archive se NEskenuje — firemni pravidla tam presouvaji nejstarsi
|
||||||
|
emaily, ktere uz mame davno stazene. Kdyz email ze skenovane schranky
|
||||||
|
zmizi (presun do nesken. slozky / vyprazdneni Deleted), ponecha se POSLEDNI
|
||||||
|
ZNAMA cesta a nastavi se priznak not_in_mailbox_anymore=1.
|
||||||
|
|
||||||
|
Identita emailu = Internet Message-ID (stabilni pres presuny). EntryID se
|
||||||
|
pri presunu meni — drzime ho jen jako pomocny.
|
||||||
|
|
||||||
|
Sloupce cest v SQLite:
|
||||||
|
folder = cesta pri PRVNIM zachyceni (historie, neprepisuje se)
|
||||||
|
jnj_folder = AKTUALNI ziva cesta (prepisuje se pri presunu)
|
||||||
|
Sloupec updated_at se bumpne pri insertu i kazde zmene — slouzi pro
|
||||||
|
inkrementalni sync na domaci strane (watermark).
|
||||||
|
|
||||||
|
Upload SQLite (v1.2): DB se pred odeslanim KOMPRIMUJE (lzma/xz, max) a
|
||||||
|
SIFRUJE (Fernet, klic z TOKENu) a nahrava jako .db.xz.enc. Server
|
||||||
|
(msgreceiver /upload-db) ji desifruje + rozbali zpet na plain .db do
|
||||||
|
/msgs/db. Sifruje se kvuli prenosu pres JNJ proxy (Zscaler) — stejny
|
||||||
|
vzor jako .emsg u .msg. ~37 MB DB se scvrkne na jednotky MB.
|
||||||
|
|
||||||
|
Rezimy (--mode):
|
||||||
|
capture (default) Projde cely Inbox+Sent+Deleted Items, nove emaily ulozi a nahraje
|
||||||
|
(jako inbox_full_sync). Okno --days se IGNORUJE (bere VSE).
|
||||||
|
Detekce "opustilo schranku" se v tomto rezimu NEdela (neskenuje
|
||||||
|
se archiv, takze by to delalo falesne poplachy).
|
||||||
|
update-paths Jen METADATA. Projde okno poslednich --days dni, aktualizuje
|
||||||
|
cesty/precteno znamych emailu a oznaci ty, co ze schranky
|
||||||
|
zmizely. NIC nenahrava (zadny .msg upload).
|
||||||
|
full-update update-paths + navic dorovna chybejici emaily (SaveAs+upload).
|
||||||
|
|
||||||
|
Argumenty:
|
||||||
|
--mode {capture,update-paths,full-update} default capture
|
||||||
|
--days N velikost okna ve dnech (default 30). 0 = vse (cely Inbox+Sent+Deleted Items).
|
||||||
|
--dry-run NIC nezapise/nenahraje, jen vypise co by udelal (+ souhrn).
|
||||||
|
--limit N zpracovat max N polozek (rychly test).
|
||||||
|
--no-db-upload na konci nenahravat SQLite na server.
|
||||||
|
|
||||||
|
Spousteni:
|
||||||
|
# 1) Nejdriv si PRECIST, co by full-update prinesl (NIC nezmeni):
|
||||||
|
python jnj_mailbox_sync_v1.2.py --mode full-update --days 30 --dry-run
|
||||||
|
|
||||||
|
# 2) Pak naostro:
|
||||||
|
python jnj_mailbox_sync_v1.2.py --mode full-update --days 30
|
||||||
|
|
||||||
|
Zavislosti:
|
||||||
|
pywin32, requests, cryptography, sqlite3 + lzma (stdlib).
|
||||||
|
Python 3.10+, Windows, Outlook musi byt spusteny a prihlaseny.
|
||||||
|
|
||||||
|
Historie verzi:
|
||||||
|
1.0.0 2026-06-09 Nova generace: rezimy capture/update-paths/full-update,
|
||||||
|
sledovani presunu (jnj_folder), priznak
|
||||||
|
not_in_mailbox_anymore, sloupec updated_at pro
|
||||||
|
inkrementalni sync domu. Nasledník inbox_full_sync_v1.1.
|
||||||
|
1.1.0 2026-06-10 + Deleted Items do SYNC_FOLDERS (olFolderDeletedItems=3).
|
||||||
|
Precteny-smazany mail se ted sleduje jako /Deleted Items;
|
||||||
|
drive ghost s posledni cestou /Inbox. Pri 1. behu se
|
||||||
|
drive zghostovane maily najdou v Deleted -> jnj_folder
|
||||||
|
opraven na /Deleted Items + not_in_mailbox_anymore=0.
|
||||||
|
1.2.0 2026-06-10 Upload SQLite KOMPRIMOVAN (lzma/xz max) + SIFROVAN
|
||||||
|
(Fernet) -> .db.xz.enc. Server desifruje+rozbali zpet
|
||||||
|
na .db. Drive se ~37 MB DB posilalo plain; ted jednotky
|
||||||
|
MB sifrovane (bypass JNJ proxy). Vyzaduje msgreceiver
|
||||||
|
app.py >= v2.1 (umi .db.xz.enc; zpetne bere i plain .db).
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import lzma
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import win32com.client
|
||||||
|
import requests
|
||||||
|
import urllib3
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
|
|
||||||
|
# Switch console output to UTF-8 when the stream supports reconfiguration;
# characters that cannot be encoded are replaced instead of raising.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
# Suppress urllib3's InsecureRequestWarning messages.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# ─── CONFIGURATION ────────────────────────────────────────────────────────────
# NOTE(review): TOKEN is a hard-coded secret; it is used both as the Bearer
# credential and as the seed of the Fernet key below. Consider loading it from
# the environment or a local, non-versioned file.
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
DB_UPLOAD_URL = "https://msgs.buzalka.cz/upload-db"
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnj_mailbox_sync_errors.log"
# MAPI property tag used to read the Internet Message-ID of an item.
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
SCRIPT_NAME = "jnj_mailbox_sync"
SCRIPT_VERSION = "1.2.0"

# olFolderInbox=6, olFolderSentMail=5, olFolderDeletedItems=3
SYNC_FOLDERS = [(6, "Inbox"), (5, "Sent Items"), (3, "Deleted Items")]
OLSAVE_MSG = 3  # OlSaveAsType.olMSG

# Encryption key derived from TOKEN (same algorithm as the server):
# sha256 digest of the token, urlsafe-base64 encoded.
_FERNET = Fernet(base64.urlsafe_b64encode(hashlib.sha256(TOKEN.encode()).digest()))

# File logger: ERROR level and above, appended to LOG_PATH.
logging.basicConfig(
    filename=LOG_PATH,
    level=logging.ERROR,
    format="%(asctime)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    encoding="utf-8",
)
# ──────────────────────────────────────────────────────────────────────────────
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
# ─── SQLite ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def init_db(conn):
    """Create the schema if it is missing and migrate an older database in place.

    Creates the ``messages``, ``runs`` and ``log`` tables plus their indexes,
    then adds any columns that a database created by the predecessor script
    does not have yet. The function is idempotent.

    Args:
        conn: Open ``sqlite3`` connection to the bookkeeping database.

    Raises:
        sqlite3.Error: If a DDL statement fails. Columns that already exist
            are detected up front via ``PRAGMA table_info``, so a failing
            ``ALTER TABLE`` is a real problem and is no longer swallowed.
    """
    conn.execute("""
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT NOT NULL,
            subject TEXT,
            sender TEXT,
            received_at TEXT,
            folder TEXT,
            source TEXT,
            uploaded_at TEXT DEFAULT (datetime('now')),
            entry_id TEXT,
            graph_id TEXT,
            is_read INTEGER DEFAULT 0,
            jnj_folder TEXT,
            not_in_mailbox_anymore INTEGER DEFAULT 0,
            left_mailbox_at TEXT,
            updated_at TEXT
        )
    """)
    conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)")

    conn.execute("""
        CREATE TABLE IF NOT EXISTS runs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            script TEXT NOT NULL,
            version TEXT,
            started_at TEXT NOT NULL,
            finished_at TEXT,
            mode TEXT,
            window_days INTEGER,
            dry_run INTEGER DEFAULT 0,
            found INTEGER DEFAULT 0,
            new_captured INTEGER DEFAULT 0,
            path_updated INTEGER DEFAULT 0,
            read_updated INTEGER DEFAULT 0,
            returned INTEGER DEFAULT 0,
            left_mailbox INTEGER DEFAULT 0,
            skipped INTEGER DEFAULT 0,
            errors INTEGER DEFAULT 0
        )
    """)

    conn.execute("""
        CREATE TABLE IF NOT EXISTS log (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            run_id INTEGER REFERENCES runs(id),
            level TEXT NOT NULL,
            event TEXT NOT NULL,
            subject TEXT,
            folder TEXT,
            graph_id TEXT,
            detail TEXT,
            created_at TEXT DEFAULT (datetime('now'))
        )
    """)
    conn.execute("CREATE INDEX IF NOT EXISTS idx_log_run_id ON log(run_id)")

    # Migration of an existing database: add the columns it is missing.
    migrations = {
        "messages": [
            ("entry_id", "TEXT"), ("graph_id", "TEXT"), ("is_read", "INTEGER DEFAULT 0"),
            ("jnj_folder", "TEXT"), ("not_in_mailbox_anymore", "INTEGER DEFAULT 0"),
            ("left_mailbox_at", "TEXT"), ("updated_at", "TEXT"),
        ],
        "runs": [
            ("mode", "TEXT"), ("window_days", "INTEGER"), ("dry_run", "INTEGER DEFAULT 0"),
            ("found", "INTEGER DEFAULT 0"), ("new_captured", "INTEGER DEFAULT 0"),
            ("path_updated", "INTEGER DEFAULT 0"), ("read_updated", "INTEGER DEFAULT 0"),
            ("returned", "INTEGER DEFAULT 0"), ("left_mailbox", "INTEGER DEFAULT 0"),
        ],
    }
    for table, wanted in migrations.items():
        # Row index 1 of PRAGMA table_info is the column name; SQLite column
        # names are case-insensitive, hence the lower-casing.
        present = {row[1].lower() for row in conn.execute(f"PRAGMA table_info({table})")}
        for col, ddl in wanted:
            if col.lower() not in present:
                conn.execute(f"ALTER TABLE {table} ADD COLUMN {col} {ddl}")

    # Index on a column that may have been created only by the migration above.
    conn.execute("CREATE INDEX IF NOT EXISTS idx_updated_at ON messages(updated_at)")
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def start_run(conn, mode, days, dry):
    """Record the start of a run in ``runs`` and return the new row id.

    Args:
        conn: Open ``sqlite3`` connection.
        mode: Run mode string stored in the ``mode`` column.
        days: Window size stored in ``window_days``.
        dry: Truthy for a dry run; stored as 1/0 in ``dry_run``.

    Returns:
        The ``id`` of the inserted ``runs`` row.
    """
    dry_flag = int(bool(dry))
    row_values = (SCRIPT_NAME, SCRIPT_VERSION, mode, days, dry_flag)
    cursor = conn.execute(
        """INSERT INTO runs (script, version, started_at, mode, window_days, dry_run)
        VALUES (?, ?, datetime('now'), ?, ?, ?)""",
        row_values,
    )
    conn.commit()
    return cursor.lastrowid
|
||||||
|
|
||||||
|
|
||||||
|
def finish_run(conn, run_id, stats):
    """Stamp ``finished_at`` and store the final counters of a run.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: ``id`` of the ``runs`` row to update.
        stats: Mapping holding the counters listed below; a missing key
            raises ``KeyError``.
    """
    counters = (
        "found", "new_captured", "path_updated", "read_updated",
        "returned", "left_mailbox", "skipped", "errors",
    )
    # Evaluate the values first, in column order, before touching the database.
    values = [stats[name] for name in counters]
    assignments = ", ".join(f"{name}=?" for name in counters)
    conn.execute(
        f"UPDATE runs SET finished_at=datetime('now'), {assignments} WHERE id=?",
        values + [run_id],
    )
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def db_log(conn, run_id, level, event, subject=None, folder=None, graph_id=None, detail=None):
    """Append one entry to the ``log`` table and commit immediately.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the run the entry belongs to.
        level: Severity label stored as text.
        event: Short event name.
        subject, folder, graph_id, detail: Optional context columns.
    """
    entry = (run_id, level, event, subject, folder, graph_id, detail)
    placeholders = ", ".join("?" * len(entry))
    conn.execute(
        "INSERT INTO log (run_id, level, event, subject, folder, graph_id, detail) "
        f"VALUES ({placeholders})",
        entry,
    )
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def info(conn, run_id, event, **kw):
    """Write an INFO-level entry to the ``log`` table via ``db_log``."""
    level = "INFO"
    db_log(conn, run_id, level, event, **kw)
|
||||||
|
|
||||||
|
|
||||||
|
def error(conn, run_id, event, **kw):
    """Write an ERROR-level entry to the ``log`` table via ``db_log``."""
    level = "ERROR"
    db_log(conn, run_id, level, event, **kw)
|
||||||
|
|
||||||
|
|
||||||
|
def db_get(conn, mid):
    """Fetch the tracked state of one message by its message id.

    Args:
        conn: Open ``sqlite3`` connection.
        mid: Value of the ``message_id`` column to look up.

    Returns:
        A dict with the keys ``message_id``, ``folder``, ``jnj_folder``,
        ``is_read`` and ``not_in_mailbox_anymore``, or ``None`` when no row
        matches.
    """
    fields = ("message_id", "folder", "jnj_folder", "is_read", "not_in_mailbox_anymore")
    found = conn.execute(
        f"SELECT {', '.join(fields)} FROM messages WHERE message_id=?",
        (mid,),
    ).fetchone()
    if found:
        return dict(zip(fields, found))
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def apply_update(conn, mid, changes):
    """Write the given column changes for one message and bump ``updated_at``.

    Args:
        conn: Open ``sqlite3`` connection.
        mid: ``message_id`` of the row to update.
        changes: Mapping of column name to new value. The column names are
            interpolated into the SQL text, so they must come from code and
            never from external input; the values are bound as parameters.
    """
    assignments = [f"{column}=?" for column in changes]
    assignments.append("updated_at=datetime('now')")
    params = [*changes.values(), mid]
    statement = f"UPDATE messages SET {', '.join(assignments)} WHERE message_id=?"
    conn.execute(statement, params)
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Outlook / prenos ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def get_mid(item) -> str:
    """Return the identity key of an Outlook item.

    The Internet Message-ID read through the item's ``PropertyAccessor`` is
    preferred. When reading it fails or yields an empty value, the key falls
    back to ``"entryid:<EntryID>"``.
    """
    try:
        message_id = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
    except Exception:
        message_id = None
    if message_id:
        return message_id
    return f"entryid:{item.EntryID}"
|
||||||
|
|
||||||
|
|
||||||
|
def upload_msg(msg_path, filename, folder=""):
    """Encrypt a saved ``.msg`` file and POST it to ``UPLOAD_URL``.

    Args:
        msg_path: Path of the file to read.
        filename: Original file name; its stem plus ``.emsg`` becomes the
            upload name.
        folder: Folder path sent along as the ``folder`` form field.

    Returns:
        The decoded JSON body of the server response.

    Raises:
        requests.HTTPError: When the response status is not OK.
    """
    with open(msg_path, "rb") as source:
        ciphertext = _FERNET.encrypt(source.read())
    upload_name = f"{Path(filename).stem}.emsg"
    response = requests.post(
        UPLOAD_URL,
        headers={"Authorization": f"Bearer {TOKEN}"},
        files={"file": (upload_name, ciphertext, "application/octet-stream")},
        data={"folder": folder},
        timeout=60,
    )
    if response.ok:
        return response.json()
    raise requests.HTTPError(f"{response.status_code} {response.reason} | {response.text[:200]}")
|
||||||
|
|
||||||
|
|
||||||
|
def upload_db(db_path):
    """Compress (lzma/xz, max preset), encrypt (Fernet) and upload the SQLite file.

    The payload is posted to ``DB_UPLOAD_URL`` as ``jnjemails_<ts>.db.xz.enc``.
    Per the module notes, the receiving msgreceiver ``/upload-db`` endpoint
    (app.py >= v2.1) decrypts and decompresses it back to a plain ``.db``;
    encryption is used for the transfer through the corporate proxy, the
    same pattern as ``.emsg`` for single messages.

    The upload is best-effort: any failure is printed and not raised, so a
    failed upload never aborts the run. An HTTP error status is now reported
    as a failure too (previously an error response carrying a JSON body was
    printed as if the upload had succeeded).

    Args:
        db_path: Path of the SQLite database file to upload.
    """
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"jnjemails_{ts}.db"
    try:
        with open(db_path, "rb") as f:
            raw = f.read()
        compressed = lzma.compress(raw, preset=9 | lzma.PRESET_EXTREME)
        encrypted = _FERNET.encrypt(compressed)
        enc_filename = filename + ".xz.enc"
        resp = requests.post(
            DB_UPLOAD_URL,
            headers={"Authorization": f"Bearer {TOKEN}"},
            files={"file": (enc_filename, encrypted, "application/octet-stream")},
            timeout=300,
        )
        # Same status handling as upload_msg(): a non-2xx answer is an error.
        if not resp.ok:
            raise requests.HTTPError(f"{resp.status_code} {resp.reason} | {resp.text[:200]}")
        mb_raw, mb_xz, mb_enc = (len(raw) / 1048576,
                                 len(compressed) / 1048576,
                                 len(encrypted) / 1048576)
        print(f" DB upload: {resp.json()} "
              f"({mb_raw:.1f} MB -> xz {mb_xz:.1f} MB -> enc {mb_enc:.1f} MB)")
    except Exception as e:
        # Deliberately broad: the DB upload must not crash the sync run.
        print(f" DB upload CHYBA: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def capture_new(conn, run_id, item, mid, current, is_read, subject, stats):
    """Capture a new e-mail: SaveAs -> upload -> insert into ``messages``.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the current run (used for the log entry).
        item: Outlook mail item to capture.
        mid: Identity key of the item (see ``get_mid``).
        current: Folder path the item was found in; stored both as ``folder``
            and as ``jnj_folder``.
        is_read: 1/0 read flag to store.
        subject: Subject text to store and print.
        stats: Run counters; not modified here (the caller counts successes).

    Returns:
        True on success. Failures propagate as exceptions to the caller.
    """
    # The .msg file only has to exist until the upload has finished.
    with tempfile.TemporaryDirectory() as workdir:
        msg_name = f"{item.EntryID[-20:]}.msg"
        msg_file = Path(workdir) / msg_name
        item.SaveAs(str(msg_file), OLSAVE_MSG)
        upload_result = upload_msg(msg_file, msg_name, current)

    graph_id = upload_result.get("graph_id")

    # Both properties are read defensively; a failing COM access only
    # degrades the stored metadata.
    try:
        received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
    except Exception:
        received = None
    try:
        sender = item.SenderEmailAddress or ""
    except Exception:
        sender = ""

    row_values = (
        mid, subject, sender, received, current, SCRIPT_NAME,
        item.EntryID, graph_id, is_read, current,
    )
    conn.execute(
        """INSERT OR IGNORE INTO messages
        (message_id, subject, sender, received_at, folder, source,
        entry_id, graph_id, is_read, jnj_folder,
        not_in_mailbox_anymore, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, datetime('now'))""",
        row_values,
    )
    conn.commit()
    info(conn, run_id, "captured", subject=subject, folder=current, graph_id=graph_id)
    print(f" NEW | {subject[:70]}")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def process_item(conn, run_id, item, current, stats, seen, mode, dry):
    """Reconcile one mail item with its row in ``messages``.

    An unknown item is captured (modes ``capture`` / ``full-update``) or only
    counted (``update-paths``). For a known item the folder path, the read
    flag and the "left mailbox" flag are compared with the database and the
    differences are written, unless ``dry`` is set.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the current run.
        item: Outlook mail item.
        current: Folder path the item currently lives in.
        stats: Run counters, updated in place.
        seen: Set of identity keys seen in this run, updated in place.
        mode: Run mode string.
        dry: When truthy, no message row is written (counters and the
            ``path_update`` log entry are still produced).
    """
    try:
        mid = get_mid(item)
    except Exception:
        return  # item without any usable identity: ignore silently
    seen.add(mid)
    stats["found"] += 1

    try:
        is_read = 0 if item.UnRead else 1
    except Exception:
        is_read = 0
    subject = str(getattr(item, "Subject", "") or "")

    known = db_get(conn, mid)

    # ── New e-mail (not in the DB) ────────────────────────────────────────
    if known is None:
        if mode not in ("capture", "full-update"):
            # update-paths: there is no body to upload, just count it
            stats["new_uncaptured"] += 1
            return
        if dry:
            stats["new_captured"] += 1
            print(f" NEW* | {subject[:70]}")
            return
        try:
            if capture_new(conn, run_id, item, mid, current, is_read, subject, stats):
                stats["new_captured"] += 1
        except Exception as e:
            stats["errors"] += 1
            error(conn, run_id, "capture_error", subject=subject, folder=current, detail=str(e))
            print(f" CHYBA NEW | {subject[:50]} | {e}")
        return

    # ── Known e-mail — collect the differences and their descriptions ─────
    changes = {}
    what = []

    last_path = known.get("jnj_folder") or known.get("folder")
    if last_path != current:
        changes["jnj_folder"] = current
        stats["path_updated"] += 1
        what.append(f"-> {current}")

    if known.get("is_read") != is_read:
        changes["is_read"] = is_read
        stats["read_updated"] += 1
        what.append("precteno" if is_read else "neprecteno")

    if known.get("not_in_mailbox_anymore"):
        changes["not_in_mailbox_anymore"] = 0
        changes["left_mailbox_at"] = None
        stats["returned"] += 1
        what.append("vraceno do schranky")

    if not changes:
        stats["skipped"] += 1
        return

    if not dry:
        apply_update(conn, mid, changes)
    marker = "*" if dry else " "
    print(f" UPD{marker} | {subject[:55]} | {', '.join(what)}")
    info(conn, run_id, "path_update", subject=subject, folder=current, detail="; ".join(what))
|
||||||
|
|
||||||
|
|
||||||
|
def walk(conn, run_id, folder, folder_path, cutoff_local, stats, seen, mode, dry, limit):
    """Process the mail items of ``folder`` and recurse into its subfolders.

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the current run.
        folder: Outlook folder object to scan.
        folder_path: Path of the parent; ``folder.Name`` is appended to it.
        cutoff_local: Datetime in local time; only items received at or after
            it are scanned. ``None`` scans everything.
        stats: Run counters, updated in place.
        seen: Set of identity keys seen in this run, updated in place.
        mode: Run mode string, passed on to ``process_item``.
        dry: Dry-run flag, passed on to ``process_item``.
        limit: Maximum number of found items across the run (0 = no limit).
    """
    # Function-scope import: the file-level imports only bring in
    # ``datetime`` and ``timedelta``.
    from datetime import timezone

    current = f"{folder_path}/{folder.Name}"
    try:
        items = folder.Items
        if cutoff_local is not None:
            # DASL (@SQL) filters compare date-times in UTC, so the local
            # cutoff has to be converted before it is written into the
            # query. Formatting the local wall-clock time shifted the window
            # edge by the UTC offset; items in that band were not scanned and
            # could then be flagged as having left the mailbox.
            cutoff_utc = cutoff_local.astimezone(timezone.utc)
            restrict = ("@SQL=\"urn:schemas:httpmail:datereceived\" >= '%s'"
                        % cutoff_utc.strftime("%Y/%m/%d %H:%M:%S"))
            items = items.Restrict(restrict)
        items.Sort("[ReceivedTime]", True)  # newest first
    except Exception as e:
        print(f" CHYBA slozka {current}: {e}")
        error(conn, run_id, "folder_error", folder=current, detail=str(e))
        return

    n = 0
    for item in items:
        if limit and stats["found"] >= limit:
            break
        try:
            # Only mail items (message class IPM.Note*) are tracked.
            if not str(getattr(item, "MessageClass", "")).upper().startswith("IPM.NOTE"):
                continue
        except Exception:
            continue
        process_item(conn, run_id, item, current, stats, seen, mode, dry)
        n += 1

    print(f" {current}: {n} polozek")
    info(conn, run_id, "folder_done", folder=current, detail=str(n))

    try:
        subs = list(folder.Folders)
    except Exception:
        subs = []  # subfolders not enumerable: treat as a leaf
    for sub in subs:
        if limit and stats["found"] >= limit:
            break
        walk(conn, run_id, sub, current, cutoff_local, stats, seen, mode, dry, limit)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_dt(s):
    """Parse an ISO-8601 string into a naive datetime in local time.

    A timezone-aware value is converted to the local timezone and its tzinfo
    is dropped; a naive value is returned as parsed.

    Returns:
        The parsed datetime, or ``None`` for an empty or unparsable input.
    """
    if not s:
        return None
    try:
        parsed = datetime.fromisoformat(s)
        if parsed.tzinfo:
            return parsed.astimezone().replace(tzinfo=None)
        return parsed
    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def flag_left_mailbox(conn, run_id, cutoff_local, seen, scanned_roots, stats, dry):
    """Flag messages inside the window that were not seen in the scanned folders.

    A message is flagged (``not_in_mailbox_anymore=1``, ``left_mailbox_at``
    and ``updated_at`` set to now) when all of the following hold:

    * it was not seen in this run and is not flagged already,
    * its last known path (``jnj_folder``, else ``folder``) lies under one of
      ``scanned_roots``,
    * its ``received_at`` parses and is not older than ``cutoff_local``.

    Messages last seen outside the scanned range are not evaluated, because
    their absence from the scan means nothing. The last known path is kept.

    The root check works on path segments: ``/mb/Inbox`` covers ``/mb/Inbox``
    and ``/mb/Inbox/Sub`` but not a sibling such as ``/mb/Inbox-old`` (a
    plain prefix test matched those too and could flag them falsely).

    Args:
        conn: Open ``sqlite3`` connection.
        run_id: Id of the current run (used for the log entry).
        cutoff_local: Naive local datetime marking the start of the window.
        seen: Identity keys seen during this run.
        scanned_roots: Iterable of root folder paths that were scanned.
        stats: Run counters; ``left_mailbox`` is incremented per message.
        dry: When truthy, nothing is written to ``messages``.
    """
    roots = tuple(scanned_roots)

    def under_scanned_root(path):
        """Return True when ``path`` is a scanned root or lies below one."""
        return any(path == root or path.startswith(root + "/") for root in roots)

    cur = conn.execute(
        """SELECT message_id, received_at, jnj_folder, folder, not_in_mailbox_anymore
        FROM messages""")
    to_flag = []
    for mid, received_at, jnjf, fld, flag in cur.fetchall():
        if mid in seen or flag:
            continue
        path = jnjf or fld or ""
        if not under_scanned_root(path):
            continue  # last known path outside the scanned range -> not evaluated
        rec = _parse_dt(received_at)
        if rec is None or rec < cutoff_local:
            continue  # outside the window / unparsable -> not evaluated
        to_flag.append((mid, path))

    for mid, path in to_flag:
        if not dry:
            conn.execute(
                """UPDATE messages SET not_in_mailbox_anymore=1,
                left_mailbox_at=datetime('now'), updated_at=datetime('now')
                WHERE message_id=?""", (mid,))
        stats["left_mailbox"] += 1
        print(f" GONE{'*' if dry else ' '} | {path}")
    if not dry and to_flag:
        conn.commit()
    info(conn, run_id, "left_mailbox", detail=str(len(to_flag)))
|
||||||
|
|
||||||
|
|
||||||
|
# ─── MAIN ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: parse arguments, scan the mailbox, report.

    Steps: open the SQLite database and register the run, walk every folder
    in ``SYNC_FOLDERS``, optionally flag messages that left the mailbox,
    store the run counters, print the summary and (unless disabled or in a
    dry run) upload the database.

    The database connection is closed in a ``finally`` block, so it is also
    released when Outlook access or the scan raises.
    """
    ap = argparse.ArgumentParser(description=f"jnj_mailbox_sync v{SCRIPT_VERSION}")
    ap.add_argument("--mode", choices=["capture", "update-paths", "full-update"],
                    default="capture")
    ap.add_argument("--days", type=int, default=30,
                    help="Okno ve dnech pro update-paths/full-update (0 = vse)")
    ap.add_argument("--dry-run", action="store_true",
                    help="Nic nezapise/nenahraje, jen vypise co by udelal")
    ap.add_argument("--limit", type=int, default=0, help="Max N polozek (test)")
    ap.add_argument("--no-db-upload", action="store_true")
    args = ap.parse_args()

    mode, dry = args.mode, args.dry_run

    # capture ignores the window (takes everything); the other modes use it (0 = all)
    if mode == "capture":
        cutoff_local = None
    else:
        cutoff_local = None if args.days == 0 else (datetime.now() - timedelta(days=args.days))

    win = "vse" if cutoff_local is None else f"{args.days} dni (od {cutoff_local:%Y-%m-%d %H:%M})"
    print(f"=== jnj_mailbox_sync v{SCRIPT_VERSION} ===")
    print(f"Start: {datetime.now():%Y-%m-%d %H:%M:%S}")
    print(f"Rezim: {mode} Okno: {win} {'[DRY-RUN — nic se nemeni]' if dry else ''}")
    print(f"DB: {DB_PATH}")

    conn = sqlite3.connect(DB_PATH)
    try:
        init_db(conn)
        run_id = start_run(conn, mode, args.days, dry)

        outlook = win32com.client.Dispatch("Outlook.Application")
        ns = outlook.GetNamespace("MAPI")

        stats = {"found": 0, "new_captured": 0, "new_uncaptured": 0, "path_updated": 0,
                 "read_updated": 0, "returned": 0, "left_mailbox": 0, "skipped": 0, "errors": 0}
        seen = set()

        scanned_roots = set()
        for fid, label in SYNC_FOLDERS:
            root = ns.GetDefaultFolder(fid)
            mailbox = root.Parent.Name
            scanned_roots.add(f"/{mailbox}/{root.Name}")
            print(f"\n=== {label} ({mailbox}) ===")
            walk(conn, run_id, root, f"/{mailbox}", cutoff_local, stats, seen, mode, dry, args.limit)

        # "Left the mailbox" detection — only windowed modes with a valid cutoff.
        # With --limit the scan is incomplete, so the detection is skipped.
        if mode in ("update-paths", "full-update") and cutoff_local is not None and not (args.limit):
            print("\n--- Kontrola 'opustilo schranku' (v okne, Inbox/Sent/Deleted) ---")
            flag_left_mailbox(conn, run_id, cutoff_local, seen, scanned_roots, stats, dry)
        elif args.limit:
            print("\n(--limit aktivni -> detekce 'opustilo schranku' preskocena)")

        finish_run(conn, run_id, stats)

        # ── Summary ───────────────────────────────────────────────────────
        print(f"\n{'='*60}")
        print(f"SOUHRN [{mode}{' / DRY-RUN' if dry else ''}]")
        print(f" Nalezeno ve schrance: {stats['found']}")
        if mode in ("capture", "full-update"):
            lbl = "by se nahralo" if dry else "nahrano"
            print(f" Nove zachyceno ({lbl}): {stats['new_captured']}")
        else:
            print(f" Nove (bez tela, nedorovnano):{stats['new_uncaptured']}")
        print(f" Aktualizovana cesta: {stats['path_updated']}")
        print(f" Zmena precteno/neprecteno: {stats['read_updated']}")
        print(f" Vraceno do schranky: {stats['returned']}")
        print(f" Opustilo schranku (GONE): {stats['left_mailbox']}")
        print(f" Beze zmeny (skip): {stats['skipped']}")
        print(f" Chyby: {stats['errors']}")
        print(f"{'='*60}")

        if dry:
            print("DRY-RUN: SQLite ani server se NEMENILY.")
        elif not args.no_db_upload:
            print("\nUpload SQLite na server...")
            upload_db(DB_PATH)

        print(f"\nKonec: {datetime.now():%Y-%m-%d %H:%M:%S}")
        if stats["errors"]:
            print(f"Chyby logovany do: {LOG_PATH}")
    finally:
        # Release the database handle on success and on failure alike.
        conn.close()


if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,63 @@
|
|||||||
|
# jnj_tower_ingest v1.2.0
|
||||||
|
|
||||||
|
**Soubor:** `jnj_tower_ingest_v1.2.py`
|
||||||
|
**Datum:** 2026-06-10
|
||||||
|
**Autor:** vladimir.buzalka
|
||||||
|
**Běží:** Docker kontejner `python-runner` na Unraid Tower (192.168.1.76), u MongoDB.
|
||||||
|
|
||||||
|
## Co to je
|
||||||
|
|
||||||
|
Sjednocený **Tower-side ingest** JNJ e-mailů — tři fáze v jednom běhu (cron `*/5`):
|
||||||
|
|
||||||
|
| Fáze | Co dělá |
|
||||||
|
|---|---|
|
||||||
|
| **1. PARSE** | `.msg` z `/mnt/JNJEMAILS` → tělo do Mongo `emaily."vbuzalka@its.jnj.com"`. Inkrementálně přes mtime watermark (`parse_state`). |
|
||||||
|
| **2. SYNC** | nejnovější SQLite (read-only) → zrcadlo `jnj_messages` + `jnj_folder`/stav do `emaily`. Watermark `updated_at` + `last_db` + **NULL-safe** (viz níže). |
|
||||||
|
| **3. ENRICH** | sdílený `5_enrich_fulltext_emails --mailbox vbuzalka@its.jnj.com` → PG fulltext. Jen když parse přidal nové dokumenty. |
|
||||||
|
|
||||||
|
Pořadí **parse → sync → enrich**. Klíč = Internet Message-ID = Mongo `_id`.
|
||||||
|
|
||||||
|
## NULL-safe sync (v1.2 — oprava nesouladu Sent)
|
||||||
|
|
||||||
|
**Problém:** na JNJ stroji běží vedle `jnj_mailbox_sync` i starý **`inbox_full_sync`**, který
|
||||||
|
zapisuje řádky do SQLite s **`updated_at = NULL`** (stará schémata to pole neměla). Domácí
|
||||||
|
sync přitom filtroval `WHERE updated_at > watermark`, a v SQL je `NULL > x = false` →
|
||||||
|
**všechny NULL řádky tiše vypadly** (měly tělo v Mongu, ale nikdy nedostaly `jnj_folder`).
|
||||||
|
Týkalo se 69 400 ze 70 060 řádků.
|
||||||
|
|
||||||
|
**Oprava:** sync teď bere i řádky s `updated_at IS NULL`, které ještě **nejsou** v
|
||||||
|
`jnj_messages` (zpracují se právě jednou; už zrcadlené NULL řádky se levně přeskočí).
|
||||||
|
Nic se už tiše nezahodí. `last_db` short-circuit zůstává (nezměněná SQLite = okamžitý no-op).
|
||||||
|
|
||||||
|
**Kořen na JNJ straně (mimo tento skript):** ideálně vyřadit/nahradit naplánovaný
|
||||||
|
`inbox_full_sync` za `jnj_mailbox_sync --mode capture` (nastavuje `updated_at`).
|
||||||
|
|
||||||
|
## Argumenty
|
||||||
|
|
||||||
|
`--dry-run`, `--full`, `--limit N`, `--reindex`, `--force` (sync: ignoruj last_db),
|
||||||
|
`--parse-only` / `--sync-only` / `--enrich-only`, `--no-enrich`, `--enrich-always`.
|
||||||
|
|
||||||
|
## Spouštění
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker exec python-runner python3 /scripts/jnj_tower_ingest_v1.2.py # cron
|
||||||
|
docker exec -it python-runner python3 /scripts/jnj_tower_ingest_v1.2.py --dry-run
|
||||||
|
docker exec python-runner python3 /scripts/jnj_tower_ingest_v1.2.py --sync-only --full # backfill
|
||||||
|
```
|
||||||
|
|
||||||
|
## Plánování
|
||||||
|
|
||||||
|
Unraid User Scripts `jnj_state_sync` (cron `*/5`) → wrapper s `flock` volá v1.2.
|
||||||
|
Log jen reálná práce → `/mnt/user/Scripts/logs/jnj_tower_ingest.log`.
|
||||||
|
|
||||||
|
## Revert
|
||||||
|
|
||||||
|
`jnj_tower_ingest_v1.1.py` (bez NULL-safe), `_v1.0.py` (bez enrich),
|
||||||
|
`parse_emails_tower_v1.3.py`, `sync_jnj_state_v1.0.py` zůstávají v `/scripts/`.
|
||||||
|
|
||||||
|
## Historie verzí
|
||||||
|
|
||||||
|
- **1.0.0** — sjednocení parse + sync (mtime watermark).
|
||||||
|
- **1.1.0** — + fáze ENRICH (sdílený `5_enrich --mailbox`).
|
||||||
|
- **1.2.0** — SYNC NULL-safe: bere i `updated_at IS NULL` řádky (jinak je watermark filtr
|
||||||
|
tiše zahazoval → maily měly tělo, ale ne `jnj_folder`). + jednorázový `--full` backfill.
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,36 @@
|
|||||||
|
# janssenpc_file_receive_v1.2
|
||||||
|
|
||||||
|
Stáhne soubory čekající na serveru `msgs.buzalka.cz` do `##JNJPrenos\ZHovorcovic\`.
|
||||||
|
Spouští se ručně na JNJ stroji dle potřeby.
|
||||||
|
|
||||||
|
## Spuštění (JNJ stroj)
|
||||||
|
```
|
||||||
|
"C:\Users\vbuzalka\OneDrive - JNJ\##JNJPrenos\Python\python.exe" "C:\Users\vbuzalka\OneDrive - JNJ\##JNJPrenos\Python\janssenpc_file_receive_v1.2.py"
|
||||||
|
```
|
||||||
|
(cestu ke skriptu případně upravit podle skutečného umístění)
|
||||||
|
|
||||||
|
## Princip
|
||||||
|
1. `GET /status` → seznam Fernet tokenů (zašifrovaná jména souborů ve frontě
|
||||||
|
Dropbox `UploadToJNJ`). Zscaler vidí jen neprůhledné řetězce.
|
||||||
|
2. Pro každý token `GET /item/{token}` s hlavičkou `Accept: application/json`
|
||||||
|
→ server (app.py >= v2.3) vrátí `{"data": "<fernet_b64>"}`.
|
||||||
|
3. Klient dešifruje `data` (Fernet z TOKENu) → obsah souboru; jméno získá
|
||||||
|
dešifrováním tokenu. Uloží do `ZHovorcovic\` (zamčený soubor → ` (2)` atd.).
|
||||||
|
4. Server po vydání souboru přesouvá originál do `UploadToJNJ/##Trash/`.
|
||||||
|
|
||||||
|
## Proč JSON (v1.2)
|
||||||
|
Korporátní filtr (Zscaler/SiteMinder) blokoval binární downloady — při prvním
|
||||||
|
stažení PDF si odpověď zachytil, sám si zkusil soubor stáhnout znovu (na serveru
|
||||||
|
viditelné jako druhý GET bez auth → 401) a klientovi vrátil
|
||||||
|
`403 Forbidden` + redirect s `?_sm_nck=1`. JSON odpověď (`application/json`)
|
||||||
|
AV sandbox na přílohy nespouští.
|
||||||
|
|
||||||
|
## Vazby
|
||||||
|
- Server: `EmailsImport/DockerCustomApp/app.py` v2.3 (endpoint `/item` — JSON při
|
||||||
|
`Accept: application/json`, jinak binárka pro starší klienty).
|
||||||
|
- Protějšek pro upload: `janssenpc_file_watch.py` / `janssenpc_file_send`.
|
||||||
|
|
||||||
|
## Historie
|
||||||
|
- v1.2 (2026-06-10): přenos obsahu jako JSON (bypass AV sandboxu filtru)
|
||||||
|
- v1.1 (2026-06-08): jména souborů jako Fernet tokeny v URL (bypass DLP)
|
||||||
|
- v1.0: první verze
|
||||||
@@ -0,0 +1,105 @@
|
|||||||
|
# Název: janssenpc_file_receive_v1.2.py
|
||||||
|
# Verze: 1.2
|
||||||
|
# Datum: 2026-06-10
|
||||||
|
# Popis: Stáhne soubory čekající na serveru (msgs.buzalka.cz) do ##JNJPrenos\ZHovorcovic\.
|
||||||
|
# Spouštět ručně dle potřeby.
|
||||||
|
#
|
||||||
|
# Změna v1.2:
|
||||||
|
# Obsah souboru se přenáší jako JSON ({"data": fernet_b64}), ne jako binární
|
||||||
|
# příloha — korporátní filtr (403 + ?_sm_nck=1) blokoval binární downloady
|
||||||
|
# (AV sandbox na "file download"); JSON odpověď inspekci příloh nespouští.
|
||||||
|
# Klient posílá Accept: application/json; server (app.py >= v2.3) podle toho
|
||||||
|
# volí formát, starý binární režim zůstává pro v1.1.
|
||||||
|
#
|
||||||
|
# Změna v1.1:
|
||||||
|
# Jména souborů ze /status jsou Fernet tokeny (zašifrované původní názvy).
|
||||||
|
# Klient je pošle beze změny jako URL token do /item/{token} — Zscaler vidí
|
||||||
|
# jen neprůhledný řetězec, ne skutečné jméno souboru (bypass DLP).
|
||||||
|
# Po stažení a dešifrování obsahu klient dešifruje i token → původní jméno → uloží.
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import requests
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
|
|
||||||
|
# --- Configuration -----------------------------------------------------------
# NOTE(review): TOKEN is a shared secret committed in plain text. It is used
# both as the HTTP bearer token and as the seed of the Fernet key below;
# consider loading it from outside the repository.
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"

# Server endpoints: /status lists the queued items, /item/{token} serves one.
PENDING_URL = "https://msgs.buzalka.cz/status"
DOWNLOAD_URL = "https://msgs.buzalka.cz/item"

# Local folder that received files are written to.
RECEIVE_DIR = Path(r"C:\Users\vbuzalka\OneDrive - JNJ\##JNJPrenos\ZHovorcovic")

# Log file stored next to this script.
LOG_FILE = Path(__file__).with_name("file_send.log")

# Fernet key = urlsafe-base64 of SHA-256(TOKEN); used for both the file-name
# tokens and the file contents.
_FERNET = Fernet(
    base64.urlsafe_b64encode(
        hashlib.sha256(TOKEN.encode("utf-8")).digest()
    )
)
|
||||||
|
|
||||||
|
|
||||||
|
def log(msg: str):
    """Print a timestamped message and append the same line to LOG_FILE.

    The emitted line has the form ``[YYYY-MM-DD HH:MM:SS] msg``. The log file
    is opened in append mode (UTF-8) on every call.
    """
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    entry = "[{}] {}".format(stamp, msg)
    print(entry)
    with LOG_FILE.open("a", encoding="utf-8") as handle:
        print(entry, file=handle)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_dest(directory: Path, filename: str) -> Path:
    """Choose the path in *directory* under which *filename* should be saved.

    An existing file is reused (the caller overwrites it) as long as it can be
    opened for writing. If opening it raises ``PermissionError`` (e.g. the
    file is locked), the numbered variants ``name (2).ext``, ``name (3).ext``,
    ... are tried in order until one does not exist or can be opened.

    Only the last path component of *filename* is used. The name originates
    from data received over the network, so directory parts (``..``, absolute
    paths, sub-folders written with ``/`` or ``\\``) are dropped to keep the
    result inside *directory*.

    Args:
        directory: Target folder.
        filename: Desired file name; any leading path components are ignored.

    Returns:
        A path directly inside *directory* that is either free or writable.

    Raises:
        ValueError: If *filename* has no usable name component
            (empty, ``.`` or ``..``).
    """
    # Treat both separator styles alike so the result does not depend on the
    # platform the script happens to run on.
    safe_name = filename.replace("\\", "/").rsplit("/", 1)[-1]
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Neplatné jméno souboru: {filename!r}")

    def _is_usable(path: Path) -> bool:
        """Return True when *path* is free or can be opened for writing."""
        if not path.exists():
            return True
        try:
            # Opening read/write without truncating is only a probe; the
            # handle is closed immediately.
            with path.open("r+b"):
                return True
        except PermissionError:
            return False

    dest = directory / safe_name
    if _is_usable(dest):
        return dest

    stem, suffix = dest.stem, dest.suffix
    n = 2
    while True:
        candidate = directory / f"{stem} ({n}){suffix}"
        if _is_usable(candidate):
            return candidate
        n += 1
|
||||||
|
|
||||||
|
|
||||||
|
log("=== file_receive: Spuštění ===")

# Step 1: ask the server which items are queued. Each entry is a Fernet token
# (an encrypted file name). Any failure is logged and treated as "nothing to do".
try:
    status_resp = requests.get(
        PENDING_URL,
        headers={"Authorization": f"Bearer {TOKEN}"},
        timeout=30,
    )
    status_resp.raise_for_status()
    pending = status_resp.json().get("files", [])
    log(f"Souborů čeká na serveru: {len(pending)}")
except Exception as err:
    log(f"CHYBA při dotazu na server: {err}")
    pending = []

# Step 2: download every queued item.
if pending:
    RECEIVE_DIR.mkdir(parents=True, exist_ok=True)

    for enc_token in pending:
        # Decrypt the token to recover the original file name
        # (needed for logging and for saving).
        try:
            orig_filename = _FERNET.decrypt(enc_token.encode()).decode()
        except Exception as err:
            log(f" CHYBA (dešifrování jména) | {enc_token[:20]}... | {err}")
            continue

        try:
            # The token is sent unchanged in the URL; asking for JSON makes
            # the server return the encrypted content as {"data": ...}.
            item_resp = requests.get(
                f"{DOWNLOAD_URL}/{enc_token}",
                headers={
                    "Authorization": f"Bearer {TOKEN}",
                    "Accept": "application/json",
                },
                timeout=120,
            )
            item_resp.raise_for_status()

            payload = item_resp.json()["data"]
            decrypted = _FERNET.decrypt(payload.encode())

            dest = resolve_dest(RECEIVE_DIR, orig_filename)
            dest.write_bytes(decrypted)

            # Mention the final name only when it differs from the original.
            renamed = "" if dest.name == orig_filename else " → " + dest.name
            log(f" STAŽENO | {orig_filename}{renamed}")
        except Exception as err:
            log(f" CHYBA | {orig_filename} | {err}")

log("=== file_receive: Hotovo ===")
|
||||||
Reference in New Issue
Block a user