Files
janssen/EmailsImport/Trash/janssenpc_email_send_new_v1.2.py

200 lines
6.6 KiB
Python

"""
janssenpc_email_send_new v1.2
Verze: 1.2
Datum: 2026-05-28
Popis: Prochází pouze složku Inbox v Outlooku (MAPI), ukládá emailové zprávy jako .msg
soubory a uploaduje je na https://msgs.buzalka.cz. Zaznamenává zpracované
zprávy do SQLite DB (jnjemails.db) a DB periodicky uploaduje na server.
Podporuje pokračování od posledního zpracovaného emailu (resume).
Chyby při uploadu se logují do souboru jnjemails_errors.log.
Oprava v1.2: folder cesta obsahuje celé jméno schránky (např. /vbuzalka@its.jnj.com/Inbox)
aby resume logika správně navazovala na záznamy z původního skriptu.
"""
import win32com.client
import requests
import sqlite3
import urllib3
import logging
from pathlib import Path
from datetime import datetime, timedelta
import tempfile
import io
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails_errors.log"
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
logging.basicConfig(
filename=LOG_PATH,
level=logging.ERROR,
format="%(asctime)s | %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
encoding="utf-8",
)
def init_db(conn):
conn.execute("""
CREATE TABLE IF NOT EXISTS messages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
message_id TEXT NOT NULL,
subject TEXT,
sender TEXT,
received_at TEXT,
folder TEXT,
source TEXT,
uploaded_at TEXT DEFAULT (datetime('now'))
)
""")
conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)")
conn.commit()
def is_uploaded(conn, message_id):
row = conn.execute(
"SELECT 1 FROM messages WHERE message_id = ? LIMIT 1", (message_id,)
).fetchone()
return row is not None
def save_to_db(conn, message_id, subject, sender, received_at, folder, source):
conn.execute("""
INSERT OR IGNORE INTO messages (message_id, subject, sender, received_at, folder, source)
VALUES (?, ?, ?, ?, ?, ?)
""", (message_id, subject, sender, received_at, folder, source))
conn.commit()
def upload_db(db_path):
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"jnjemails_{timestamp}.db"
with open(db_path, "rb") as f:
resp = requests.post(
"https://msgs.buzalka.cz/upload-db",
headers={"Authorization": f"Bearer {TOKEN}"},
files={"file": (filename, f, "application/octet-stream")},
timeout=60
)
print(f" DB upload: {resp.json()}")
def upload_msg(msg_path, filename):
with open(msg_path, "rb") as f:
resp = requests.post(
UPLOAD_URL,
headers={"Authorization": f"Bearer {TOKEN}"},
files={"file": (filename, f, "application/octet-stream")},
timeout=30
)
resp.raise_for_status()
return resp.json()["status"]
def get_folder_resume_date(conn, folder_path):
row = conn.execute(
"SELECT MAX(received_at) FROM messages WHERE folder = ?",
(folder_path,)
).fetchone()
if not row or not row[0]:
return None
last_dt = datetime.fromisoformat(row[0])
return last_dt - timedelta(hours=1)
def process_folder(conn, folder, source, folder_path="", counter=None):
if counter is None:
counter = [0]
current_path = f"{folder_path}/{folder.Name}"
try:
resume_dt = get_folder_resume_date(conn, current_path)
items = folder.Items
if resume_dt:
resume_str = resume_dt.strftime("%Y/%m/%d %H:%M:%S")
filter_str = f"@SQL=\"urn:schemas:httpmail:datereceived\" > '{resume_str}'"
items = folder.Items.Restrict(filter_str)
print(f"\n Složka: {current_path} | pokračuji od: {resume_str}")
else:
print(f"\n Složka: {current_path} | od začátku")
items.Sort("[ReceivedTime]", False)
count = 0
skipped = 0
for item in items:
try:
if not item.MessageClass.upper().startswith("IPM.NOTE"):
continue
try:
mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
except:
mid = None
if not mid:
mid = f"entryid:{item.EntryID}"
if is_uploaded(conn, mid):
skipped += 1
continue
with tempfile.TemporaryDirectory() as tmp:
safe_name = f"{item.EntryID[-20:]}.msg"
tmp_path = Path(tmp) / safe_name
item.SaveAs(str(tmp_path), 3)
status = upload_msg(tmp_path, safe_name)
received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
save_to_db(conn, mid, item.Subject, item.SenderEmailAddress,
received, current_path, source)
counter[0] += 1
count += 1
if counter[0] % 1000 == 0:
print(f" → celkem {counter[0]} emailů přeneseno, uploaduji DB...")
upload_db(DB_PATH)
print(f" {status.upper():6} | {item.Subject[:60]}")
except Exception as e:
subject = getattr(item, 'Subject', '?')
sender = getattr(item, 'SenderEmailAddress', '?')
received = getattr(item, 'ReceivedTime', '?')
print(f" CHYBA | {subject[:40]} | {e}")
logging.error("folder=%s | sender=%s | received=%s | subject=%s | error=%s",
current_path, sender, received, subject, e)
print(f" → složka hotova: přeneseno {count} | skip {skipped}")
except Exception as e:
print(f" CHYBA složka {current_path}: {e}")
logging.error("folder=%s | CHYBA SLOŽKY | error=%s", current_path, e)
for subfolder in folder.Folders:
process_folder(conn, subfolder, source, current_path, counter)
# --- MAIN ---
Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(DB_PATH)
init_db(conn)
outlook = win32com.client.Dispatch("Outlook.Application")
ns = outlook.GetNamespace("MAPI")
inbox = ns.GetDefaultFolder(6) # 6 = olFolderInbox
mailbox_name = inbox.Parent.Name # např. "vbuzalka@its.jnj.com"
print(f"Schránka: {mailbox_name}")
source = "mailbox"
print(f"\n=== Inbox ({mailbox_name}) ===")
process_folder(conn, inbox, source, f"/{mailbox_name}")
# Finální DB upload po dokončení
print("\nFinální upload DB...")
upload_db(DB_PATH)
conn.close()
print(f"\nHotovo. Chyby logovány do: {LOG_PATH}")