Files
janssen/EmailsImport/janssenpc_email_send_v1.1.py

225 lines
7.0 KiB
Python

"""
janssenpc_email_send v1.1
Verze: 1.1
Datum: 2026-05-28
Popis: Prochází všechny složky Outlooku (MAPI), ukládá emailové zprávy jako .msg
soubory a uploaduje je na https://msgs.buzalka.cz. Zaznamenává zpracované
zprávy do SQLite DB (jnjemails.db) a DB periodicky uploaduje na server.
Podporuje pokračování od posledního zpracovaného emailu (resume).
Nově: před startem zkontroluje jestli Outlook běží, pokud ne, spustí ho
automaticky (cesta z registru) a počká na inicializaci MAPI.
"""
import win32com.client
import requests
import sqlite3
import urllib3
import subprocess
import winreg
import time
from pathlib import Path
from datetime import datetime, timedelta
import tempfile
import io
import psutil
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
def is_outlook_running():
return any(p.name().lower() == "outlook.exe" for p in psutil.process_iter())
def find_outlook_path():
try:
key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\OUTLOOK.EXE")
path, _ = winreg.QueryValueEx(key, "")
winreg.CloseKey(key)
return path
except FileNotFoundError:
return None
def ensure_outlook_running():
outlook_path = find_outlook_path()
print(f"Cesta k Outlooku: {outlook_path}")
if is_outlook_running():
print("Outlook již běží.")
else:
if not outlook_path:
print("CHYBA: Outlook nenalezen v registru.")
exit(1)
print("Outlook neběží, spouštím...")
subprocess.Popen([outlook_path])
print("Čekám na inicializaci Outlooku", end="", flush=True)
for _ in range(30):
time.sleep(2)
print(".", end="", flush=True)
if is_outlook_running():
break
print()
time.sleep(20) # dát čas MAPI vrstvě nastartovat
def init_db(conn):
conn.execute("""
CREATE TABLE IF NOT EXISTS messages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
message_id TEXT NOT NULL,
subject TEXT,
sender TEXT,
received_at TEXT,
folder TEXT,
source TEXT,
uploaded_at TEXT DEFAULT (datetime('now'))
)
""")
conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)")
conn.commit()
def is_uploaded(conn, message_id):
row = conn.execute(
"SELECT 1 FROM messages WHERE message_id = ? LIMIT 1", (message_id,)
).fetchone()
return row is not None
def save_to_db(conn, message_id, subject, sender, received_at, folder, source):
conn.execute("""
INSERT OR IGNORE INTO messages (message_id, subject, sender, received_at, folder, source)
VALUES (?, ?, ?, ?, ?, ?)
""", (message_id, subject, sender, received_at, folder, source))
conn.commit()
def upload_db(db_path):
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"jnjemails_{timestamp}.db"
with open(db_path, "rb") as f:
resp = requests.post(
"https://msgs.buzalka.cz/upload-db",
headers={"Authorization": f"Bearer {TOKEN}"},
files={"file": (filename, f, "application/octet-stream")},
timeout=60
)
print(f" DB upload: {resp.json()}")
def upload_msg(msg_path, filename):
with open(msg_path, "rb") as f:
resp = requests.post(
UPLOAD_URL,
headers={"Authorization": f"Bearer {TOKEN}"},
files={"file": (filename, f, "application/octet-stream")},
timeout=30
)
resp.raise_for_status()
return resp.json()["status"]
def get_folder_resume_date(conn, folder_path):
row = conn.execute(
"SELECT MAX(received_at) FROM messages WHERE folder = ?",
(folder_path,)
).fetchone()
if not row or not row[0]:
return None
last_dt = datetime.fromisoformat(row[0])
return last_dt - timedelta(hours=1)
def process_folder(conn, folder, source, folder_path="", counter=None):
if counter is None:
counter = [0]
current_path = f"{folder_path}/{folder.Name}"
try:
resume_dt = get_folder_resume_date(conn, current_path)
items = folder.Items
if resume_dt:
resume_str = resume_dt.strftime("%Y/%m/%d %H:%M:%S")
filter_str = f"@SQL=\"urn:schemas:httpmail:datereceived\" > '{resume_str}'"
items = folder.Items.Restrict(filter_str)
print(f"\n Složka: {current_path} | pokračuji od: {resume_str}")
else:
print(f"\n Složka: {current_path} | od začátku")
items.Sort("[ReceivedTime]", False)
count = 0
skipped = 0
for item in items:
try:
if not item.MessageClass.upper().startswith("IPM.NOTE"):
continue
try:
mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
except:
mid = None
if not mid:
mid = f"entryid:{item.EntryID}"
if is_uploaded(conn, mid):
skipped += 1
continue
with tempfile.TemporaryDirectory() as tmp:
safe_name = f"{item.EntryID[-20:]}.msg"
tmp_path = Path(tmp) / safe_name
item.SaveAs(str(tmp_path), 3)
status = upload_msg(tmp_path, safe_name)
received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
save_to_db(conn, mid, item.Subject, item.SenderEmailAddress,
received, current_path, source)
counter[0] += 1
count += 1
if counter[0] % 1000 == 0:
print(f" → celkem {counter[0]} emailů přeneseno, uploaduji DB...")
upload_db(DB_PATH)
print(f" {status.upper():6} | {item.Subject[:60]}")
except Exception as e:
print(f" CHYBA | {getattr(item, 'Subject', '?')[:40]} | {e}")
print(f" → složka hotova: přeneseno {count} | skip {skipped}")
except Exception as e:
print(f" CHYBA složka {current_path}: {e}")
for subfolder in folder.Folders:
process_folder(conn, subfolder, source, current_path, counter)
# --- MAIN ---
ensure_outlook_running()
Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(DB_PATH)
init_db(conn)
outlook = win32com.client.Dispatch("Outlook.Application")
ns = outlook.GetNamespace("MAPI")
for i in range(1, ns.Folders.Count + 1):
root = ns.Folders.Item(i)
# if "Archive" in root.Name:
# print(f"\n=== {root.Name} — přeskočeno ===")
# continue
source = "mailbox"
print(f"\n=== {root.Name} ({source}) ===")
process_folder(conn, root, source)
# Finální DB upload po dokončení
print("\nFinální upload DB...")
upload_db(DB_PATH)
conn.close()
print("\nHotovo.")