This commit is contained in:
2026-05-28 07:32:37 +02:00
parent e6c414d14a
commit 10beab5c84
6 changed files with 850 additions and 11 deletions
@@ -1,3 +1,12 @@
"""
janssenpc_email_send_new v1.0
Verze: 1.0
Datum: 2026-05-28
Popis: Prochází pouze složku Inbox v Outlooku (MAPI), ukládá emailové zprávy jako .msg
soubory a uploaduje je na https://msgs.buzalka.cz. Zaznamenává zpracované
zprávy do SQLite DB (jnjemails.db) a DB periodicky uploaduje na server.
Podporuje pokračování od posledního zpracovaného emailu (resume).
"""
import win32com.client
import requests
import sqlite3
@@ -155,20 +164,14 @@ init_db(conn)
outlook = win32com.client.Dispatch("Outlook.Application")
ns = outlook.GetNamespace("MAPI")
for i in range(1, ns.Folders.Count + 1):
root = ns.Folders.Item(i)
# if "Archive" in root.Name:
# print(f"\n=== {root.Name} — přeskočeno ===")
# continue
source = "mailbox"
print(f"\n=== {root.Name} ({source}) ===")
process_folder(conn, root, source)
inbox = ns.GetDefaultFolder(6) # 6 = olFolderInbox
source = "mailbox"
print(f"\n=== Inbox ===")
process_folder(conn, inbox, source)
# Finální DB upload po dokončení
print("\nFinální upload DB...")
upload_db(DB_PATH)
conn.close()
print("\nHotovo.")
print("\nHotovo.")
@@ -0,0 +1,195 @@
"""
janssenpc_email_send_new v1.1
Verze: 1.1
Datum: 2026-05-28
Popis: Prochází pouze složku Inbox v Outlooku (MAPI), ukládá emailové zprávy jako .msg
soubory a uploaduje je na https://msgs.buzalka.cz. Zaznamenává zpracované
zprávy do SQLite DB (jnjemails.db) a DB periodicky uploaduje na server.
Podporuje pokračování od posledního zpracovaného emailu (resume).
Nově: chyby při uploadu se logují do souboru jnjemails_errors.log
(timestamp, složka, odesílatel, předmět, chyba).
"""
import win32com.client
import requests
import sqlite3
import urllib3
import logging
from pathlib import Path
from datetime import datetime, timedelta
import tempfile
import io
# Silence urllib3's InsecureRequestWarning for the whole process.
# NOTE(review): none of the requests calls visible in this file pass verify=False,
# so this suppression may be a leftover — confirm before relying on it.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Bearer token sent in the Authorization header by upload_db() and upload_msg().
# NOTE(review): this is a credential committed in plain text — consider loading it
# from the environment or a local, untracked config file and rotating the value.
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
# Endpoint that receives the individual .msg files (used by upload_msg()).
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
# Local SQLite database that records which messages were already uploaded.
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
# File that receives the ERROR-level log records configured below.
LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails_errors.log"
# Property name passed to PropertyAccessor.GetProperty() to read an item's
# Internet Message-ID (MAPI property tag 0x1035001E).
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
# Send records of level ERROR and above to LOG_PATH (UTF-8), one
# "timestamp | message" line per record.
logging.basicConfig(
    filename=LOG_PATH,
    level=logging.ERROR,
    format="%(asctime)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    encoding="utf-8",
)
def init_db(conn):
    """Create the ``messages`` table and its unique message-id index if missing.

    Args:
        conn: Open sqlite3 connection; the schema changes are committed on it.
    """
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT NOT NULL,
            subject TEXT,
            sender TEXT,
            received_at TEXT,
            folder TEXT,
            source TEXT,
            uploaded_at TEXT DEFAULT (datetime('now'))
        )
    """
    create_index_sql = (
        "CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)"
    )
    # Both statements are idempotent, so calling this on an existing DB is safe.
    for statement in (create_table_sql, create_index_sql):
        conn.execute(statement)
    conn.commit()
def is_uploaded(conn, message_id):
    """Report whether ``messages`` already holds a row with this message id.

    Args:
        conn: Open sqlite3 connection.
        message_id: Identifier to look up in the ``message_id`` column.

    Returns:
        True if at least one matching row exists, otherwise False.
    """
    query = "SELECT 1 FROM messages WHERE message_id = ? LIMIT 1"
    match = conn.execute(query, (message_id,)).fetchone()
    if match is None:
        return False
    return True
def save_to_db(conn, message_id, subject, sender, received_at, folder, source):
    """Record one processed message and commit.

    The insert uses ``INSERT OR IGNORE``, so a row that conflicts with an
    existing one is silently skipped rather than raising.

    Args:
        conn: Open sqlite3 connection.
        message_id: Identifier stored in ``message_id``.
        subject: Value for the ``subject`` column.
        sender: Value for the ``sender`` column.
        received_at: Value for the ``received_at`` column (may be None).
        folder: Value for the ``folder`` column.
        source: Value for the ``source`` column.
    """
    insert_sql = """
        INSERT OR IGNORE INTO messages (message_id, subject, sender, received_at, folder, source)
        VALUES (?, ?, ?, ?, ?, ?)
    """
    record = (message_id, subject, sender, received_at, folder, source)
    conn.execute(insert_sql, record)
    conn.commit()
def upload_db(db_path):
    """Upload a timestamped copy of the SQLite database file to the server.

    The file at *db_path* is POSTed as multipart field ``file`` under the name
    ``jnjemails_<YYYYmmdd_HHMMSS>.db`` with the bearer TOKEN. The outcome is
    only printed; an HTTP error status does not raise.

    Args:
        db_path: Path of the database file to send.

    Raises:
        OSError: If the file cannot be opened.
        requests.RequestException: On connection failure or timeout.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"jnjemails_{timestamp}.db"
    with open(db_path, "rb") as f:
        resp = requests.post(
            "https://msgs.buzalka.cz/upload-db",
            headers={"Authorization": f"Bearer {TOKEN}"},
            files={"file": (filename, f, "application/octet-stream")},
            timeout=60,
        )
    try:
        result = resp.json()
    except ValueError:
        # The body is not JSON (e.g. an HTML error page from a proxy); report
        # the status and a snippet instead of failing on the decode.
        result = f"HTTP {resp.status_code}: {resp.text[:200]}"
    print(f" DB upload: {result}")
def upload_msg(msg_path, filename):
    """POST one saved .msg file to UPLOAD_URL and return the reported status.

    Args:
        msg_path: Path of the .msg file on disk.
        filename: File name announced to the server in the multipart part.

    Returns:
        The ``"status"`` value from the server's JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    auth_headers = {"Authorization": f"Bearer {TOKEN}"}
    with open(msg_path, "rb") as msg_file:
        multipart = {"file": (filename, msg_file, "application/octet-stream")}
        response = requests.post(
            UPLOAD_URL,
            headers=auth_headers,
            files=multipart,
            timeout=30,
        )
    response.raise_for_status()
    body = response.json()
    return body["status"]
def get_folder_resume_date(conn, folder_path):
    """Return the point in time from which a folder should be re-scanned.

    Args:
        conn: Open sqlite3 connection.
        folder_path: Value of the ``folder`` column to look up.

    Returns:
        The largest recorded ``received_at`` for the folder, parsed as an ISO
        timestamp and moved back by one hour, or None when the folder has no
        rows with a timestamp.
    """
    cursor = conn.execute(
        "SELECT MAX(received_at) FROM messages WHERE folder = ?",
        (folder_path,),
    )
    result = cursor.fetchone()
    latest = result[0] if result else None
    if not latest:
        return None
    # One hour of overlap; already-recorded messages are skipped by the caller.
    return datetime.fromisoformat(latest) - timedelta(hours=1)
def _safe_attr(item, name, default="?"):
    """Read ``item.<name>`` for error reporting; never raise, never return None."""
    try:
        value = getattr(item, name)
    except Exception:
        return default
    return default if value is None else value


def process_folder(conn, folder, source, folder_path="", counter=None):
    """Upload the not-yet-recorded mail items of *folder*, then recurse into subfolders.

    The folder is keyed in the database by ``folder_path + "/" + folder.Name``.
    When get_folder_resume_date() returns a timestamp for that key, only items
    received after it are enumerated (via Items.Restrict); otherwise the whole
    folder is scanned. Items are sorted by ReceivedTime with Descending=False.
    Only items whose MessageClass starts with ``IPM.NOTE`` are handled: each is
    saved as a .msg file (SaveAs type 3) into a temporary directory, uploaded
    with upload_msg() and recorded with save_to_db(). After every 1000
    transferred messages (counted across the whole run) the database file is
    uploaded with upload_db().

    Errors on a single item are printed and logged and the loop continues;
    errors while preparing the folder are printed and logged and the folder's
    items are skipped, but its subfolders are still visited.

    NOTE(review): an item that failed earlier and is older than the resume
    timestamp is no longer enumerated on later runs, so it is never retried.

    Args:
        conn: Open sqlite3 connection.
        folder: Outlook folder object to process.
        source: Label stored in the ``source`` column for every saved row.
        folder_path: Path prefix of the parent folder.
        counter: One-element list holding the run-wide count of transferred
            messages; created when omitted and shared with recursive calls.
    """
    if counter is None:
        counter = [0]
    current_path = f"{folder_path}/{folder.Name}"
    try:
        resume_dt = get_folder_resume_date(conn, current_path)
        items = folder.Items
        if resume_dt:
            resume_str = resume_dt.strftime("%Y/%m/%d %H:%M:%S")
            filter_str = f"@SQL=\"urn:schemas:httpmail:datereceived\" > '{resume_str}'"
            items = folder.Items.Restrict(filter_str)
            print(f"\n Složka: {current_path} | pokračuji od: {resume_str}")
        else:
            print(f"\n Složka: {current_path} | od začátku")
        items.Sort("[ReceivedTime]", False)
        count = 0
        skipped = 0
        for item in items:
            try:
                if not item.MessageClass.upper().startswith("IPM.NOTE"):
                    continue
                try:
                    mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
                except Exception:
                    # Property missing or unreadable: fall back to the EntryID.
                    mid = None
                if not mid:
                    mid = f"entryid:{item.EntryID}"
                if is_uploaded(conn, mid):
                    skipped += 1
                    continue
                with tempfile.TemporaryDirectory() as tmp:
                    safe_name = f"{item.EntryID[-20:]}.msg"
                    tmp_path = Path(tmp) / safe_name
                    item.SaveAs(str(tmp_path), 3)
                    status = upload_msg(tmp_path, safe_name)
                received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
                subject = item.Subject
                save_to_db(conn, mid, subject, item.SenderEmailAddress,
                           received, current_path, source)
                counter[0] += 1
                count += 1
                if counter[0] % 1000 == 0:
                    print(f" → celkem {counter[0]} emailů přeneseno, uploaduji DB...")
                    try:
                        upload_db(DB_PATH)
                    except Exception as db_err:
                        # The message itself is already uploaded and recorded;
                        # a failed DB snapshot must not be reported against it.
                        print(f" CHYBA upload DB | {db_err}")
                        logging.error("folder=%s | CHYBA UPLOAD DB | error=%s",
                                      current_path, db_err)
                print(f" {str(status).upper():6} | {(subject or '')[:60]}")
            except Exception as e:
                # Reading COM attributes can itself fail here, so go through
                # _safe_attr to keep the handler from aborting the folder.
                subject = _safe_attr(item, "Subject")
                sender = _safe_attr(item, "SenderEmailAddress")
                received = _safe_attr(item, "ReceivedTime")
                print(f" CHYBA | {str(subject)[:40]} | {e}")
                logging.error("folder=%s | sender=%s | received=%s | subject=%s | error=%s",
                              current_path, sender, received, subject, e)
        print(f" → složka hotova: přeneseno {count} | skip {skipped}")
    except Exception as e:
        print(f" CHYBA složka {current_path}: {e}")
        logging.error("folder=%s | CHYBA SLOŽKY | error=%s", current_path, e)
    for subfolder in folder.Folders:
        process_folder(conn, subfolder, source, current_path, counter)
# --- MAIN ---
# Make sure the DB directory exists, then process the default Inbox and
# upload the final database snapshot.
Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(DB_PATH)
try:
    init_db(conn)
    outlook = win32com.client.Dispatch("Outlook.Application")
    ns = outlook.GetNamespace("MAPI")
    inbox = ns.GetDefaultFolder(6)  # 6 = olFolderInbox
    source = "mailbox"
    print("\n=== Inbox ===")
    process_folder(conn, inbox, source)
    # Final DB upload once everything has been processed
    print("\nFinální upload DB...")
    upload_db(DB_PATH)
finally:
    # Close the connection even when processing or the final upload fails.
    conn.close()
print(f"\nHotovo. Chyby logovány do: {LOG_PATH}")
@@ -0,0 +1,199 @@
"""
janssenpc_email_send_new v1.2
Verze: 1.2
Datum: 2026-05-28
Popis: Prochází pouze složku Inbox v Outlooku (MAPI), ukládá emailové zprávy jako .msg
soubory a uploaduje je na https://msgs.buzalka.cz. Zaznamenává zpracované
zprávy do SQLite DB (jnjemails.db) a DB periodicky uploaduje na server.
Podporuje pokračování od posledního zpracovaného emailu (resume).
Chyby při uploadu se logují do souboru jnjemails_errors.log.
Oprava v1.2: folder cesta obsahuje celé jméno schránky (např. /vbuzalka@its.jnj.com/Inbox)
aby resume logika správně navazovala na záznamy z původního skriptu.
"""
import win32com.client
import requests
import sqlite3
import urllib3
import logging
from pathlib import Path
from datetime import datetime, timedelta
import tempfile
import io
# Silence urllib3's InsecureRequestWarning for the whole process.
# NOTE(review): none of the requests calls visible in this file pass verify=False,
# so this suppression may be a leftover — confirm before relying on it.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Bearer token sent in the Authorization header by upload_db() and upload_msg().
# NOTE(review): this is a credential committed in plain text — consider loading it
# from the environment or a local, untracked config file and rotating the value.
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
# Endpoint that receives the individual .msg files (used by upload_msg()).
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
# Local SQLite database that records which messages were already uploaded.
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
# File that receives the ERROR-level log records configured below.
LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails_errors.log"
# Property name passed to PropertyAccessor.GetProperty() to read an item's
# Internet Message-ID (MAPI property tag 0x1035001E).
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
# Send records of level ERROR and above to LOG_PATH (UTF-8), one
# "timestamp | message" line per record.
logging.basicConfig(
    filename=LOG_PATH,
    level=logging.ERROR,
    format="%(asctime)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    encoding="utf-8",
)
def init_db(conn):
    """Create the ``messages`` table and its unique message-id index if missing.

    Args:
        conn: Open sqlite3 connection; the schema changes are committed on it.
    """
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT NOT NULL,
            subject TEXT,
            sender TEXT,
            received_at TEXT,
            folder TEXT,
            source TEXT,
            uploaded_at TEXT DEFAULT (datetime('now'))
        )
    """
    create_index_sql = (
        "CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)"
    )
    # Both statements are idempotent, so calling this on an existing DB is safe.
    for statement in (create_table_sql, create_index_sql):
        conn.execute(statement)
    conn.commit()
def is_uploaded(conn, message_id):
    """Report whether ``messages`` already holds a row with this message id.

    Args:
        conn: Open sqlite3 connection.
        message_id: Identifier to look up in the ``message_id`` column.

    Returns:
        True if at least one matching row exists, otherwise False.
    """
    query = "SELECT 1 FROM messages WHERE message_id = ? LIMIT 1"
    match = conn.execute(query, (message_id,)).fetchone()
    if match is None:
        return False
    return True
def save_to_db(conn, message_id, subject, sender, received_at, folder, source):
    """Record one processed message and commit.

    The insert uses ``INSERT OR IGNORE``, so a row that conflicts with an
    existing one is silently skipped rather than raising.

    Args:
        conn: Open sqlite3 connection.
        message_id: Identifier stored in ``message_id``.
        subject: Value for the ``subject`` column.
        sender: Value for the ``sender`` column.
        received_at: Value for the ``received_at`` column (may be None).
        folder: Value for the ``folder`` column.
        source: Value for the ``source`` column.
    """
    insert_sql = """
        INSERT OR IGNORE INTO messages (message_id, subject, sender, received_at, folder, source)
        VALUES (?, ?, ?, ?, ?, ?)
    """
    record = (message_id, subject, sender, received_at, folder, source)
    conn.execute(insert_sql, record)
    conn.commit()
def upload_db(db_path):
    """Upload a timestamped copy of the SQLite database file to the server.

    The file at *db_path* is POSTed as multipart field ``file`` under the name
    ``jnjemails_<YYYYmmdd_HHMMSS>.db`` with the bearer TOKEN. The outcome is
    only printed; an HTTP error status does not raise.

    Args:
        db_path: Path of the database file to send.

    Raises:
        OSError: If the file cannot be opened.
        requests.RequestException: On connection failure or timeout.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"jnjemails_{timestamp}.db"
    with open(db_path, "rb") as f:
        resp = requests.post(
            "https://msgs.buzalka.cz/upload-db",
            headers={"Authorization": f"Bearer {TOKEN}"},
            files={"file": (filename, f, "application/octet-stream")},
            timeout=60,
        )
    try:
        result = resp.json()
    except ValueError:
        # The body is not JSON (e.g. an HTML error page from a proxy); report
        # the status and a snippet instead of failing on the decode.
        result = f"HTTP {resp.status_code}: {resp.text[:200]}"
    print(f" DB upload: {result}")
def upload_msg(msg_path, filename):
    """POST one saved .msg file to UPLOAD_URL and return the reported status.

    Args:
        msg_path: Path of the .msg file on disk.
        filename: File name announced to the server in the multipart part.

    Returns:
        The ``"status"`` value from the server's JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    auth_headers = {"Authorization": f"Bearer {TOKEN}"}
    with open(msg_path, "rb") as msg_file:
        multipart = {"file": (filename, msg_file, "application/octet-stream")}
        response = requests.post(
            UPLOAD_URL,
            headers=auth_headers,
            files=multipart,
            timeout=30,
        )
    response.raise_for_status()
    body = response.json()
    return body["status"]
def get_folder_resume_date(conn, folder_path):
    """Return the point in time from which a folder should be re-scanned.

    Args:
        conn: Open sqlite3 connection.
        folder_path: Value of the ``folder`` column to look up.

    Returns:
        The largest recorded ``received_at`` for the folder, parsed as an ISO
        timestamp and moved back by one hour, or None when the folder has no
        rows with a timestamp.
    """
    cursor = conn.execute(
        "SELECT MAX(received_at) FROM messages WHERE folder = ?",
        (folder_path,),
    )
    result = cursor.fetchone()
    latest = result[0] if result else None
    if not latest:
        return None
    # One hour of overlap; already-recorded messages are skipped by the caller.
    return datetime.fromisoformat(latest) - timedelta(hours=1)
def _safe_attr(item, name, default="?"):
    """Read ``item.<name>`` for error reporting; never raise, never return None."""
    try:
        value = getattr(item, name)
    except Exception:
        return default
    return default if value is None else value


def process_folder(conn, folder, source, folder_path="", counter=None):
    """Upload the not-yet-recorded mail items of *folder*, then recurse into subfolders.

    The folder is keyed in the database by ``folder_path + "/" + folder.Name``.
    When get_folder_resume_date() returns a timestamp for that key, only items
    received after it are enumerated (via Items.Restrict); otherwise the whole
    folder is scanned. Items are sorted by ReceivedTime with Descending=False.
    Only items whose MessageClass starts with ``IPM.NOTE`` are handled: each is
    saved as a .msg file (SaveAs type 3) into a temporary directory, uploaded
    with upload_msg() and recorded with save_to_db(). After every 1000
    transferred messages (counted across the whole run) the database file is
    uploaded with upload_db().

    Errors on a single item are printed and logged and the loop continues;
    errors while preparing the folder are printed and logged and the folder's
    items are skipped, but its subfolders are still visited.

    NOTE(review): an item that failed earlier and is older than the resume
    timestamp is no longer enumerated on later runs, so it is never retried.

    Args:
        conn: Open sqlite3 connection.
        folder: Outlook folder object to process.
        source: Label stored in the ``source`` column for every saved row.
        folder_path: Path prefix of the parent folder.
        counter: One-element list holding the run-wide count of transferred
            messages; created when omitted and shared with recursive calls.
    """
    if counter is None:
        counter = [0]
    current_path = f"{folder_path}/{folder.Name}"
    try:
        resume_dt = get_folder_resume_date(conn, current_path)
        items = folder.Items
        if resume_dt:
            resume_str = resume_dt.strftime("%Y/%m/%d %H:%M:%S")
            filter_str = f"@SQL=\"urn:schemas:httpmail:datereceived\" > '{resume_str}'"
            items = folder.Items.Restrict(filter_str)
            print(f"\n Složka: {current_path} | pokračuji od: {resume_str}")
        else:
            print(f"\n Složka: {current_path} | od začátku")
        items.Sort("[ReceivedTime]", False)
        count = 0
        skipped = 0
        for item in items:
            try:
                if not item.MessageClass.upper().startswith("IPM.NOTE"):
                    continue
                try:
                    mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
                except Exception:
                    # Property missing or unreadable: fall back to the EntryID.
                    mid = None
                if not mid:
                    mid = f"entryid:{item.EntryID}"
                if is_uploaded(conn, mid):
                    skipped += 1
                    continue
                with tempfile.TemporaryDirectory() as tmp:
                    safe_name = f"{item.EntryID[-20:]}.msg"
                    tmp_path = Path(tmp) / safe_name
                    item.SaveAs(str(tmp_path), 3)
                    status = upload_msg(tmp_path, safe_name)
                received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
                subject = item.Subject
                save_to_db(conn, mid, subject, item.SenderEmailAddress,
                           received, current_path, source)
                counter[0] += 1
                count += 1
                if counter[0] % 1000 == 0:
                    print(f" → celkem {counter[0]} emailů přeneseno, uploaduji DB...")
                    try:
                        upload_db(DB_PATH)
                    except Exception as db_err:
                        # The message itself is already uploaded and recorded;
                        # a failed DB snapshot must not be reported against it.
                        print(f" CHYBA upload DB | {db_err}")
                        logging.error("folder=%s | CHYBA UPLOAD DB | error=%s",
                                      current_path, db_err)
                print(f" {str(status).upper():6} | {(subject or '')[:60]}")
            except Exception as e:
                # Reading COM attributes can itself fail here, so go through
                # _safe_attr to keep the handler from aborting the folder.
                subject = _safe_attr(item, "Subject")
                sender = _safe_attr(item, "SenderEmailAddress")
                received = _safe_attr(item, "ReceivedTime")
                print(f" CHYBA | {str(subject)[:40]} | {e}")
                logging.error("folder=%s | sender=%s | received=%s | subject=%s | error=%s",
                              current_path, sender, received, subject, e)
        print(f" → složka hotova: přeneseno {count} | skip {skipped}")
    except Exception as e:
        print(f" CHYBA složka {current_path}: {e}")
        logging.error("folder=%s | CHYBA SLOŽKY | error=%s", current_path, e)
    for subfolder in folder.Folders:
        process_folder(conn, subfolder, source, current_path, counter)
# --- MAIN ---
# Make sure the DB directory exists, then process the default Inbox under a
# path prefixed with the mailbox name and upload the final database snapshot.
Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(DB_PATH)
try:
    init_db(conn)
    outlook = win32com.client.Dispatch("Outlook.Application")
    ns = outlook.GetNamespace("MAPI")
    inbox = ns.GetDefaultFolder(6)  # 6 = olFolderInbox
    mailbox_name = inbox.Parent.Name  # e.g. "vbuzalka@its.jnj.com"
    print(f"Schránka: {mailbox_name}")
    source = "mailbox"
    print(f"\n=== Inbox ({mailbox_name}) ===")
    process_folder(conn, inbox, source, f"/{mailbox_name}")
    # Final DB upload once everything has been processed
    print("\nFinální upload DB...")
    upload_db(DB_PATH)
finally:
    # Close the connection even when processing or the final upload fails.
    conn.close()
print(f"\nHotovo. Chyby logovány do: {LOG_PATH}")
+18
View File
@@ -0,0 +1,18 @@
"""
db_cleanup_inbox v1.0
Verze: 1.0
Datum: 2026-05-28
Popis: Jednorázový cleanup - smaže záznamy v SQLite DB kde folder = '/Inbox'
(záznamy bez emailové adresy v cestě, vytvořené chybnou verzí skriptu).
"""
import sqlite3
# SQLite database that holds the ``messages`` table to clean up.
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"

# One-off cleanup: remove every row whose folder is exactly '/Inbox',
# commit, and report how many rows were deleted.
conn = sqlite3.connect(DB_PATH)
cursor = conn.execute("DELETE FROM messages WHERE folder = '/Inbox'")
deleted = cursor.rowcount
conn.commit()
conn.close()
print(f"Smazáno záznamů: {deleted}")
print("Hotovo.")
@@ -0,0 +1,200 @@
"""
janssenpc_email_send_new v1.3
Verze: 1.3
Datum: 2026-05-28
Popis: Prochází složky Inbox, Deleted Items a Sent Items v Outlooku (MAPI),
ukládá emailové zprávy jako .msg soubory a uploaduje je na https://msgs.buzalka.cz.
Zaznamenává zpracované zprávy do SQLite DB (jnjemails.db) a DB periodicky
uploaduje na server. Podporuje pokračování od posledního zpracovaného emailu (resume).
Folder cesta obsahuje celé jméno schránky (např. /vbuzalka@its.jnj.com/Inbox).
Chyby při uploadu se logují do souboru jnjemails_errors.log.
"""
import win32com.client
import requests
import sqlite3
import urllib3
import logging
from pathlib import Path
from datetime import datetime, timedelta
import tempfile
import io
# Silence urllib3's InsecureRequestWarning for the whole process.
# NOTE(review): none of the requests calls visible in this file pass verify=False,
# so this suppression may be a leftover — confirm before relying on it.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Bearer token sent in the Authorization header by upload_db() and upload_msg().
# NOTE(review): this is a credential committed in plain text — consider loading it
# from the environment or a local, untracked config file and rotating the value.
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
# Endpoint that receives the individual .msg files (used by upload_msg()).
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
# Local SQLite database that records which messages were already uploaded.
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
# File that receives the ERROR-level log records configured below.
LOG_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails_errors.log"
# Property name passed to PropertyAccessor.GetProperty() to read an item's
# Internet Message-ID (MAPI property tag 0x1035001E).
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
# Default-folder ids passed to GetDefaultFolder() by the main loop, in
# processing order.
# olFolderInbox=6, olFolderDeletedItems=3, olFolderSentMail=5
FOLDERS_TO_PROCESS = [6, 3, 5]
# Send records of level ERROR and above to LOG_PATH (UTF-8), one
# "timestamp | message" line per record.
logging.basicConfig(
    filename=LOG_PATH,
    level=logging.ERROR,
    format="%(asctime)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    encoding="utf-8",
)
def init_db(conn):
    """Create the ``messages`` table and its unique message-id index if missing.

    Args:
        conn: Open sqlite3 connection; the schema changes are committed on it.
    """
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT NOT NULL,
            subject TEXT,
            sender TEXT,
            received_at TEXT,
            folder TEXT,
            source TEXT,
            uploaded_at TEXT DEFAULT (datetime('now'))
        )
    """
    create_index_sql = (
        "CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)"
    )
    # Both statements are idempotent, so calling this on an existing DB is safe.
    for statement in (create_table_sql, create_index_sql):
        conn.execute(statement)
    conn.commit()
def is_uploaded(conn, message_id):
    """Report whether ``messages`` already holds a row with this message id.

    Args:
        conn: Open sqlite3 connection.
        message_id: Identifier to look up in the ``message_id`` column.

    Returns:
        True if at least one matching row exists, otherwise False.
    """
    query = "SELECT 1 FROM messages WHERE message_id = ? LIMIT 1"
    match = conn.execute(query, (message_id,)).fetchone()
    if match is None:
        return False
    return True
def save_to_db(conn, message_id, subject, sender, received_at, folder, source):
    """Record one processed message and commit.

    The insert uses ``INSERT OR IGNORE``, so a row that conflicts with an
    existing one is silently skipped rather than raising.

    Args:
        conn: Open sqlite3 connection.
        message_id: Identifier stored in ``message_id``.
        subject: Value for the ``subject`` column.
        sender: Value for the ``sender`` column.
        received_at: Value for the ``received_at`` column (may be None).
        folder: Value for the ``folder`` column.
        source: Value for the ``source`` column.
    """
    insert_sql = """
        INSERT OR IGNORE INTO messages (message_id, subject, sender, received_at, folder, source)
        VALUES (?, ?, ?, ?, ?, ?)
    """
    record = (message_id, subject, sender, received_at, folder, source)
    conn.execute(insert_sql, record)
    conn.commit()
def upload_db(db_path):
    """Upload a timestamped copy of the SQLite database file to the server.

    The file at *db_path* is POSTed as multipart field ``file`` under the name
    ``jnjemails_<YYYYmmdd_HHMMSS>.db`` with the bearer TOKEN. The outcome is
    only printed; an HTTP error status does not raise.

    Args:
        db_path: Path of the database file to send.

    Raises:
        OSError: If the file cannot be opened.
        requests.RequestException: On connection failure or timeout.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"jnjemails_{timestamp}.db"
    with open(db_path, "rb") as f:
        resp = requests.post(
            "https://msgs.buzalka.cz/upload-db",
            headers={"Authorization": f"Bearer {TOKEN}"},
            files={"file": (filename, f, "application/octet-stream")},
            timeout=60,
        )
    try:
        result = resp.json()
    except ValueError:
        # The body is not JSON (e.g. an HTML error page from a proxy); report
        # the status and a snippet instead of failing on the decode.
        result = f"HTTP {resp.status_code}: {resp.text[:200]}"
    print(f" DB upload: {result}")
def upload_msg(msg_path, filename):
    """POST one saved .msg file to UPLOAD_URL and return the reported status.

    Args:
        msg_path: Path of the .msg file on disk.
        filename: File name announced to the server in the multipart part.

    Returns:
        The ``"status"`` value from the server's JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    auth_headers = {"Authorization": f"Bearer {TOKEN}"}
    with open(msg_path, "rb") as msg_file:
        multipart = {"file": (filename, msg_file, "application/octet-stream")}
        response = requests.post(
            UPLOAD_URL,
            headers=auth_headers,
            files=multipart,
            timeout=30,
        )
    response.raise_for_status()
    body = response.json()
    return body["status"]
def get_folder_resume_date(conn, folder_path):
    """Return the point in time from which a folder should be re-scanned.

    Args:
        conn: Open sqlite3 connection.
        folder_path: Value of the ``folder`` column to look up.

    Returns:
        The largest recorded ``received_at`` for the folder, parsed as an ISO
        timestamp and moved back by one hour, or None when the folder has no
        rows with a timestamp.
    """
    cursor = conn.execute(
        "SELECT MAX(received_at) FROM messages WHERE folder = ?",
        (folder_path,),
    )
    result = cursor.fetchone()
    latest = result[0] if result else None
    if not latest:
        return None
    # One hour of overlap; already-recorded messages are skipped by the caller.
    return datetime.fromisoformat(latest) - timedelta(hours=1)
def _safe_attr(item, name, default="?"):
    """Read ``item.<name>`` for error reporting; never raise, never return None."""
    try:
        value = getattr(item, name)
    except Exception:
        return default
    return default if value is None else value


def process_folder(conn, folder, source, folder_path="", counter=None):
    """Upload the not-yet-recorded mail items of *folder*, then recurse into subfolders.

    The folder is keyed in the database by ``folder_path + "/" + folder.Name``.
    When get_folder_resume_date() returns a timestamp for that key, only items
    received after it are enumerated (via Items.Restrict); otherwise the whole
    folder is scanned. Items are sorted by ReceivedTime with Descending=False.
    Only items whose MessageClass starts with ``IPM.NOTE`` are handled: each is
    saved as a .msg file (SaveAs type 3) into a temporary directory, uploaded
    with upload_msg() and recorded with save_to_db(). After every 1000
    transferred messages (counted across the whole run) the database file is
    uploaded with upload_db().

    Errors on a single item are printed and logged and the loop continues;
    errors while preparing the folder are printed and logged and the folder's
    items are skipped, but its subfolders are still visited.

    NOTE(review): an item that failed earlier and is older than the resume
    timestamp is no longer enumerated on later runs, so it is never retried.

    Args:
        conn: Open sqlite3 connection.
        folder: Outlook folder object to process.
        source: Label stored in the ``source`` column for every saved row.
        folder_path: Path prefix of the parent folder.
        counter: One-element list holding the run-wide count of transferred
            messages; created when omitted and shared with recursive calls.
    """
    if counter is None:
        counter = [0]
    current_path = f"{folder_path}/{folder.Name}"
    try:
        resume_dt = get_folder_resume_date(conn, current_path)
        items = folder.Items
        if resume_dt:
            resume_str = resume_dt.strftime("%Y/%m/%d %H:%M:%S")
            filter_str = f"@SQL=\"urn:schemas:httpmail:datereceived\" > '{resume_str}'"
            items = folder.Items.Restrict(filter_str)
            print(f"\n Složka: {current_path} | pokračuji od: {resume_str}")
        else:
            print(f"\n Složka: {current_path} | od začátku")
        items.Sort("[ReceivedTime]", False)
        count = 0
        skipped = 0
        for item in items:
            try:
                if not item.MessageClass.upper().startswith("IPM.NOTE"):
                    continue
                try:
                    mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
                except Exception:
                    # Property missing or unreadable: fall back to the EntryID.
                    mid = None
                if not mid:
                    mid = f"entryid:{item.EntryID}"
                if is_uploaded(conn, mid):
                    skipped += 1
                    continue
                with tempfile.TemporaryDirectory() as tmp:
                    safe_name = f"{item.EntryID[-20:]}.msg"
                    tmp_path = Path(tmp) / safe_name
                    item.SaveAs(str(tmp_path), 3)
                    status = upload_msg(tmp_path, safe_name)
                received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
                subject = item.Subject
                save_to_db(conn, mid, subject, item.SenderEmailAddress,
                           received, current_path, source)
                counter[0] += 1
                count += 1
                if counter[0] % 1000 == 0:
                    print(f" → celkem {counter[0]} emailů přeneseno, uploaduji DB...")
                    try:
                        upload_db(DB_PATH)
                    except Exception as db_err:
                        # The message itself is already uploaded and recorded;
                        # a failed DB snapshot must not be reported against it.
                        print(f" CHYBA upload DB | {db_err}")
                        logging.error("folder=%s | CHYBA UPLOAD DB | error=%s",
                                      current_path, db_err)
                print(f" {str(status).upper():6} | {(subject or '')[:60]}")
            except Exception as e:
                # Reading COM attributes can itself fail here, so go through
                # _safe_attr to keep the handler from aborting the folder.
                subject = _safe_attr(item, "Subject")
                sender = _safe_attr(item, "SenderEmailAddress")
                received = _safe_attr(item, "ReceivedTime")
                print(f" CHYBA | {str(subject)[:40]} | {e}")
                logging.error("folder=%s | sender=%s | received=%s | subject=%s | error=%s",
                              current_path, sender, received, subject, e)
        print(f" → složka hotova: přeneseno {count} | skip {skipped}")
    except Exception as e:
        print(f" CHYBA složka {current_path}: {e}")
        logging.error("folder=%s | CHYBA SLOŽKY | error=%s", current_path, e)
    for subfolder in folder.Folders:
        process_folder(conn, subfolder, source, current_path, counter)
# --- MAIN ---
# Make sure the DB directory exists, then process every default folder listed
# in FOLDERS_TO_PROCESS with one shared transfer counter and upload the final
# database snapshot.
Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(DB_PATH)
try:
    init_db(conn)
    outlook = win32com.client.Dispatch("Outlook.Application")
    ns = outlook.GetNamespace("MAPI")
    counter = [0]
    for folder_id in FOLDERS_TO_PROCESS:
        folder = ns.GetDefaultFolder(folder_id)
        mailbox_name = folder.Parent.Name
        print(f"\n=== {folder.Name} ({mailbox_name}) ===")
        process_folder(conn, folder, "mailbox", f"/{mailbox_name}", counter)
    # Final DB upload once everything has been processed
    print("\nFinální upload DB...")
    upload_db(DB_PATH)
finally:
    # Close the connection even when processing or the final upload fails.
    conn.close()
print(f"\nHotovo. Chyby logovány do: {LOG_PATH}")
+224
View File
@@ -0,0 +1,224 @@
"""
janssenpc_email_send v1.1
Verze: 1.1
Datum: 2026-05-28
Popis: Prochází všechny složky Outlooku (MAPI), ukládá emailové zprávy jako .msg
soubory a uploaduje je na https://msgs.buzalka.cz. Zaznamenává zpracované
zprávy do SQLite DB (jnjemails.db) a DB periodicky uploaduje na server.
Podporuje pokračování od posledního zpracovaného emailu (resume).
Nově: před startem zkontroluje jestli Outlook běží, pokud ne, spustí ho
automaticky (cesta z registru) a počká na inicializaci MAPI.
"""
import win32com.client
import requests
import sqlite3
import urllib3
import subprocess
import winreg
import time
from pathlib import Path
from datetime import datetime, timedelta
import tempfile
import io
import psutil
# Silence urllib3's InsecureRequestWarning for the whole process.
# NOTE(review): none of the requests calls visible in this file pass verify=False,
# so this suppression may be a leftover — confirm before relying on it.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Bearer token sent in the Authorization header by upload_db() and upload_msg().
# NOTE(review): this is a credential committed in plain text — consider loading it
# from the environment or a local, untracked config file and rotating the value.
TOKEN = "13e1bb01-9fd5-44a8-8ce9-4ee27133d340"
# Endpoint that receives the individual .msg files (used by upload_msg()).
UPLOAD_URL = "https://msgs.buzalka.cz/upload"
# Local SQLite database that records which messages were already uploaded.
DB_PATH = r"C:\Users\vbuzalka\SQLITE\jnjemails.db"
# Property name passed to PropertyAccessor.GetProperty() to read an item's
# Internet Message-ID (MAPI property tag 0x1035001E).
PR_INTERNET_MESSAGE_ID = "http://schemas.microsoft.com/mapi/proptag/0x1035001E"
def is_outlook_running():
    """Return True if a process named ``outlook.exe`` (any case) is running.

    The process name is pre-fetched by ``process_iter(["name"])``, so a process
    that exits or denies access during the scan yields a None name instead of
    raising and aborting the whole check.
    """
    return any(
        (proc.info["name"] or "").lower() == "outlook.exe"
        for proc in psutil.process_iter(["name"])
    )
def find_outlook_path():
    """Look up the Outlook executable path in the Windows registry.

    Reads the default value of
    ``HKLM\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\OUTLOOK.EXE``.

    Returns:
        The registered path, or None when the key or its default value does
        not exist.
    """
    try:
        # The context manager closes the key even when the value read fails.
        with winreg.OpenKey(
            winreg.HKEY_LOCAL_MACHINE,
            r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\OUTLOOK.EXE",
        ) as key:
            path, _ = winreg.QueryValueEx(key, "")
        return path
    except FileNotFoundError:
        return None
def ensure_outlook_running():
    """Start Outlook if it is not running and wait for it to come up.

    When no Outlook process is found, the executable located by
    find_outlook_path() is launched, the process list is polled every 2 s for
    up to 30 attempts, and a further 20 s pause follows. When Outlook is
    already running nothing is launched.

    Raises:
        SystemExit: With code 1 when Outlook is not running and its path is
            not found in the registry.
    """
    outlook_path = find_outlook_path()
    print(f"Cesta k Outlooku: {outlook_path}")
    if is_outlook_running():
        print("Outlook již běží.")
        return
    if not outlook_path:
        print("CHYBA: Outlook nenalezen v registru.")
        # exit() is a helper injected by the site module and is not guaranteed
        # to exist; raising SystemExit has the same effect everywhere.
        raise SystemExit(1)
    print("Outlook neběží, spouštím...")
    subprocess.Popen([outlook_path])
    print("Čekám na inicializaci Outlooku", end="", flush=True)
    for _ in range(30):
        time.sleep(2)
        print(".", end="", flush=True)
        if is_outlook_running():
            break
    print()
    time.sleep(20)  # give the MAPI layer time to start
def init_db(conn):
    """Create the ``messages`` table and its unique message-id index if missing.

    Args:
        conn: Open sqlite3 connection; the schema changes are committed on it.
    """
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            message_id TEXT NOT NULL,
            subject TEXT,
            sender TEXT,
            received_at TEXT,
            folder TEXT,
            source TEXT,
            uploaded_at TEXT DEFAULT (datetime('now'))
        )
    """
    create_index_sql = (
        "CREATE UNIQUE INDEX IF NOT EXISTS idx_message_id ON messages(message_id)"
    )
    # Both statements are idempotent, so calling this on an existing DB is safe.
    for statement in (create_table_sql, create_index_sql):
        conn.execute(statement)
    conn.commit()
def is_uploaded(conn, message_id):
    """Report whether ``messages`` already holds a row with this message id.

    Args:
        conn: Open sqlite3 connection.
        message_id: Identifier to look up in the ``message_id`` column.

    Returns:
        True if at least one matching row exists, otherwise False.
    """
    query = "SELECT 1 FROM messages WHERE message_id = ? LIMIT 1"
    match = conn.execute(query, (message_id,)).fetchone()
    if match is None:
        return False
    return True
def save_to_db(conn, message_id, subject, sender, received_at, folder, source):
    """Record one processed message and commit.

    The insert uses ``INSERT OR IGNORE``, so a row that conflicts with an
    existing one is silently skipped rather than raising.

    Args:
        conn: Open sqlite3 connection.
        message_id: Identifier stored in ``message_id``.
        subject: Value for the ``subject`` column.
        sender: Value for the ``sender`` column.
        received_at: Value for the ``received_at`` column (may be None).
        folder: Value for the ``folder`` column.
        source: Value for the ``source`` column.
    """
    insert_sql = """
        INSERT OR IGNORE INTO messages (message_id, subject, sender, received_at, folder, source)
        VALUES (?, ?, ?, ?, ?, ?)
    """
    record = (message_id, subject, sender, received_at, folder, source)
    conn.execute(insert_sql, record)
    conn.commit()
def upload_db(db_path):
    """Upload a timestamped copy of the SQLite database file to the server.

    The file at *db_path* is POSTed as multipart field ``file`` under the name
    ``jnjemails_<YYYYmmdd_HHMMSS>.db`` with the bearer TOKEN. The outcome is
    only printed; an HTTP error status does not raise.

    Args:
        db_path: Path of the database file to send.

    Raises:
        OSError: If the file cannot be opened.
        requests.RequestException: On connection failure or timeout.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"jnjemails_{timestamp}.db"
    with open(db_path, "rb") as f:
        resp = requests.post(
            "https://msgs.buzalka.cz/upload-db",
            headers={"Authorization": f"Bearer {TOKEN}"},
            files={"file": (filename, f, "application/octet-stream")},
            timeout=60,
        )
    try:
        result = resp.json()
    except ValueError:
        # The body is not JSON (e.g. an HTML error page from a proxy); report
        # the status and a snippet instead of failing on the decode.
        result = f"HTTP {resp.status_code}: {resp.text[:200]}"
    print(f" DB upload: {result}")
def upload_msg(msg_path, filename):
    """POST one saved .msg file to UPLOAD_URL and return the reported status.

    Args:
        msg_path: Path of the .msg file on disk.
        filename: File name announced to the server in the multipart part.

    Returns:
        The ``"status"`` value from the server's JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    auth_headers = {"Authorization": f"Bearer {TOKEN}"}
    with open(msg_path, "rb") as msg_file:
        multipart = {"file": (filename, msg_file, "application/octet-stream")}
        response = requests.post(
            UPLOAD_URL,
            headers=auth_headers,
            files=multipart,
            timeout=30,
        )
    response.raise_for_status()
    body = response.json()
    return body["status"]
def get_folder_resume_date(conn, folder_path):
    """Return the point in time from which a folder should be re-scanned.

    Args:
        conn: Open sqlite3 connection.
        folder_path: Value of the ``folder`` column to look up.

    Returns:
        The largest recorded ``received_at`` for the folder, parsed as an ISO
        timestamp and moved back by one hour, or None when the folder has no
        rows with a timestamp.
    """
    cursor = conn.execute(
        "SELECT MAX(received_at) FROM messages WHERE folder = ?",
        (folder_path,),
    )
    result = cursor.fetchone()
    latest = result[0] if result else None
    if not latest:
        return None
    # One hour of overlap; already-recorded messages are skipped by the caller.
    return datetime.fromisoformat(latest) - timedelta(hours=1)
def _safe_attr(item, name, default="?"):
    """Read ``item.<name>`` for error reporting; never raise, never return None."""
    try:
        value = getattr(item, name)
    except Exception:
        return default
    return default if value is None else value


def process_folder(conn, folder, source, folder_path="", counter=None):
    """Upload the not-yet-recorded mail items of *folder*, then recurse into subfolders.

    The folder is keyed in the database by ``folder_path + "/" + folder.Name``.
    When get_folder_resume_date() returns a timestamp for that key, only items
    received after it are enumerated (via Items.Restrict); otherwise the whole
    folder is scanned. Items are sorted by ReceivedTime with Descending=False.
    Only items whose MessageClass starts with ``IPM.NOTE`` are handled: each is
    saved as a .msg file (SaveAs type 3) into a temporary directory, uploaded
    with upload_msg() and recorded with save_to_db(). After every 1000
    transferred messages (counted across the whole run) the database file is
    uploaded with upload_db().

    Errors on a single item are printed and the loop continues; errors while
    preparing the folder are printed and the folder's items are skipped, but
    its subfolders are still visited.

    NOTE(review): an item that failed earlier and is older than the resume
    timestamp is no longer enumerated on later runs, so it is never retried.

    Args:
        conn: Open sqlite3 connection.
        folder: Outlook folder object to process.
        source: Label stored in the ``source`` column for every saved row.
        folder_path: Path prefix of the parent folder.
        counter: One-element list holding the run-wide count of transferred
            messages; created when omitted and shared with recursive calls.
    """
    if counter is None:
        counter = [0]
    current_path = f"{folder_path}/{folder.Name}"
    try:
        resume_dt = get_folder_resume_date(conn, current_path)
        items = folder.Items
        if resume_dt:
            resume_str = resume_dt.strftime("%Y/%m/%d %H:%M:%S")
            filter_str = f"@SQL=\"urn:schemas:httpmail:datereceived\" > '{resume_str}'"
            items = folder.Items.Restrict(filter_str)
            print(f"\n Složka: {current_path} | pokračuji od: {resume_str}")
        else:
            print(f"\n Složka: {current_path} | od začátku")
        items.Sort("[ReceivedTime]", False)
        count = 0
        skipped = 0
        for item in items:
            try:
                if not item.MessageClass.upper().startswith("IPM.NOTE"):
                    continue
                try:
                    mid = item.PropertyAccessor.GetProperty(PR_INTERNET_MESSAGE_ID)
                except Exception:
                    # Property missing or unreadable: fall back to the EntryID.
                    mid = None
                if not mid:
                    mid = f"entryid:{item.EntryID}"
                if is_uploaded(conn, mid):
                    skipped += 1
                    continue
                with tempfile.TemporaryDirectory() as tmp:
                    safe_name = f"{item.EntryID[-20:]}.msg"
                    tmp_path = Path(tmp) / safe_name
                    item.SaveAs(str(tmp_path), 3)
                    status = upload_msg(tmp_path, safe_name)
                received = item.ReceivedTime.isoformat() if item.ReceivedTime else None
                subject = item.Subject
                save_to_db(conn, mid, subject, item.SenderEmailAddress,
                           received, current_path, source)
                counter[0] += 1
                count += 1
                if counter[0] % 1000 == 0:
                    print(f" → celkem {counter[0]} emailů přeneseno, uploaduji DB...")
                    try:
                        upload_db(DB_PATH)
                    except Exception as db_err:
                        # The message itself is already uploaded and recorded;
                        # a failed DB snapshot must not be reported against it.
                        print(f" CHYBA upload DB | {db_err}")
                print(f" {str(status).upper():6} | {(subject or '')[:60]}")
            except Exception as e:
                # Reading the subject can itself fail here, so go through
                # _safe_attr to keep the handler from aborting the folder.
                print(f" CHYBA | {str(_safe_attr(item, 'Subject'))[:40]} | {e}")
        print(f" → složka hotova: přeneseno {count} | skip {skipped}")
    except Exception as e:
        print(f" CHYBA složka {current_path}: {e}")
    for subfolder in folder.Folders:
        process_folder(conn, subfolder, source, current_path, counter)
# --- MAIN ---
# Make sure Outlook is up and the DB directory exists, then walk every
# top-level store of the MAPI namespace and upload the final database snapshot.
ensure_outlook_running()
Path(DB_PATH).parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(DB_PATH)
try:
    init_db(conn)
    outlook = win32com.client.Dispatch("Outlook.Application")
    ns = outlook.GetNamespace("MAPI")
    for i in range(1, ns.Folders.Count + 1):
        root = ns.Folders.Item(i)
        # Disabled filter, kept for reference: skip stores with "Archive" in the name.
        # if "Archive" in root.Name:
        #     print(f"\n=== {root.Name} — přeskočeno ===")
        #     continue
        source = "mailbox"
        print(f"\n=== {root.Name} ({source}) ===")
        process_folder(conn, root, source)
    # Final DB upload once everything has been processed
    print("\nFinální upload DB...")
    upload_db(DB_PATH)
finally:
    # Close the connection even when processing or the final upload fails.
    conn.close()
print("\nHotovo.")