notebook
This commit is contained in:
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Hlida serverovy log (na Unraidu) dokud dany beh neskonci.
|
||||
Poluje pres SSH, tiskne ridky progress, skonci na koncovem markeru.
|
||||
Pouziti: _watch_server_log.py <vzdalena_cesta_logu> [marker]
|
||||
"""
|
||||
import sys
|
||||
import time
|
||||
|
||||
import paramiko
|
||||
|
||||
import os

# SSH target whose log file is polled.
HOST = "192.168.1.76"
USER = "root"
# SECURITY(review): a root password is committed in source. It can now be
# supplied through the WATCH_SSH_PASS environment variable; the literal is
# kept only as a backward-compatible fallback and should be rotated and
# removed (or replaced by key-based authentication).
PASS = os.environ.get("WATCH_SSH_PASS", "7309208104")

# argv[1] = remote log path, argv[2] = text that marks the end of the run.
logpath = sys.argv[1] if len(sys.argv) > 1 else "/mnt/user/Scripts/MailStore/dryrun_full.log"
marker = sys.argv[2] if len(sys.argv) > 2 else "Zprav proskenovano"

c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts any unknown host key without
# verification; acceptable only on a trusted network.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect(HOST, username=USER, password=PASS, timeout=10)
|
||||
|
||||
|
||||
def sh(cmd):
    """Run *cmd* on the remote host and return its stdout as text.

    Uses the module-level SSH client ``c``. Output is decoded as UTF-8 with
    undecodable bytes replaced; stderr and the exit status are ignored.
    """
    _stdin, stdout, _stderr = c.exec_command(cmd)
    raw_output = stdout.read()
    return raw_output.decode("utf-8", "replace")
|
||||
|
||||
|
||||
import shlex

t0 = time.time()
last_count = -1
# shlex.quote yields a POSIX-shell-safe word. The previous repr() quoting is
# Python syntax, not shell syntax: a path containing a single quote was
# rendered in double quotes, where the remote shell still expands $ and `.
cat_cmd = f"cat {shlex.quote(logpath)} 2>/dev/null"
try:
    while True:
        content = sh(cat_cmd)
        log_lines = content.splitlines()
        # The run is over when the end marker appears or the script crashed.
        done = (marker in content) or ("Traceback" in content)
        folders = content.count("k dobrani=")
        if folders != last_count:
            # Print a progress line only when another folder was finished.
            mins = (time.time() - t0) / 60
            progress = [ln for ln in log_lines if "k dobrani=" in ln]
            last = progress[-1].strip() if progress else ""
            print(f"[{mins:4.1f} min] slozek hotovo: {folders:4} | {last[:70]}", flush=True)
            last_count = folders
        if done:
            print("=== HOTOVO ===", flush=True)
            print("\n".join(log_lines[-10:]), flush=True)
            break
        time.sleep(30)
finally:
    # Close the SSH session even on Ctrl-C or a transport error.
    c.close()
|
||||
@@ -0,0 +1,54 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Test IMAP SEARCH proti MailStore serveru — ověření rychlosti a funkčnosti."""
|
||||
import imaplib
|
||||
import ssl
|
||||
import sys
|
||||
import time
|
||||
|
||||
import os

# MailStore IMAP endpoint used by this benchmark.
HOST = "192.168.1.53"
PORT = 143
USER = "admin"
# SECURITY(review): the admin password is committed in source. It can now be
# supplied through the MAILSTORE_PASS environment variable; the literal stays
# only as a backward-compatible fallback and should be rotated and removed.
PASS = os.environ.get("MAILSTORE_PASS", "*$N(B)vMUym!%")
|
||||
|
||||
|
||||
def connect():
    """Open an authenticated IMAP session to HOST:PORT.

    Connects in plaintext, upgrades the connection with STARTTLS and logs in
    as USER/PASS. Certificate and hostname verification are disabled.

    Returns:
        A logged-in ``imaplib.IMAP4`` instance.

    Raises:
        Whatever ``starttls``/``login`` raise; the socket is closed first so a
        failed attempt does not leak the connection.
    """
    ctx = ssl.create_default_context()
    # NOTE(review): verification is switched off, presumably because the
    # server uses a self-signed certificate; confirm before wider use.
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    M = imaplib.IMAP4(HOST, PORT)
    try:
        M.starttls(ssl_context=ctx)
        M.login(USER, PASS)
    except Exception:
        try:
            M.shutdown()
        except OSError:
            pass  # socket already gone; the original error is what matters
        raise
    return M
|
||||
|
||||
|
||||
def main():
    """Time a few IMAP SEARCH criteria against one fixed MailStore folder.

    Connects, SELECTs the hard-coded folder read-only and, if it holds any
    messages, runs SEARCH for each test criterion, printing the hit count and
    elapsed time. The session is always logged out, even on error.
    """
    t0 = time.time()
    M = connect()
    try:
        print(f"[{time.time()-t0:.1f}s] připojeno + login", flush=True)

        # Direct SELECT of a specific folder (per the original note, LIST
        # does not show other users' archives but SELECT works).
        target = "vladimir.buzalka@buzalka.cz/Exchange vladimir.buzalka/Sent Items"
        typ, data = M.select(f'"{target}"', readonly=True)
        count = int(data[0]) if typ == "OK" and data and data[0] else 0
        print(f"[{time.time()-t0:.1f}s] SELECT '{target}' = {count} zpráv (typ={typ})", flush=True)
        if count == 0:
            return

        # Try SEARCH with several criteria.
        for crit, val in [("ALL", None), ("SUBJECT", "re"), ("FROM", "cz"), ("TEXT", "objednávka")]:
            ts = time.time()
            if val is None:
                typ, data = M.search(None, crit)
            elif val.isascii():
                typ, data = M.search(None, crit, f'"{val}"')
            else:
                # imaplib encodes str arguments as ASCII, so a non-ASCII term
                # raised UnicodeEncodeError. Send it as a UTF-8 literal with
                # an explicit CHARSET instead.
                # NOTE(review): assumes the server accepts CHARSET UTF-8.
                M.literal = val.encode("utf-8")
                typ, data = M.search("UTF-8", crit)
            label = crit if val is None else f'{crit} "{val}"'
            if typ != "OK":
                # Do not count the words of an error message as hits.
                print(f"[{time.time()-t0:.1f}s] SEARCH {label}: CHYBA (typ={typ}) {data}", flush=True)
                continue
            nums = data[0].split() if data and data[0] else []
            print(f"[{time.time()-t0:.1f}s] SEARCH {label}: {len(nums)} výsledků ({time.time()-ts:.2f}s)", flush=True)
    finally:
        M.logout()
    print(f"[{time.time()-t0:.1f}s] hotovo", flush=True)
|
||||
|
||||
|
||||
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,176 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
================================================================================
|
||||
Nazev: mailstore_folder_v1.0.py
|
||||
Verze: 1.0
|
||||
Datum: 2026-06-11
|
||||
Autor: Vladimir Buzalka (asistovano Claude)
|
||||
Popis: Vypise obsah jedne MailStore slozky jako seznam zprav
|
||||
(datum | od | predmet) pres davkovy IMAP FETCH hlavicek.
|
||||
Predstupen ingestu - overuje davkove cteni hlavicek.
|
||||
|
||||
Argument = plna cesta slozky (fullName z mapy), napr.:
|
||||
"vladimir.buzalka@buzalka.cz/Exchange vladimir.buzalka/Sent Items"
|
||||
|
||||
Zdroj: MailStore IMAP server, port 143, STARTTLS, auth Prosty text (LOGIN).
|
||||
IMAP FETCH BODY.PEEK[HEADER.FIELDS (...)] = hlavicky bez oznaceni
|
||||
jako precteno. Davkove jednim prikazem, ne po jedne zprave.
|
||||
|
||||
Spusteni:
|
||||
python mailstore_folder_v1.0.py "...slozka..." # poslednich 50
|
||||
python mailstore_folder_v1.0.py "...slozka..." --limit 200
|
||||
python mailstore_folder_v1.0.py "...slozka..." --all # vse (pozor velke slozky)
|
||||
python mailstore_folder_v1.0.py "...slozka..." --oldest # od nejstarsich
|
||||
================================================================================
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import email
|
||||
import imaplib
|
||||
import re
|
||||
import ssl
|
||||
import sys
|
||||
from email.header import decode_header
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
# --- konfigurace ------------------------------------------------------------
|
||||
import os

# MailStore IMAP endpoint.
HOST = "192.168.1.53"
PORT = 143
USER = "admin"
# SECURITY(review): the admin password is committed in source. It can now be
# supplied through the MAILSTORE_PASS environment variable; the literal stays
# only as a backward-compatible fallback and should be rotated and removed.
PASS = os.environ.get("MAILSTORE_PASS", "*$N(B)vMUym!%")

# Number of messages listed when neither --limit nor --all is given.
DEFAULT_LIMIT = 50
|
||||
|
||||
|
||||
# --- helpery ----------------------------------------------------------------
|
||||
|
||||
def connect() -> imaplib.IMAP4:
    """Open an authenticated IMAP session to HOST:PORT.

    Connects in plaintext, upgrades the connection with STARTTLS and logs in
    as USER/PASS. Certificate and hostname verification are disabled.

    Returns:
        A logged-in ``imaplib.IMAP4`` instance.

    Raises:
        Whatever ``starttls``/``login`` raise; the socket is closed first so a
        failed attempt does not leak the connection.
    """
    ctx = ssl.create_default_context()
    # NOTE(review): verification is switched off, presumably because the
    # server uses a self-signed certificate; confirm before wider use.
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    M = imaplib.IMAP4(HOST, PORT)
    try:
        M.starttls(ssl_context=ctx)
        M.login(USER, PASS)
    except Exception:
        try:
            M.shutdown()
        except OSError:
            pass  # socket already gone; the original error is what matters
        raise
    return M
|
||||
|
||||
|
||||
def encode_mutf7(s: str) -> str:
    """Encode an IMAP folder name as modified UTF-7 (RFC 3501).

    Printable ASCII (0x20-0x7e) is copied through with ``&`` written as
    ``&-``. Every run of other characters becomes ``&<base64>-``, where the
    base64 is taken over the UTF-16-BE bytes, padding is stripped and ``/``
    is replaced by ``,``. The result is pure ASCII.
    """
    import base64 as _b64

    def _shift(run) -> str:
        # One maximal run of non-printable / non-ASCII characters.
        packed = _b64.b64encode(run.group(0).encode("utf-16-be")).decode("ascii")
        return "&" + packed.rstrip("=").replace("/", ",") + "-"

    # "&" is printable, so escaping it first never touches the encoded runs.
    return re.sub(r"[^\x20-\x7e]+", _shift, s.replace("&", "&-"))
|
||||
|
||||
|
||||
def dec(s: str | None) -> str:
    """Decode a MIME-encoded header (``=?utf-8?...?=``) into readable text.

    Args:
        s: Raw header value, or None.

    Returns:
        The decoded text with CR/LF replaced by spaces and surrounding
        whitespace stripped; "" for an empty or missing header. Never raises
        for a malformed encoded-word or an unknown charset label.
    """
    from email.errors import HeaderParseError

    if not s:
        return ""
    try:
        chunks = decode_header(s)
    except HeaderParseError:
        # Broken base64 inside an encoded-word: show the raw value instead of
        # aborting the whole listing.
        chunks = [(str(s), None)]
    out = []
    for txt, enc in chunks:
        if isinstance(txt, bytes):
            try:
                out.append(txt.decode(enc or "utf-8", errors="replace"))
            except LookupError:
                # Charset label Python has no codec for (e.g. "unknown-8bit",
                # which the email package assigns to raw 8-bit headers).
                out.append(txt.decode("utf-8", errors="replace"))
        else:
            out.append(txt)
    return "".join(out).replace("\r", " ").replace("\n", " ").strip()
|
||||
|
||||
|
||||
def fmt_date(raw: str | None) -> str:
    """Format a Date header as ``YYYY-MM-DD HH:MM``.

    Returns "?" for an empty or missing value. If the value cannot be parsed,
    its first 16 characters are returned unchanged.
    """
    if not raw:
        return "?"
    try:
        parsed = parsedate_to_datetime(raw)
        text = parsed.strftime("%Y-%m-%d %H:%M")
    except Exception:
        text = (raw or "")[:16]
    return text
|
||||
|
||||
|
||||
def short(s: str, n: int) -> str:
    """Return *s* cut to at most *n* characters, ending in "…" if it was cut.

    A None or empty *s* yields "".
    """
    text = s or ""
    if len(text) > n:
        return text[: n - 1] + "…"
    return text
|
||||
|
||||
|
||||
# IMAP FETCH header bloky prijdou jako tuple (b'N (BODY[...] {len}', b'<headers>')
|
||||
# Captures the leading decimal number of a FETCH response line; main() reads
# it as the message sequence number.
_NUM_RX = re.compile(rb"^(\d+)\s")
|
||||
|
||||
|
||||
def main() -> int:
    """List the messages of one MailStore folder as date | from | subject.

    Parses the command line, SELECTs the folder read-only, fetches the
    DATE/FROM/SUBJECT headers of the requested range in a single FETCH and
    prints them as a table.

    Returns:
        0 on success (including an empty folder), 1 when the folder cannot be
        opened or the FETCH is rejected. An invalid --limit ends the program
        through argparse's error exit.
    """
    ap = argparse.ArgumentParser(description="Vypis obsahu MailStore slozky")
    ap.add_argument("folder", help="Plna cesta slozky (fullName z mapy)")
    ap.add_argument("--limit", type=int, default=DEFAULT_LIMIT,
                    help=f"Pocet zprav (default {DEFAULT_LIMIT})")
    ap.add_argument("--all", action="store_true", help="Vsechny zpravy (ignoruje --limit)")
    ap.add_argument("--oldest", action="store_true",
                    help="Od nejstarsich (default: od nejnovejsich)")
    args = ap.parse_args()
    if not args.all and args.limit < 1:
        # A zero or negative limit used to produce a nonsensical range.
        ap.error("--limit musi byt >= 1")

    # Inside an IMAP quoted string, backslash and double quote need escaping.
    mailbox = encode_mutf7(args.folder).replace("\\", "\\\\").replace('"', '\\"')

    M = connect()
    try:
        typ, data = M.select(f'"{mailbox}"', readonly=True)
        if typ != "OK":
            print(f"Slozku nelze otevrit: {data}", file=sys.stderr)
            return 1
        total = int(data[0]) if data and data[0] else 0
        print(f"Slozka: {args.folder}")
        print(f"Zprav celkem: {total:,}")
        if total == 0:
            return 0

        # Pick the sequence-number range (1 = oldest, total = newest).
        if args.all:
            lo, hi = 1, total
        else:
            n = min(args.limit, total)
            lo, hi = (1, n) if args.oldest else (total - n + 1, total)
        rng = f"{lo}:{hi}"
        shown = hi - lo + 1
        order = "nejstarsi" if args.oldest else "nejnovejsi"
        print(f"Zobrazuji {shown} zprav ({order} prvni), rozsah #{rng}")
        print("=" * 100)

        # One batched FETCH of the headers; PEEK avoids setting \Seen.
        typ, msgs = M.fetch(rng, "(BODY.PEEK[HEADER.FIELDS (DATE FROM SUBJECT)])")
        if typ != "OK":
            print(f"FETCH selhal: {msgs}", file=sys.stderr)
            return 1
        rows = []
        for item in msgs:
            # Header blocks arrive as (meta, bytes) tuples; the rest is noise.
            if not isinstance(item, tuple):
                continue
            meta, hdr_bytes = item[0], item[1]
            m = _NUM_RX.match(meta or b"")
            seqno = int(m.group(1)) if m else 0
            hdr = email.message_from_bytes(hdr_bytes)
            rows.append((seqno, fmt_date(hdr.get("Date")),
                         dec(hdr.get("From")), dec(hdr.get("Subject"))))

        rows.sort(key=lambda r: r[0], reverse=not args.oldest)

        print(f"{'#':>6} {'Datum':<16} {'Od':<32} Predmet")
        print("-" * 100)
        for seqno, d, frm, subj in rows:
            print(f"{seqno:>6} {d:<16} {short(frm, 32):<32} {short(subj, 40)}")
    finally:
        # Log out on every path, including the early error returns.
        M.logout()

    print("=" * 100)
    print(f"Vypsano {len(rows)} zprav.")
    return 0
|
||||
|
||||
|
||||
# Script entry point: exit with main()'s return code, or 1 on Ctrl-C.
if __name__ == "__main__":
    try:
        exit_code = main()
    except KeyboardInterrupt:
        print("\nPreruseno", file=sys.stderr)
        exit_code = 1
    sys.exit(exit_code)
|
||||
@@ -0,0 +1,427 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
================================================================================
|
||||
Nazev: mailstore_ingest_v1.0.py
|
||||
Verze: 1.0
|
||||
Datum: 2026-06-11
|
||||
Autor: Vladimir Buzalka (asistovano Claude)
|
||||
Popis: Backfill stare historie z MailStore archivu do MongoDB `emaily`.
|
||||
Dobere do existujici kolekce schranky JEN zpravy, ktere tam jeste
|
||||
nejsou - dedup podle internet Message-ID (= _id v Mongu).
|
||||
|
||||
Cilove schema dokumentu = stejne jako Graph import, takze navazujici
|
||||
enrich_fulltext_emails + MCP `emaily` search funguji bez uprav.
|
||||
|
||||
Strategie:
|
||||
1. Nacti SET vsech Message-ID (_id) co uz v Mongu pro schranku jsou.
|
||||
2. Projdi slozky schranky (API GetChildFolders).
|
||||
3. Per slozka davkove stahni hlavicky (UID, DATE, MESSAGE-ID) - rychle.
|
||||
4. Kandidat = Message-ID neni v setu AND rok(DATE) >= --since.
|
||||
5. Pro kandidaty stahni cele telo (RFC822), naparsuj, upsert do Mongo.
|
||||
|
||||
Filtr data je client-side z DATE headeru (IMAP SEARCH je u MailStore 78s/k nicemu).
|
||||
|
||||
Spusteni:
|
||||
# KOLIK by se dobralo (nic nezapise) - delej VZDY prvni:
|
||||
python mailstore_ingest_v1.0.py "vladimir.buzalka@buzalka.cz" --since 2020 --dry-run
|
||||
# ostry beh:
|
||||
python mailstore_ingest_v1.0.py "vladimir.buzalka@buzalka.cz" --since 2020
|
||||
# test na jedne slozce / s limitem:
|
||||
python mailstore_ingest_v1.0.py "vladimir.buzalka@buzalka.cz" --since 2020 \
|
||||
--folder "vladimir.buzalka@buzalka.cz/Exchange vladimir.buzalka/Sent Items" --limit 50
|
||||
================================================================================
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import email
|
||||
import imaplib
|
||||
import json
|
||||
import re
|
||||
import ssl
|
||||
import sys
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from base64 import b64encode
|
||||
from datetime import datetime, timezone
|
||||
from email.header import decode_header
|
||||
from email.utils import getaddresses, parsedate_to_datetime
|
||||
|
||||
from pymongo import MongoClient, UpdateOne
|
||||
|
||||
# --- konfigurace ------------------------------------------------------------
|
||||
import os

# MailStore server: IMAP for message data, Administration API for folders.
MS_HOST = "192.168.1.53"
IMAP_PORT = 143
API_PORT = 8463
MS_USER = "admin"
# SECURITY(review): the admin password is committed in source. It can now be
# supplied through the MAILSTORE_PASS environment variable; the literal stays
# only as a backward-compatible fallback and should be rotated and removed.
MS_PASS = os.environ.get("MAILSTORE_PASS", "*$N(B)vMUym!%")

# Target MongoDB; main() uses one collection per mailbox inside MONGO_DB.
MONGO_URI = "mongodb://192.168.1.76:27017"
MONGO_DB = "emaily"

HEADER_BATCH = 2000  # how many headers to FETCH in one IMAP command
UPSERT_BATCH = 100  # how many documents to write to Mongo in one bulk call

# --- API (only GetChildFolders, to list the folders) ------------------------
_API_BASE = f"https://{MS_HOST}:{API_PORT}/api"
_API_AUTH = "Basic " + b64encode(f"{MS_USER}:{MS_PASS}".encode()).decode()
# NOTE(review): certificate verification is disabled for API calls.
_CTX = ssl.create_default_context()
_CTX.check_hostname = False
_CTX.verify_mode = ssl.CERT_NONE
|
||||
|
||||
|
||||
def api_result(method: str, params: dict | None = None):
    """Invoke a MailStore Administration API method and return its result.

    Sends *params* as a form-encoded POST to ``/api/invoke/<method>`` with
    Basic authentication and parses the JSON reply (a UTF-8 BOM is skipped).

    Raises:
        RuntimeError: when the reply's statusCode is not "succeeded".
    """
    form = urllib.parse.urlencode(params or {}).encode()
    http_headers = {
        "Authorization": _API_AUTH,
        "Content-Type": "application/x-www-form-urlencoded",
    }
    req = urllib.request.Request(f"{_API_BASE}/invoke/{method}", data=form,
                                 method="POST", headers=http_headers)
    with urllib.request.urlopen(req, context=_CTX, timeout=30) as resp:
        reply = json.loads(resp.read().decode("utf-8-sig"))
    if reply.get("statusCode") == "succeeded":
        return reply.get("result")
    detail = (reply.get("error") or {}).get("message")
    raise RuntimeError(f"{method}: {detail}")
|
||||
|
||||
|
||||
def collect_folders(mailbox: str) -> list[str]:
    """Return the full paths of all folders below *mailbox*.

    The folder tree comes from the GetChildFolders API call (up to 20 levels)
    and is flattened depth-first, parents before their children.
    """
    root = api_result("GetChildFolders", {"folder": mailbox, "maxLevels": 20})
    paths: list[str] = []
    pending = [root]
    while pending:
        node = pending.pop()
        if node is not root:
            paths.append(node["fullName"])
        # Push children reversed so the first child is visited next.
        pending.extend(reversed(node.get("childFolders") or []))
    return paths
|
||||
|
||||
|
||||
# --- IMAP --------------------------------------------------------------------
|
||||
|
||||
def imap_connect() -> imaplib.IMAP4:
    """Open an authenticated IMAP session to MS_HOST:IMAP_PORT.

    Connects in plaintext, upgrades the connection with STARTTLS and logs in
    as MS_USER/MS_PASS. Certificate and hostname verification are disabled.

    Returns:
        A logged-in ``imaplib.IMAP4`` instance.

    Raises:
        Whatever ``starttls``/``login`` raise; the socket is closed first so a
        failed (re)connect does not leak the connection.
    """
    ctx = ssl.create_default_context()
    # NOTE(review): verification is switched off, presumably because the
    # server uses a self-signed certificate; confirm before wider use.
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    M = imaplib.IMAP4(MS_HOST, IMAP_PORT)
    try:
        M.starttls(ssl_context=ctx)
        M.login(MS_USER, MS_PASS)
    except Exception:
        try:
            M.shutdown()
        except OSError:
            pass  # socket already gone; the original error is what matters
        raise
    return M
|
||||
|
||||
|
||||
# Captures the leading decimal number of a FETCH response line (used as the
# message sequence number).
_SEQ_RX = re.compile(rb"^(\d+)\s")
|
||||
# Captures the number following "UID " anywhere in the FETCH response line.
_UID_RX = re.compile(rb"UID (\d+)")
|
||||
|
||||
|
||||
def dec(s) -> str:
    """Decode a MIME-encoded header value into readable text.

    Args:
        s: Raw header value, or None.

    Returns:
        The decoded text with CR/LF replaced by spaces and surrounding
        whitespace stripped; "" for an empty or missing value. Never raises
        for a malformed encoded-word or an unknown charset label, so one odd
        header cannot abort an ingest run.
    """
    from email.errors import HeaderParseError

    if not s:
        return ""
    try:
        chunks = decode_header(s)
    except HeaderParseError:
        # Broken base64 inside an encoded-word: keep the raw value.
        chunks = [(str(s), None)]
    out = []
    for txt, enc in chunks:
        if not isinstance(txt, bytes):
            out.append(txt)
            continue
        try:
            out.append(txt.decode(enc or "utf-8", errors="replace"))
        except LookupError:
            # Charset label Python has no codec for (e.g. "unknown-8bit",
            # which the email package assigns to raw 8-bit headers).
            out.append(txt.decode("utf-8", errors="replace"))
    return "".join(out).replace("\r", " ").replace("\n", " ").strip()
|
||||
|
||||
|
||||
def parse_date(raw) -> datetime | None:
    """Parse an RFC 2822 Date header into a naive datetime.

    A timezone-aware result is converted to UTC and stripped of its tzinfo; a
    naive result is returned as parsed. Returns None for an empty value or
    when parsing fails for any reason.
    """
    if not raw:
        return None
    try:
        parsed = parsedate_to_datetime(raw)
        if not parsed.tzinfo:
            return parsed
        return parsed.astimezone(timezone.utc).replace(tzinfo=None)
    except Exception:
        return None
|
||||
|
||||
|
||||
def encode_mutf7(s: str) -> str:
    """Encode an IMAP folder name as modified UTF-7 (RFC 3501).

    Folder names with diacritics must be sent this way because, per the
    original note, MailStore does not support UTF8=ACCEPT. Printable ASCII
    (0x20-0x7e) passes through with ``&`` written as ``&-``; every run of
    other characters becomes ``&<base64 of UTF-16-BE>-`` with padding removed
    and ``/`` replaced by ``,``. The result is pure ASCII.
    """
    import base64 as _b64

    pieces = []
    # Split into alternating runs of printable and non-printable characters.
    for run in re.findall(r"[\x20-\x7e]+|[^\x20-\x7e]+", s):
        if 0x20 <= ord(run[0]) <= 0x7e:
            pieces.append(run.replace("&", "&-"))
        else:
            packed = _b64.b64encode(run.encode("utf-16-be")).decode("ascii")
            pieces.append("&" + packed.rstrip("=").replace("/", ",") + "-")
    return "".join(pieces)
|
||||
|
||||
|
||||
def imap_select(M: imaplib.IMAP4, folder: str):
    """SELECT *folder* read-only and return imaplib's ``(typ, data)`` pair.

    The name is converted to modified UTF-7 (needed for diacritics) and sent
    as an IMAP quoted string. Backslash and double quote are escaped so a
    folder name containing them cannot break the command.
    """
    name = encode_mutf7(folder).replace("\\", "\\\\").replace('"', '\\"')
    return M.select(f'"{name}"', readonly=True)
|
||||
|
||||
|
||||
def scan_folder_headers(M: imaplib.IMAP4, folder: str):
    """Fetch (seq, uid, msgid, date) for every message of *folder* in batches.

    Returns:
        ``(total, items)``: ``(None, [])`` when the folder cannot be selected,
        ``(0, [])`` when it is empty, otherwise the message count and a list
        of ``(sequence number, UID, Message-ID, parsed date)`` tuples. Sequence
        number and UID are 0 when not found in the response line.

    Raises:
        imaplib.IMAP4.error: when the server rejects a FETCH batch. Previously
            such a batch was silently treated as containing no messages, which
            hid missing data from the caller.
    """
    typ, data = imap_select(M, folder)
    if typ != "OK":
        return None, []
    total = int(data[0]) if data and data[0] else 0
    if total == 0:
        return 0, []
    items = []
    for lo in range(1, total + 1, HEADER_BATCH):
        hi = min(lo + HEADER_BATCH - 1, total)
        typ, msgs = M.fetch(f"{lo}:{hi}",
                            "(UID BODY.PEEK[HEADER.FIELDS (MESSAGE-ID DATE)])")
        if typ != "OK":
            raise imaplib.IMAP4.error(f"FETCH {lo}:{hi} selhal: {msgs!r}")
        for it in msgs:
            # Header blocks arrive as (meta, bytes) tuples; skip the rest.
            if not isinstance(it, tuple):
                continue
            meta, hdr = it[0], it[1]
            mseq = _SEQ_RX.match(meta or b"")
            # NOTE(review): only the part before the literal is searched; a
            # server that reports UID after the header literal yields uid 0.
            muid = _UID_RX.search(meta or b"")
            h = email.message_from_bytes(hdr or b"")
            mid = (h.get("Message-ID") or "").strip()
            items.append((int(mseq.group(1)) if mseq else 0,
                          int(muid.group(1)) if muid else 0,
                          mid, parse_date(h.get("Date"))))
    return total, items
|
||||
|
||||
|
||||
def fetch_full(M: imaplib.IMAP4, seq: int) -> bytes | None:
    """Fetch the complete raw message (RFC822) with sequence number *seq*.

    Returns the message bytes, or None when the FETCH did not return "OK" or
    the first response item is not a (meta, payload) tuple.
    """
    typ, data = M.fetch(str(seq), "(RFC822)")
    if typ == "OK" and data and isinstance(data[0], tuple):
        return data[0][1]
    return None
|
||||
|
||||
|
||||
# --- mapovani EML -> Mongo dokument -----------------------------------------
|
||||
|
||||
def relativize(folder: str, mailbox: str) -> str:
    """Shorten a full MailStore folder path to a mailbox-relative path.

    When the path starts with *mailbox*, that segment is dropped, together
    with the following segment (the 'Exchange ...' level) if more than two
    segments exist. Any other path is reduced to its last segment.
    Example: ``mailbox/Exchange X/Sent Items`` -> ``Sent Items``.
    """
    parts = folder.split("/")
    if len(parts) < 2 or parts[0] != mailbox:
        return parts[-1]
    skip = 2 if len(parts) > 2 else 1
    return "/".join(parts[skip:])
|
||||
|
||||
|
||||
def parse_addr_one(raw) -> dict:
    """Parse the first address of a header into ``{"email", "name"}``.

    The e-mail is lower-cased (None if empty). The name is the decoded display
    name, falling back to the address. Both are None when *raw* is empty or
    contains no address.
    """
    pairs = getaddresses([raw]) if raw else []
    if not pairs:
        return {"email": None, "name": None}
    display, address = pairs[0]
    email_value = (address or "").lower() or None
    name_value = dec(display) or (address or None)
    return {"email": email_value, "name": name_value}
|
||||
|
||||
|
||||
def parse_recipients(msg) -> list[dict]:
    """Collect the To/Cc/Bcc recipients of *msg*.

    Returns a list of ``{"type", "email", "name"}`` dicts in header order
    (to, cc, bcc). Entries without an address are skipped, the e-mail is
    lower-cased, and the name falls back to the address.
    """
    found = []
    for kind, header_name in (("to", "To"), ("cc", "Cc"), ("bcc", "Bcc")):
        value = msg.get(header_name)
        if value:
            found.extend(
                {"type": kind, "email": addr.lower(), "name": dec(name) or addr}
                for name, addr in getaddresses([value])
                if addr
            )
    return found
|
||||
|
||||
|
||||
def extract_bodies(msg):
    """Extract the text body, HTML body and attachment list from *msg*.

    Walks all non-multipart parts. A part counts as an attachment when its
    Content-Disposition contains "attachment" (case-insensitive), or when it
    has a filename and is not text/plain or text/html. Otherwise the first
    text/plain part becomes the text body and the first text/html part the
    HTML body.

    Returns:
        ``(body_text, body_html, attachments)``; each attachment is a dict
        with filename, size_bytes, mime_type and is_inline.
    """

    def _decode(part, payload) -> str:
        # A charset label Python has no codec for used to raise LookupError
        # and abort the whole ingest; fall back to UTF-8 with replacement.
        data = payload or b""
        try:
            return data.decode(part.get_content_charset() or "utf-8", errors="replace")
        except LookupError:
            return data.decode("utf-8", errors="replace")

    body_text = body_html = ""
    atts = []
    for part in msg.walk():
        if part.is_multipart():
            continue
        ct = part.get_content_type()
        # Disposition values are case-insensitive ("Attachment" is valid).
        disp = str(part.get("Content-Disposition") or "").lower()
        payload = part.get_payload(decode=True)
        is_att = "attachment" in disp or (part.get_filename() and ct not in ("text/plain", "text/html"))
        if is_att:
            atts.append({
                "filename": dec(part.get_filename()) or "(bez nazvu)",
                "size_bytes": len(payload or b""),
                "mime_type": ct,
                "is_inline": "inline" in disp,
            })
        elif ct == "text/plain" and not body_text:
            body_text = _decode(part, payload)
        elif ct == "text/html" and not body_html:
            body_html = _decode(part, payload)
    return body_text, body_html, atts
|
||||
|
||||
|
||||
def build_doc(raw: bytes, uid: int, folder: str, mailbox: str) -> dict | None:
    """Convert a raw RFC822 message into the Mongo document for `emaily`.

    Args:
        raw: Complete message bytes.
        uid: Stored as ``mailstore_uid``.
        folder: Full MailStore folder path.
        mailbox: Mailbox name, used to derive the relative ``folder_path``.

    Returns:
        The document with the Message-ID as ``_id``, or None when the message
        has no Message-ID.
    """
    msg = email.message_from_bytes(raw)
    message_id = (msg.get("Message-ID") or "").strip()
    if not message_id:
        return None

    sent = parse_date(msg.get("Date"))
    text, html, attachments = extract_bodies(msg)
    stamp = datetime.now(timezone.utc).replace(tzinfo=None)
    folder_lc = folder.lower()

    doc = {
        "_id": message_id,
        "source": "mailstore",
        "mailstore_uid": uid,
        "mailstore_folder": folder,
    }
    # graph_id is deliberately LEFT OUT: per the original note the collection
    # has a unique+sparse index on graph_id, and an explicit None would
    # collide (sparse only ignores a MISSING field).
    doc.update({
        "conversation_id": None,
        "folder_path": relativize(folder, mailbox),
        "subject": dec(msg.get("Subject")),
        "sender": parse_addr_one(msg.get("From")),
        "recipients": parse_recipients(msg),
        "to": dec(msg.get("To")),
        "cc": dec(msg.get("Cc")),
        "bcc": dec(msg.get("Bcc")),
    })
    # The Date header serves as both sent and received time.
    doc.update({
        "sent_at": sent,
        "received_at": sent,
        "modified_at": stamp,
        "created_at": stamp,
        "parsed_at": stamp,
    })
    doc.update({
        "is_read": True,
        "is_draft": "draft" in folder_lc or "koncept" in folder_lc,
        "has_attachments": bool(attachments),
        "attachment_count": len(attachments),
        "attachments": attachments,
        "body_html": html or None,
        "body_text": text or None,
        "body_preview": (text or "")[:255],
    })
    return doc
|
||||
|
||||
|
||||
# --- hlavni ------------------------------------------------------------------
|
||||
|
||||
def main() -> int:
    """Backfill messages missing in Mongo from a MailStore mailbox.

    Loads the set of known Message-IDs from the mailbox collection, scans the
    headers of every folder (or only --folder), selects messages that are
    unknown and inside the --since/--until year window, then fetches, parses
    and upserts them. With --dry-run only the counts are printed.

    A folder whose header scan fails is logged and skipped after a reconnect.
    A message that cannot be parsed is logged and skipped. An IMAP failure
    while fetching bodies writes what was already parsed, reconnects and moves
    on to the next folder.

    Returns:
        0 on completion.
    """
    ap = argparse.ArgumentParser(description="MailStore -> Mongo backfill (dedup dle Message-ID)")
    ap.add_argument("mailbox", help="Schranka (top-level slozka MailStore = Mongo kolekce)")
    ap.add_argument("--since", type=int, default=None,
                    help="Ber jen zpravy s rokem >= SINCE (napr. 2020)")
    ap.add_argument("--until", type=int, default=None,
                    help="Ber jen zpravy s rokem <= UNTIL")
    ap.add_argument("--folder", default=None, help="Jen jedna konkretni slozka (plna cesta)")
    ap.add_argument("--limit", type=int, default=None, help="Max zprav k ingestu (test)")
    ap.add_argument("--max-folders", type=int, default=None, help="Max slozek (diagnostika)")
    ap.add_argument("--dry-run", action="store_true",
                    help="Jen spocitej kolik by se dobralo, NIC nezapisuj")
    args = ap.parse_args()

    t0 = time.time()
    print(f"=== MailStore ingest v1.0 | schranka: {args.mailbox} ===")
    print(f"Filtr: rok >= {args.since or '-'}{' a <= ' + str(args.until) if args.until else ''}"
          f"{' [DRY-RUN]' if args.dry_run else ''}")

    # Mongo + set of already known Message-IDs.
    mongo = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
    mongo.admin.command("ping")
    coll = mongo[MONGO_DB][args.mailbox]
    print("Nacitam existujici Message-ID z Mongo...", flush=True)
    # distinct() returns everything in one reply document (16 MB BSON cap) and
    # fails on large collections; a projected cursor streams the ids instead.
    known = {d["_id"] for d in coll.find({}, {"_id": 1})}
    print(f" v Mongu uz mam: {len(known):,} zprav")

    # Folders to scan.
    folders = [args.folder] if args.folder else collect_folders(args.mailbox)
    print(f"Slozek ke kontrole: {len(folders)}")

    M = imap_connect()

    grand_seen = grand_cand = grand_ingested = 0
    queue: list[UpdateOne] = []

    def flush():
        # Write the queued upserts (never in dry-run) and empty the queue.
        nonlocal queue
        if queue and not args.dry_run:
            coll.bulk_write(queue, ordered=False)
        queue = []

    def reconnect():
        # Replace a (probably dead) IMAP session with a fresh one.
        nonlocal M
        try:
            M.logout()
        except Exception:
            pass  # the old connection is expected to be broken already
        M = imap_connect()

    try:
        for fidx, folder in enumerate(folders):
            if args.max_folders and fidx >= args.max_folders:
                print(f" (--max-folders {args.max_folders} dosazeno)")
                break
            rel = relativize(folder, args.mailbox)
            try:
                total, items = scan_folder_headers(M, folder)
            except Exception as ex:
                # One bad folder must not bring down the whole run: log it,
                # reconnect (the IMAP session may have been aborted), go on.
                print(f" [{rel[:45]:45}] CHYBA: {type(ex).__name__}: {str(ex)[:80]}", flush=True)
                reconnect()
                continue
            if not total:
                continue

            # Candidates: have a Message-ID, not yet known, year in window.
            cands = []
            for seq, uid, mid, dt in items:
                if not mid or mid in known:
                    continue
                yr = dt.year if dt else None
                if args.since and (yr is None or yr < args.since):
                    continue
                if args.until and (yr is None or yr > args.until):
                    continue
                cands.append((seq, uid, mid))
            grand_seen += total
            grand_cand += len(cands)
            print(f" [{rel[:45]:45}] zprav={total:>6} k dobrani={len(cands):>6}", flush=True)

            if args.dry_run:
                continue

            try:
                for seq, uid, mid in cands:
                    if args.limit and grand_ingested >= args.limit:
                        break
                    raw = fetch_full(M, seq)
                    if not raw:
                        continue
                    try:
                        doc = build_doc(raw, uid, folder, args.mailbox)
                    except Exception as ex:
                        # One unparseable message must not abort the run.
                        print(f" [{rel[:45]:45}] CHYBA zpravy #{seq}: "
                              f"{type(ex).__name__}: {str(ex)[:80]}", flush=True)
                        continue
                    if not doc:
                        continue
                    queue.append(UpdateOne({"_id": doc["_id"]}, {"$setOnInsert": doc}, upsert=True))
                    known.add(doc["_id"])
                    grand_ingested += 1
                    if len(queue) >= UPSERT_BATCH:
                        flush()
            except (imaplib.IMAP4.error, OSError) as ex:
                # Connection lost while fetching bodies: keep what was parsed,
                # reconnect and continue with the next folder.
                print(f" [{rel[:45]:45}] CHYBA: {type(ex).__name__}: {str(ex)[:80]}", flush=True)
                flush()
                reconnect()
                continue
            flush()
            if args.limit and grand_ingested >= args.limit:
                print(f" (dosazen limit {args.limit})")
                break
        flush()
    finally:
        # Release both connections on every exit path.
        try:
            M.logout()
        except Exception:
            pass  # nothing useful can be done if logout itself fails
        mongo.close()

    print("-" * 64)
    print(f"Zprav proskenovano: {grand_seen:,}")
    print(f"K dobrani (chybi, v okne): {grand_cand:,}")
    if args.dry_run:
        print(">>> DRY-RUN: nic nezapsano. Pro ostry beh spust bez --dry-run.")
    else:
        print(f"Zapsano do Mongo: {grand_ingested:,}")
    print(f"Trvalo: {time.time()-t0:.1f}s")
    return 0
|
||||
|
||||
|
||||
# Script entry point: exit with main()'s return code, or 1 on Ctrl-C.
if __name__ == "__main__":
    try:
        exit_code = main()
    except KeyboardInterrupt:
        print("\nPreruseno", file=sys.stderr)
        exit_code = 1
    sys.exit(exit_code)
|
||||
@@ -0,0 +1,176 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
================================================================================
|
||||
Nazev: mailstore_map_v1.0.py
|
||||
Verze: 1.0
|
||||
Datum: 2026-06-11
|
||||
Autor: Vladimir Buzalka (asistovano Claude)
|
||||
Popis: Vykresli "mapu" jedne MailStore schranky - strom slozek z
|
||||
Administration API (GetChildFolders) + celkovy pocet zprav schranky
|
||||
z GetFolderStatistics.
|
||||
|
||||
Argument = nazev schranky (top-level slozka v MailStore archivu),
|
||||
napr. "vladimir.buzalka@buzalka.cz" nebo "lenka.hanzalova".
|
||||
Seznam dostupnych schranek: --list (vola GetChildFolders na korenove urovni, maxLevels=1).
|
||||
|
||||
Zdroj: MailStore Server Administration API, HTTPS port 8463.
|
||||
Auth: admin / heslo (Basic). Parametry jako form-body. Async operace
|
||||
(GetFolderStatistics) se poluji pres /api/get-status.
|
||||
|
||||
Pozn.: API umi jen strukturu + souhrnne pocty per schranka. Pocty zprav per
|
||||
jednotliva slozka API levne nedava - to bude dalsi krok (IMAP STATUS).
|
||||
|
||||
Spusteni:
|
||||
python mailstore_map_v1.0.py "lenka.hanzalova"
|
||||
python mailstore_map_v1.0.py "vladimir.buzalka@buzalka.cz" --no-stats
|
||||
python mailstore_map_v1.0.py --list
|
||||
================================================================================
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import ssl
|
||||
import sys
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from base64 import b64encode
|
||||
|
||||
# --- konfigurace ------------------------------------------------------------
|
||||
import os

# MailStore Server Administration API endpoint.
HOST = "192.168.1.53"
PORT = 8463
USER = "admin"
# SECURITY(review): the admin password is committed in source. It can now be
# supplied through the MAILSTORE_PASS environment variable; the literal stays
# only as a backward-compatible fallback and should be rotated and removed.
PASS = os.environ.get("MAILSTORE_PASS", "*$N(B)vMUym!%")

BASE = f"https://{HOST}:{PORT}/api"
_AUTH = "Basic " + b64encode(f"{USER}:{PASS}".encode()).decode()
# NOTE(review): certificate verification is disabled for API calls.
_CTX = ssl.create_default_context()
_CTX.check_hostname = False
_CTX.verify_mode = ssl.CERT_NONE
|
||||
|
||||
|
||||
# --- API helper -------------------------------------------------------------
|
||||
|
||||
def _post(path: str, params: dict | None = None) -> dict:
    """Send one form-encoded POST to ``BASE/<path>`` and return the JSON reply.

    The response is decoded as ``utf-8-sig`` so a leading BOM is dropped
    before parsing.
    """
    form = urllib.parse.urlencode(params or {}).encode()
    request = urllib.request.Request(f"{BASE}/{path}", data=form, method="POST")
    request.add_header("Authorization", _AUTH)
    request.add_header("Content-Type", "application/x-www-form-urlencoded")
    with urllib.request.urlopen(request, context=_CTX, timeout=30) as resp:
        text = resp.read().decode("utf-8-sig")  # utf-8-sig strips the BOM
    return json.loads(text)
|
||||
|
||||
|
||||
def api(method: str, params: dict | None = None, poll_timeout: int = 120) -> dict:
    """Invoke an API method and wait for it to finish.

    While the reply's statusCode is "running", ``get-status`` is polled with
    the reply's token and last known status version. The complete final
    response object is returned.

    Raises:
        TimeoutError: when polling lasts longer than *poll_timeout* seconds.
    """
    reply = _post(f"invoke/{method}", params)
    token = reply.get("token")
    version = reply.get("statusVersion", 0)
    started = time.time()
    # Not entered at all for synchronous calls (statusCode != "running").
    while reply.get("statusCode") == "running":
        if time.time() - started > poll_timeout:
            raise TimeoutError(f"{method}: polling prekrocil {poll_timeout}s")
        reply = _post("get-status", {"token": token,
                                     "lastKnownStatusVersion": version,
                                     "millisecondsTimeout": 5000})
        version = reply.get("statusVersion", version)
    return reply
|
||||
|
||||
|
||||
def api_result(method: str, params: dict | None = None):
    """Invoke an API method and return only its ``result`` field.

    Raises:
        RuntimeError: when the final statusCode is not "succeeded"; the
            message carries the API's error text, or "neznama chyba".
    """
    reply = api(method, params)
    if reply.get("statusCode") == "succeeded":
        return reply.get("result")
    err = (reply.get("error") or {}).get("message", "neznama chyba")
    raise RuntimeError(f"{method} selhalo: {err}")
|
||||
|
||||
|
||||
# --- formatovani ------------------------------------------------------------
|
||||
|
||||
def human_size(n: int) -> str:
    """Format a byte count with one decimal and a unit from B to TB.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = float(n)
    idx = 0
    while not (value < 1024) and idx < len(units) - 1:
        value /= 1024
        idx += 1
    return f"{value:.1f} {units[idx]}"
|
||||
|
||||
|
||||
def print_tree(node: dict, indent: int = 0) -> int:
    """Print the folder tree below *node* and return the number of folders.

    Each child is printed on its own line, indented by depth and prefixed with
    "+" when it reports child folders, otherwise "-".
    """
    pad = " " * indent
    printed = 0
    for child in node.get("childFolders") or []:
        sign = "+" if child.get("hasChildFolders") else "-"
        print(f" {pad}{sign} {child.get('name')}")
        # Count this folder plus everything printed beneath it.
        printed += 1 + print_tree(child, indent + 1)
    return printed
|
||||
|
||||
|
||||
# --- akce -------------------------------------------------------------------
|
||||
|
||||
def list_mailboxes() -> None:
    """Print the names of the archive's top-level folders (the mailboxes)."""
    root = api_result("GetChildFolders", {"maxLevels": 1})
    print("Dostupne schranky (top-level slozky archivu):")
    names = [child.get("name") for child in root.get("childFolders") or []]
    for name in names:
        print(f" - {name}")
|
||||
|
||||
|
||||
def map_mailbox(mailbox: str, with_stats: bool = True) -> None:
    """Print the folder map of one mailbox.

    When *with_stats* is true, the mailbox's message count and size are looked
    up in GetFolderStatistics first (slow, about 20 s per the original note)
    and shown in the header if an entry for the mailbox exists. The folder
    tree itself comes from GetChildFolders.
    """
    # 1) Mailbox totals (optional).
    total = size = None
    if with_stats:
        print("Nacitam statistiky (GetFolderStatistics, muze trvat ~20s)...",
              file=sys.stderr, flush=True)
        stats = api_result("GetFolderStatistics") or []
        hit = next((s for s in stats if s.get("folder") == mailbox), None)
        if hit is not None:
            total, size = hit.get("count"), hit.get("size")

    # 2) Folder tree.
    tree = api_result("GetChildFolders", {"folder": mailbox, "maxLevels": 20})

    heavy_rule = "=" * 64
    print(heavy_rule)
    print(f"MAILSTORE MAPA SCHRANKY: {mailbox}")
    if total is not None:
        print(f"Celkem zprav: {total:,} Velikost: {human_size(size)}")
    print(heavy_rule)
    folder_count = print_tree(tree)
    print("-" * 64)
    print(f"Slozek celkem: {folder_count}")
|
||||
|
||||
|
||||
def main() -> int:
    """Parse the command line and either list mailboxes or map one of them.

    Returns:
        0 on success. A missing mailbox name without ``--list`` is reported
        through ``ArgumentParser.error``.
    """
    parser = argparse.ArgumentParser(description="MailStore mapa schranky (API)")
    parser.add_argument(
        "mailbox",
        nargs="?",
        help="Nazev schranky (top-level slozka)",
    )
    parser.add_argument(
        "--list",
        action="store_true",
        help="Vypsat dostupne schranky a skoncit",
    )
    parser.add_argument(
        "--no-stats",
        action="store_true",
        help="Preskocit celkovy pocet zprav (rychlejsi, bez ~20s GetFolderStatistics)",
    )
    opts = parser.parse_args()

    if opts.list:
        list_mailboxes()
    elif opts.mailbox:
        map_mailbox(opts.mailbox, with_stats=not opts.no_stats)
    else:
        parser.error("zadej nazev schranky, nebo --list pro seznam")
    return 0
|
||||
|
||||
|
||||
# Script entry point: run main() and turn Ctrl+C into a clean exit code 1.
if __name__ == "__main__":
    try:
        exit_code = main()
    except KeyboardInterrupt:
        print("\nPreruseno", file=sys.stderr)
        exit_code = 1
    sys.exit(exit_code)
|
||||
@@ -0,0 +1,212 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
================================================================================
|
||||
Nazev: mailstore_read_v1.0.py
|
||||
Verze: 1.0
|
||||
Datum: 2026-06-11
|
||||
Autor: Vladimir Buzalka (asistovano Claude)
|
||||
Popis: Precte JEDNU konkretni zpravu z MailStore slozky a vypise jeji plny
|
||||
obsah - hlavicky, telo (text), seznam priloh. Volitelne ulozi
|
||||
prilohy na disk. Posledni dilek rucniho prohlizece archivu.
|
||||
|
||||
Argumenty: <slozka> <cislo>
|
||||
slozka = plna cesta (fullName z mapy / vystupu mailstore_folder)
|
||||
cislo = poradove cislo zpravy (# z mailstore_folder), nebo UID s --uid
|
||||
|
||||
Zdroj: MailStore IMAP, port 143, STARTTLS, auth Prosty text (LOGIN).
|
||||
FETCH <n> (RFC822) = cely syrovy EML, naparsovan emailem.
|
||||
|
||||
Spusteni:
|
||||
python mailstore_read_v1.0.py "...slozka..." 63627
|
||||
python mailstore_read_v1.0.py "...slozka..." 12345 --uid # cislo je UID
|
||||
python mailstore_read_v1.0.py "...slozka..." 63627 --save .\att # ulozi prilohy
|
||||
python mailstore_read_v1.0.py "...slozka..." 63627 --raw # vypise cely EML
|
||||
================================================================================
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import email
|
||||
import imaplib
|
||||
import os
|
||||
import ssl
|
||||
import sys
|
||||
from email.header import decode_header
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
# --- konfigurace ------------------------------------------------------------
|
||||
# NOTE(review): the connection settings, including the password, are
# hard-coded in source. Each value can be overridden through an environment
# variable so the secret does not have to live in the repository; the literals
# below remain as defaults to keep existing invocations working.
HOST = os.environ.get("MAILSTORE_HOST", "192.168.1.53")
PORT = int(os.environ.get("MAILSTORE_PORT", "143"))
USER = os.environ.get("MAILSTORE_USER", "admin")
PASS = os.environ.get("MAILSTORE_PASS", "*$N(B)vMUym!%")

BODY_PREVIEW_CHARS = 4000  # how many characters of the body to print to the screen
|
||||
|
||||
|
||||
def connect() -> imaplib.IMAP4:
    """Open an authenticated IMAP connection to the MailStore server.

    Connects in plain text to ``HOST:PORT``, upgrades with STARTTLS and logs
    in with ``USER`` / ``PASS``. Certificate and hostname verification are
    switched off on the TLS context.

    Returns:
        The logged-in ``imaplib.IMAP4`` client; the caller must log out.

    Raises:
        Whatever ``imaplib`` raises for a failed connect, STARTTLS or login.
        The socket is closed before the exception propagates.
    """
    ctx = ssl.create_default_context()
    # Verification is disabled; check_hostname has to be cleared first,
    # otherwise setting CERT_NONE raises ValueError.
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    M = imaplib.IMAP4(HOST, PORT)
    try:
        M.starttls(ssl_context=ctx)
        M.login(USER, PASS)
    except Exception:
        # Do not leak the open socket when the handshake or login fails.
        M.shutdown()
        raise
    return M
|
||||
|
||||
|
||||
def encode_mutf7(s: str) -> str:
    """Encode an IMAP folder name as modified UTF-7 (RFC 3501).

    Printable ASCII (0x20-0x7e) is passed through, with ``&`` written as
    ``&-``. Every run of other characters is emitted as ``&<base64>-`` where
    the base64 is taken over the UTF-16BE bytes, stripped of ``=`` padding and
    with ``/`` replaced by ``,``.
    """
    import base64 as _b64

    def _shift(run: str) -> str:
        # Encode one run of non-printable / non-ASCII characters.
        b64 = _b64.b64encode(run.encode("utf-16-be")).decode("ascii")
        return "&" + b64.rstrip("=").replace("/", ",") + "-"

    pieces = []
    pending = []  # characters of the current run that needs base64 encoding
    for char in s:
        if 0x20 <= ord(char) <= 0x7e:
            if pending:
                pieces.append(_shift("".join(pending)))
                pending = []
            pieces.append("&-" if char == "&" else char)
        else:
            pending.append(char)
    if pending:
        pieces.append(_shift("".join(pending)))
    return "".join(pieces)
|
||||
|
||||
|
||||
def dec(s: str | None) -> str:
    """Decode an RFC 2047 encoded header value into a single-line string.

    Args:
        s: Raw header value, or ``None`` / empty when the header is missing.

    Returns:
        The decoded text with CR and LF replaced by spaces and surrounding
        whitespace stripped; ``""`` for a missing header.
    """
    if not s:
        return ""
    out = []
    for txt, enc in decode_header(s):
        if isinstance(txt, bytes):
            try:
                out.append(txt.decode(enc or "utf-8", errors="replace"))
            except LookupError:
                # The header names a charset Python does not know (for example
                # "unknown-8bit"); fall back to UTF-8 instead of crashing.
                out.append(txt.decode("utf-8", errors="replace"))
        else:
            out.append(txt)
    return "".join(out).replace("\r", " ").replace("\n", " ").strip()
|
||||
|
||||
|
||||
def html_to_text(html: str) -> str:
    """Convert an HTML document to plain text.

    BeautifulSoup is used when it can be imported (``lxml`` parser first,
    ``html.parser`` as a second choice), dropping ``script``, ``style`` and
    ``head`` elements. If that path fails for any reason a regex-based
    fallback is used, which likewise removes those elements, turns common
    block boundaries into line breaks, strips the remaining tags and decodes
    HTML entities.

    Returns:
        The text with every line stripped and empty lines removed.
    """
    try:
        from bs4 import BeautifulSoup
        try:
            soup = BeautifulSoup(html, "lxml")
        except Exception:
            soup = BeautifulSoup(html, "html.parser")
        for t in soup(["script", "style", "head"]):
            t.decompose()
        text = soup.get_text(separator="\n")
    except Exception:
        # Deliberate best effort: bs4 missing or failing must not stop the
        # message from being shown.
        import re
        from html import unescape

        # Drop elements whose content is not readable text.
        text = re.sub(r"(?is)<(script|style|head)\b.*?</\1\s*>", "", html)
        # Keep paragraphs/rows apart instead of gluing them together.
        text = re.sub(r"(?i)<\s*(?:br|/p|/div|/tr|/li|/h[1-6])\b[^>]*>", "\n", text)
        text = re.sub(r"<[^>]+>", "", text)
        # Decode entities last so "&lt;b&gt;" is not mistaken for a tag.
        text = unescape(text)
    lines = [ln.strip() for ln in text.splitlines()]
    return "\n".join(ln for ln in lines if ln)
|
||||
|
||||
|
||||
def _imap_quote(name: str) -> str:
    """Wrap *name* in an IMAP quoted string, escaping backslash and double quote."""
    return '"' + name.replace("\\", "\\\\").replace('"', '\\"') + '"'


def _decode_text(payload: bytes | None, charset: str | None) -> str:
    """Decode a body payload; fall back to UTF-8 when the charset label is unknown."""
    data = payload or b""
    try:
        return data.decode(charset or "utf-8", errors="replace")
    except LookupError:
        return data.decode("utf-8", errors="replace")


def _fetch_raw(folder: str, number: str, by_uid: bool) -> bytes | None:
    """Fetch one raw message (RFC822); return None after printing an error on failure."""
    M = connect()
    try:
        typ, data = M.select(_imap_quote(encode_mutf7(folder)), readonly=True)
        if typ != "OK":
            print(f"Slozku nelze otevrit: {data}", file=sys.stderr)
            return None

        # FETCH of the whole message (RFC822). UID FETCH when --uid is given.
        if by_uid:
            typ, msg_data = M.uid("FETCH", number, "(RFC822)")
        else:
            typ, msg_data = M.fetch(number, "(RFC822)")
        if typ != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
            print(f"Zpravu #{number} nelze nacist (typ={typ})", file=sys.stderr)
            return None
        return msg_data[0][1]
    finally:
        # Always release the connection, including on the error paths above.
        try:
            M.logout()
        except (imaplib.IMAP4.error, OSError):
            pass


def _unique_filename(name: str, used: set) -> str:
    """Return a file name without path separators that is not yet in *used*."""
    safe = name.replace("/", "_").replace("\\", "_")
    if safe in ("", ".", ".."):
        safe = "att.bin"
    stem, ext = os.path.splitext(safe)
    candidate, k = safe, 1
    # Compare case-insensitively so names also stay distinct on
    # case-insensitive file systems.
    while candidate.lower() in used:
        candidate = f"{stem}_{k}{ext}"
        k += 1
    used.add(candidate.lower())
    return candidate


def main() -> int:
    """Read one message from a MailStore folder and print it.

    Prints the main headers, the attachment list and the body (plain text
    preferred, otherwise HTML converted to text, truncated to
    ``BODY_PREVIEW_CHARS``). With ``--raw`` the raw EML is written to stdout
    instead; with ``--save DIR`` the attachments are written into DIR.

    Returns:
        0 on success, 1 when the folder cannot be opened or the message
        cannot be fetched.
    """
    ap = argparse.ArgumentParser(description="Precist jednu zpravu z MailStore")
    ap.add_argument("folder", help="Plna cesta slozky")
    ap.add_argument("number", help="Poradove cislo zpravy (nebo UID s --uid)")
    ap.add_argument("--uid", action="store_true", help="Cislo je IMAP UID, ne poradi")
    ap.add_argument("--save", metavar="DIR", help="Ulozit prilohy do adresare")
    ap.add_argument("--raw", action="store_true", help="Vypsat cely syrovy EML a skoncit")
    args = ap.parse_args()

    raw = _fetch_raw(args.folder, args.number, args.uid)
    if raw is None:
        return 1

    if args.raw:
        sys.stdout.buffer.write(raw)
        return 0

    msg = email.message_from_bytes(raw)

    # --- headers ---
    print("=" * 80)
    print(f"Slozka : {args.folder}")
    print(f"{'UID' if args.uid else 'Cislo'} : {args.number}")
    print("-" * 80)
    print(f"Datum : {msg.get('Date')}")
    print(f"Od : {dec(msg.get('From'))}")
    print(f"Komu : {dec(msg.get('To'))}")
    if msg.get("Cc"):
        print(f"Kopie : {dec(msg.get('Cc'))}")
    print(f"Predmet : {dec(msg.get('Subject'))}")
    print(f"Msg-ID : {msg.get('Message-ID')}")
    print(f"EML velikost: {len(raw):,} bytu")

    # --- body + attachments ---
    body_text = body_html = ""
    attachments = []  # (filename, size, payload)
    for part in msg.walk():
        if part.is_multipart():
            continue
        ct = part.get_content_type()
        # Header values are case-insensitive ("Attachment" is valid too).
        disp = str(part.get("Content-Disposition") or "").lower()
        payload = part.get_payload(decode=True)
        if "attachment" in disp or (part.get_filename() and ct not in ("text/plain", "text/html")):
            attachments.append((dec(part.get_filename()) or "(bez nazvu)",
                                len(payload or b""), payload or b""))
        elif ct == "text/plain" and not body_text:
            body_text = _decode_text(payload, part.get_content_charset())
        elif ct == "text/html" and not body_html:
            body_html = _decode_text(payload, part.get_content_charset())

    print("-" * 80)
    if attachments:
        print(f"Prilohy ({len(attachments)}):")
        for name, size, _ in attachments:
            print(f" - {name} ({size:,} B)")
    else:
        print("Prilohy: zadne")

    # body: prefer plain text, otherwise html->text
    text = body_text or (html_to_text(body_html) if body_html else "")
    src = "text/plain" if body_text else ("text/html->text" if body_html else "(zadne)")
    print("-" * 80)
    print(f"TELO ({src}, {len(text):,} znaku):")
    print("-" * 80)
    if text:
        print(text[:BODY_PREVIEW_CHARS])
        if len(text) > BODY_PREVIEW_CHARS:
            print(f"\n... [zkraceno, celkem {len(text):,} znaku] ...")
    else:
        print("(prazdne telo)")

    # --- saving attachments ---
    if args.save and attachments:
        os.makedirs(args.save, exist_ok=True)
        print("-" * 80)
        used_names = set()
        for name, size, payload in attachments:
            # Attachment names come from the message, so keep them inside the
            # target directory and stop equal names overwriting each other.
            path = os.path.join(args.save, _unique_filename(name, used_names))
            with open(path, "wb") as f:
                f.write(payload)
            print(f"Ulozeno: {path} ({size:,} B)")

    print("=" * 80)
    return 0
|
||||
|
||||
|
||||
# Script entry point: run main() and turn Ctrl+C into a clean exit code 1.
if __name__ == "__main__":
    try:
        exit_code = main()
    except KeyboardInterrupt:
        print("\nPreruseno", file=sys.stderr)
        exit_code = 1
    sys.exit(exit_code)
|
||||
Reference in New Issue
Block a user