z230
This commit is contained in:
13
.claude/settings.local.json
Normal file
13
.claude/settings.local.json
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"Bash(python3 -c \":*)",
|
||||||
|
"Bash(pip install extract-msg)",
|
||||||
|
"Bash(python -c \":*)",
|
||||||
|
"Bash(pip install python-dateutil beautifulsoup4)",
|
||||||
|
"Bash(python msg_to_clipboard.py \"U:/Dropbox/Ordinace/Dokumentace_ke_zpracování/RE_ Žádost.msg\")",
|
||||||
|
"Bash(del \"U:\\\\medicus\\\\emailintoclipboard\\\\test_output.txt\")",
|
||||||
|
"Bash(start python msg_to_clipboard.py \"U:/Dropbox/Ordinace/Dokumentace_ke_zpracování/RE_ Žádost.msg\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
226
EmailIntoClipboard/msg_to_clipboard.py
Normal file
226
EmailIntoClipboard/msg_to_clipboard.py
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
"""
|
||||||
|
msg_to_clipboard.py
|
||||||
|
Převede Outlook .msg email (i vlákno odpovědí) do formátu:
|
||||||
|
DD.MM.YYYY HH:MM od Jméno: text
|
||||||
|
a zkopíruje do schránky.
|
||||||
|
|
||||||
|
Použití:
|
||||||
|
python msg_to_clipboard.py "cesta/k/souboru.msg"
|
||||||
|
nebo spustit bez argumentu -> otevře dialog pro výběr souboru
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import tkinter as tk
|
||||||
|
from tkinter import filedialog, messagebox
|
||||||
|
import extract_msg
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from dateutil import parser as dateparser
|
||||||
|
|
||||||
|
|
||||||
|
def decode_html(html_bytes: bytes) -> str:
    """Decode the raw HTML body of a message into text.

    Encodings are tried from the strictest to the most permissive:

    1. ``utf-8-sig`` - strict UTF-8 that additionally drops a leading
       byte-order mark, so the BOM never ends up in the parsed text.
    2. ``windows-1250`` - a few byte values are undefined in this code page,
       so this attempt can still fail.
    3. ``latin-1`` - maps every possible byte to a character and therefore
       always succeeds; it is the last resort.

    Args:
        html_bytes: The undecoded HTML body.

    Returns:
        The decoded HTML as ``str``.
    """
    for enc in ("utf-8-sig", "windows-1250"):
        try:
            return html_bytes.decode(enc)
        except UnicodeDecodeError:
            continue
    # latin-1 cannot fail, so no further "replace" fallback is needed.
    return html_bytes.decode("latin-1")
|
||||||
|
|
||||||
|
|
||||||
|
def clean_text(text: str) -> str:
    """Collapse *text* onto a single line.

    Non-breaking spaces become ordinary spaces, every run of whitespace
    (including line breaks) is squeezed into one space, and leading and
    trailing whitespace is removed.
    """
    without_nbsp = text.replace("\xa0", " ")
    return re.sub(r"\s+", " ", without_nbsp).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def is_separator(element) -> bool:
    """Tell whether *element* is an Outlook quote separator.

    A separator is a ``div`` that contains a descendant ``div`` whose inline
    ``style`` mentions ``border-top``. Anything that is not a ``div``
    (including nodes without a ``name`` attribute) is never a separator.
    """
    if getattr(element, "name", None) == "div":

        def has_top_border(style):
            return style and "border-top" in style

        return element.find("div", style=has_top_border) is not None
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def parse_separator(element) -> tuple[str, str | None]:
|
||||||
|
"""Z oddělovače vytáhne jméno odesílatele a řetězec data."""
|
||||||
|
text = element.get_text(separator="\n")
|
||||||
|
from_match = re.search(r"From:\s*(.+?)(?:\n|$)", text)
|
||||||
|
sent_match = re.search(r"Sent:\s*(.+?)(?:\n|$)", text)
|
||||||
|
|
||||||
|
sender_raw = from_match.group(1).strip() if from_match else "Neznámý"
|
||||||
|
# "Jméno Příjmení <email>" → "Jméno Příjmení"
|
||||||
|
name_only = re.match(r"^(.+?)\s*<", sender_raw)
|
||||||
|
sender = name_only.group(1).strip() if name_only else sender_raw
|
||||||
|
|
||||||
|
sent_str = sent_match.group(1).strip() if sent_match else None
|
||||||
|
return sender, sent_str
|
||||||
|
|
||||||
|
|
||||||
|
def parse_date(sent_str: str | None, fallback=None):
|
||||||
|
if not sent_str:
|
||||||
|
return fallback
|
||||||
|
try:
|
||||||
|
return dateparser.parse(sent_str)
|
||||||
|
except Exception:
|
||||||
|
return fallback
|
||||||
|
|
||||||
|
|
||||||
|
def extract_sections(soup, main_date, main_sender: str) -> list[dict]:
    """Split the HTML body into one section per message of the thread.

    The direct children of the ``WordSection1`` div (or of ``<body>`` when
    that div is absent) are walked in document order. Text found before the
    first separator is attributed to *main_sender* / *main_date*; every
    separator closes the running section and opens a new one whose sender
    and date come from the separator itself.

    Returns:
        A list of ``{'sender', 'date', 'text'}`` dicts in reverse document
        order (the mail lists the newest message first, so the result reads
        oldest first). Empty when the document has no usable body.
    """
    container = soup.find("div", class_="WordSection1") or soup.body
    if not container:
        return []

    def finish(section: dict) -> dict:
        # Join the collected fragments into the final one-line text.
        return {
            "sender": section["sender"],
            "date": section["date"],
            "text": clean_text(" ".join(section["parts"])),
        }

    collected = []
    open_section = {"sender": main_sender, "date": main_date, "parts": []}

    for node in container.children:
        if is_separator(node):
            # Close the running section and start the quoted one.
            collected.append(finish(open_section))
            quoted_sender, quoted_sent = parse_separator(node)
            open_section = {
                "sender": quoted_sender,
                "date": parse_date(quoted_sent),
                "parts": [],
            }
            continue
        if not hasattr(node, "get_text"):
            continue
        fragment = clean_text(node.get_text(separator=" "))
        if fragment:
            open_section["parts"].append(fragment)

    # The last section is still open when the loop ends.
    collected.append(finish(open_section))

    return collected[::-1]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_msg(msg_path: str) -> list[dict]:
    """Read an Outlook ``.msg`` file and return its thread as sections.

    Args:
        msg_path: Path of the ``.msg`` file.

    Returns:
        A list of ``{'sender', 'date', 'text'}`` dicts. When the message has
        an HTML body the thread is split by ``extract_sections``; otherwise a
        single section built from the plain-text body is returned.
    """
    msg = extract_msg.openMsg(msg_path)
    try:
        main_date = msg.date
        # NOTE(review): some extract_msg releases reportedly expose the date
        # as a string rather than a datetime - normalise defensively so the
        # later strftime() call cannot fail. TODO confirm against the
        # installed version.
        if isinstance(main_date, str):
            main_date = parse_date(main_date)

        # Sender: prefer the display name over the whole "Name <address>" field.
        main_sender = msg.sender or "Neznámý"
        name_only = re.match(r"^(.+?)\s*<", main_sender)
        if name_only:
            main_sender = name_only.group(1).strip()

        html_body = msg.htmlBody
        if html_body:
            soup = BeautifulSoup(decode_html(html_body), "html.parser")
            return extract_sections(soup, main_date, main_sender)

        # Fallback - plain text body.
        return [{
            "sender": main_sender,
            "date": main_date,
            "text": clean_text((msg.body or "").replace("\r\n", " ")),
        }]
    finally:
        # Release the underlying file; everything needed has been copied out.
        msg.close()
|
||||||
|
|
||||||
|
|
||||||
|
def format_sections(sections: list[dict]) -> str:
    """Render sections as ``DD.MM.YYYY HH:MM od <sender>: <text>`` lines.

    A section whose ``date`` is falsy gets ``??`` in place of the timestamp.
    Lines are joined with ``\\n``; an empty input gives an empty string.
    """

    def stamp(moment) -> str:
        return moment.strftime("%d.%m.%Y %H:%M") if moment else "??"

    return "\n".join(
        f"{stamp(entry['date'])} od {entry['sender']}: {entry['text']}"
        for entry in sections
    )
|
||||||
|
|
||||||
|
|
||||||
|
def copy_to_clipboard(text: str):
    """Put *text* on the clipboard using a hidden Tk window.

    The window is never shown; it is destroyed 2000 ms after the event loop
    starts, which ends ``mainloop`` and lets the function return.
    """
    hidden_window = tk.Tk()
    hidden_window.withdraw()

    hidden_window.clipboard_clear()
    hidden_window.clipboard_append(text)
    hidden_window.update()

    # NOTE(review): the delay presumably gives the system time to take over
    # the clipboard contents before the window goes away - confirm.
    hidden_window.after(2000, hidden_window.destroy)
    hidden_window.mainloop()
|
||||||
|
|
||||||
|
|
||||||
|
def show_result(text: str):
    """Show *text* in a small preview window and copy it to the clipboard.

    The text is copied as soon as the window is built; the "Kopírovat znovu"
    button repeats the copy and "Zavřít" closes the window. The call blocks
    until the window is closed.
    """
    root = tk.Tk()
    root.title("Email → schránka")
    root.resizable(True, True)

    frame = tk.Frame(root, padx=10, pady=10)
    frame.pack(fill=tk.BOTH, expand=True)

    label = tk.Label(frame, text="Zkopírováno do schránky. Náhled:", anchor="w")
    label.pack(fill=tk.X)

    # The preview and its scrollbar share a dedicated frame. The scrollbar is
    # packed first so it claims the right edge; packing it after a top-packed
    # text widget would place it underneath the text instead of beside it.
    text_frame = tk.Frame(frame)
    text_frame.pack(fill=tk.BOTH, expand=True, pady=(4, 0))

    text_widget = tk.Text(text_frame, wrap=tk.WORD, height=12, width=80)
    scrollbar = tk.Scrollbar(text_frame, command=text_widget.yview)
    text_widget.config(yscrollcommand=scrollbar.set)

    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
    text_widget.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    text_widget.insert(tk.END, text)
    # Read-only preview: disable editing once the content is in place.
    text_widget.config(state=tk.DISABLED)

    def copy_again():
        root.clipboard_clear()
        root.clipboard_append(text)
        root.update()

    btn_frame = tk.Frame(frame)
    btn_frame.pack(fill=tk.X, pady=(8, 0))
    tk.Button(btn_frame, text="Kopírovat znovu", command=copy_again).pack(side=tk.LEFT)
    tk.Button(btn_frame, text="Zavřít", command=root.destroy).pack(side=tk.RIGHT)

    # Auto-copy on open.
    copy_again()

    root.mainloop()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Entry point: convert one ``.msg`` file and show the result.

    The file path is taken from the first command-line argument; without an
    argument a file-open dialog is shown and cancelling it ends the program.
    Any failure while parsing or formatting is reported in an error dialog.
    On success the text is printed to stdout and shown in the preview window,
    which also copies it to the clipboard.
    """
    if len(sys.argv) >= 2:
        msg_path = sys.argv[1]
    else:
        picker = tk.Tk()
        picker.withdraw()
        msg_path = filedialog.askopenfilename(
            title="Vyberte email (.msg)",
            filetypes=[("Outlook Email", "*.msg"), ("Všechny soubory", "*.*")],
        )
        picker.destroy()
        if not msg_path:
            return

    try:
        sections = parse_msg(msg_path)
        result = format_sections(sections)
    except Exception as exc:
        # Top-level boundary: surface every failure to the user.
        messagebox.showerror("Chyba", f"Nepodařilo se zpracovat email:\n{exc}")
        return

    try:
        print(result)
    except UnicodeEncodeError:
        # stdout may use an encoding that cannot represent every character of
        # the mail (e.g. output redirected under a legacy code page). Degrade
        # to replacement characters rather than abort before the window opens.
        encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
        print(result.encode(encoding, errors="replace").decode(encoding))

    show_result(result)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the converter only when the file is executed as a script.
    main()
|
||||||
3
EmailIntoClipboard/spustit.bat
Normal file
3
EmailIntoClipboard/spustit.bat
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
@echo off
:: Drag a .msg file onto this script, or run it without an argument to pick the file in a dialog.
:: "%~1" strips any quotes the shell added and re-quotes the path, so names
:: containing characters such as & or parentheses reach Python as one argument.
if "%~1"=="" goto :nofile
python "%~dp0msg_to_clipboard.py" "%~1"
goto :eof

:nofile
python "%~dp0msg_to_clipboard.py"
|
||||||
2
EmailIntoClipboard/test_output.txt
Normal file
2
EmailIntoClipboard/test_output.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
06.03.2026 11:19 od pavel kraus: Dobrý den, přeposílám dokument z Fakultní nemocnice Královské Vinohrady Kardiologická klinika, dejte mi prosím vědět, kdy k vám mám můj tchán Antonín Mareček, r.č. 370701406 přijít. Děkuji, Kraus
|
||||||
|
08.03.2026 17:14 od Ordinace MUDr. Michaela Buzalková: Dobrý den, To záleží na termínu, kdy je TAVI plánováno. Nevidím ho nikde ve zprávě. Hezký den Vladimír Buzalka
|
||||||
Reference in New Issue
Block a user