@@ -0,0 +1,226 @@
"""
msg_to_clipboard.py
Převede Outlook .msg email (i vlákno odpovědí) do formátu:
DD.MM.YYYY HH:MM od Jméno: text
a zkopíruje do schránky.
Použití:
python msg_to_clipboard.py " cesta/k/souboru.msg "
nebo spustit bez argumentu -> otevře dialog pro výběr souboru
"""
import sys
import re
import tkinter as tk
from tkinter import filedialog , messagebox
import extract_msg
from bs4 import BeautifulSoup
from dateutil import parser as dateparser
def decode_html(html_bytes: bytes) -> str:
    """Decode the raw HTML body of a message into text.

    Encodings are tried in order: strict UTF-8 first (it rejects byte
    sequences that are not valid UTF-8), then windows-1250.  If both fail,
    the data is decoded as latin-1, which maps every byte value to a
    character and therefore always succeeds.

    Args:
        html_bytes: The undecoded HTML.

    Returns:
        The decoded HTML string.
    """
    for enc in ("utf-8", "windows-1250"):
        try:
            return html_bytes.decode(enc)
        except (UnicodeDecodeError, LookupError):
            continue
    # Last resort: latin-1 cannot raise, so no errors="replace" pass is needed.
    return html_bytes.decode("latin-1")
def clean_text(text: str) -> str:
    """Collapse *text* onto a single line.

    Non-breaking spaces become ordinary spaces, every run of whitespace is
    squeezed into one space, and leading/trailing whitespace is removed.
    """
    without_nbsp = text.replace("\xa0", " ")
    single_spaced = re.sub(r"\s+", " ", without_nbsp)
    return single_spaced.strip()
def is_separator(element) -> bool:
    """Tell whether *element* is an Outlook quoted-reply separator.

    An element qualifies when its ``name`` is ``"div"`` and ``element.find``
    locates a ``div`` whose ``style`` attribute mentions ``border-top``.
    Objects without a ``name`` attribute are never separators.
    """
    def mentions_border_top(style):
        # A missing or empty style attribute never matches.
        return bool(style) and "border-top" in style

    if getattr(element, "name", None) == "div":
        return element.find("div", style=mentions_border_top) is not None
    return False
def parse_separator ( element ) - > tuple [ str , str | None ] :
""" Z oddělovače vytáhne jméno odesílatele a řetězec data. """
text = element . get_text ( separator = " \n " )
from_match = re . search ( r " From: \ s*(.+?)(?: \ n|$) " , text )
sent_match = re . search ( r " Sent: \ s*(.+?)(?: \ n|$) " , text )
sender_raw = from_match . group ( 1 ) . strip ( ) if from_match else " Neznámý "
# "Jméno Příjmení <email>" → "Jméno Příjmení"
name_only = re . match ( r " ^(.+?) \ s*< " , sender_raw )
sender = name_only . group ( 1 ) . strip ( ) if name_only else sender_raw
sent_str = sent_match . group ( 1 ) . strip ( ) if sent_match else None
return sender , sent_str
def parse_date ( sent_str : str | None , fallback = None ) :
if not sent_str :
return fallback
try :
return dateparser . parse ( sent_str )
except Exception :
return fallback
def extract_sections(soup, main_date, main_sender: str) -> list[dict]:
    """Split the HTML body into one section per message of the thread.

    The children of the ``WordSection1`` div (or of ``soup.body`` when that
    div is absent) are walked in document order.  Text before the first
    separator is attributed to *main_sender* / *main_date*; each separator
    closes the section collected so far and opens a new one whose sender and
    date are read from the separator itself.

    Returns:
        A list of dicts with the keys ``'sender'``, ``'date'`` and ``'text'``,
        in reverse document order (the newest message is expected at the top
        of the mail, so reversing puts the oldest first).  The list is empty
        when no body container is found.
    """
    container = soup.find("div", class_="WordSection1") or soup.body
    if not container:
        return []

    def finish(who, when, pieces):
        # Build the finished record for one message of the thread.
        return {
            "sender": who,
            "date": when,
            "text": clean_text(" ".join(pieces)),
        }

    messages = []
    sender, date, parts = main_sender, main_date, []
    for node in container.children:
        if is_separator(node):
            # Close the section collected so far and start the next one.
            messages.append(finish(sender, date, parts))
            sender, sent_str = parse_separator(node)
            date = parse_date(sent_str)
            parts = []
            continue
        if hasattr(node, "get_text"):
            fragment = clean_text(node.get_text(separator=" "))
            if fragment:
                parts.append(fragment)

    # The section that was still open when the children ran out.
    messages.append(finish(sender, date, parts))

    # Newest is on top in the e-mail; the caller wants chronological order.
    messages.reverse()
    return messages
def parse_msg(msg_path: str) -> list[dict]:
    """Read a .msg file and return its messages as section dicts.

    When the message has an HTML body, it is decoded, parsed and split into
    thread sections by ``extract_sections``.  Otherwise a single section is
    built from the plain-text body.

    Args:
        msg_path: Path of the .msg file to open.

    Returns:
        A list of dicts with the keys ``'sender'``, ``'date'`` and ``'text'``.
    """
    msg = extract_msg.openMsg(msg_path)
    try:
        main_date = msg.date

        # Sender: prefer the display name over the whole "Name <email>" field.
        main_sender = msg.sender or "Neznámý"
        name_only = re.match(r"^(.+?)\s*<", main_sender)
        if name_only:
            main_sender = name_only.group(1).strip()

        html_body = msg.htmlBody
        if html_body:
            html_text = decode_html(html_body)
            soup = BeautifulSoup(html_text, "html.parser")
            return extract_sections(soup, main_date, main_sender)

        # Fallback: plain text only.
        return [{
            "sender": main_sender,
            "date": main_date,
            "text": clean_text((msg.body or "").replace("\r\n", " ")),
        }]
    finally:
        # Release the underlying file handle even when parsing fails, so the
        # .msg file does not stay open for the rest of the process.
        msg.close()
def format_sections(sections: list[dict]) -> str:
    """Render sections as text, one line per section.

    Each line reads ``DD.MM.YYYY HH:MM od <sender>: <text>``; a section whose
    date is missing gets ``??`` in place of the timestamp.  Lines are joined
    with newlines, so an empty list gives an empty string.
    """
    def render(section):
        when = section["date"]
        stamp = when.strftime("%d.%m.%Y %H:%M") if when else "??"
        return f"{stamp} od {section['sender']}: {section['text']}"

    return "\n".join(render(section) for section in sections)
def copy_to_clipboard(text: str):
    """Place *text* on the clipboard using a hidden Tk window.

    The window is withdrawn immediately, the clipboard is cleared and filled
    with *text*, and the event loop then runs until the window destroys
    itself 2000 ms later.
    """
    hidden = tk.Tk()
    hidden.withdraw()

    hidden.clipboard_clear()
    hidden.clipboard_append(text)
    hidden.update()

    # NOTE(review): the 2 s delay presumably gives the system time to take
    # over the clipboard contents before the window goes away - confirm.
    hidden.after(2000, hidden.destroy)
    hidden.mainloop()
def show_result(text: str):
    """Show *text* in a small preview window and copy it to the clipboard.

    The window holds a read-only, scrollable preview plus two buttons: one
    copies the text again, the other closes the window.  The text is copied
    once automatically when the window opens.  The call blocks until the
    window is closed.
    """
    root = tk.Tk()
    root.title("Email → schránka")
    root.resizable(True, True)

    frame = tk.Frame(root, padx=10, pady=10)
    frame.pack(fill=tk.BOTH, expand=True)

    label = tk.Label(frame, text="Zkopírováno do schránky. Náhled:", anchor="w")
    label.pack(fill=tk.X)

    # The text and its scrollbar share a container so the scrollbar sits
    # beside the text.  The scrollbar is packed first: pack hands out space in
    # call order, and packing it after an expanding widget would push it
    # below the text instead.
    text_frame = tk.Frame(frame)
    text_frame.pack(fill=tk.BOTH, expand=True, pady=(4, 0))

    scrollbar = tk.Scrollbar(text_frame)
    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

    text_widget = tk.Text(
        text_frame,
        wrap=tk.WORD,
        height=12,
        width=80,
        yscrollcommand=scrollbar.set,
    )
    text_widget.insert(tk.END, text)
    # Read-only preview; must be disabled only after the text is inserted.
    text_widget.config(state=tk.DISABLED)
    text_widget.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    scrollbar.config(command=text_widget.yview)

    def copy_again():
        root.clipboard_clear()
        root.clipboard_append(text)
        root.update()

    btn_frame = tk.Frame(frame)
    btn_frame.pack(fill=tk.X, pady=(8, 0))
    tk.Button(btn_frame, text="Kopírovat znovu", command=copy_again).pack(side=tk.LEFT)
    tk.Button(btn_frame, text="Zavřít", command=root.destroy).pack(side=tk.RIGHT)

    # Auto-copy on open.
    copy_again()

    root.mainloop()
def main():
    """Script entry point.

    The .msg path is taken from the first command-line argument; without an
    argument a file-selection dialog is shown.  The message is parsed and
    formatted, printed to standard output and shown in the preview window
    (which also copies it to the clipboard).  Nothing happens when no path
    is chosen; a processing failure is reported in an error dialog.
    """
    if len(sys.argv) >= 2:
        msg_path = sys.argv[1]
    else:
        root = tk.Tk()
        root.withdraw()
        msg_path = filedialog.askopenfilename(
            title="Vyberte email (.msg)",
            filetypes=[("Outlook Email", "*.msg"), ("Všechny soubory", "*.*")],
        )
        root.destroy()

    if not msg_path:
        return

    try:
        sections = parse_msg(msg_path)
        result = format_sections(sections)
    except Exception as exc:
        # No Tk root exists at this point.  Without one, messagebox would
        # create a visible empty default window, so make a hidden root for
        # the dialog and dispose of it afterwards.
        error_root = tk.Tk()
        error_root.withdraw()
        try:
            messagebox.showerror("Chyba", f"Nepodařilo se zpracovat email:\n{exc}")
        finally:
            error_root.destroy()
        return

    print(result)
    show_result(result)
# Run the converter only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()