Files
medicus/MedicusWithClaude/analyze_rtf.py

35 lines
1.0 KiB
Python

import fdb, re
# RTF inspection script: loads one DEKURS record from the Medicus Firebird
# database and prints its colour table, stylesheet, most frequent RTF control
# words and the start of the first table row definition.
# Relies on the file-level ``import fdb, re``.
from collections import Counter

# NOTE(review): connection parameters and credentials are hard-coded; consider
# moving them to configuration or environment variables.
DSN = r'localhost:c:\medicus 3\data\medicus.fdb'
DB_USER = 'SYSDBA'
DB_PASSWORD = 'masterkey'
DB_CHARSET = 'win1250'
RECORD_ID = 243082

COLORTBL_RE = re.compile(r'\\colortbl[^}]+\}')
# The closing brace must be directly followed by a newline to end the match.
STYLESHEET_RE = re.compile(r'\\stylesheet\{.+?\}(?=\n)', re.DOTALL)
TAG_RE = re.compile(r'\\[a-z]+\d*')
TABLE_MARKER = r'\trowd'


def to_text(value):
    """Return *value* as ``str``.

    Objects exposing ``read()`` are read first; ``bytes`` are decoded as
    windows-1250 with undecodable bytes replaced; anything else is returned
    unchanged.
    """
    if hasattr(value, 'read'):
        value = value.read()
    if isinstance(value, bytes):
        value = value.decode('windows-1250', errors='replace')
    return value


def find_colortbl(text):
    """Return the ``\\colortbl`` group found in *text*, or ``None``."""
    match = COLORTBL_RE.search(text)
    return match.group(0) if match else None


def find_stylesheet(text):
    """Return the ``\\stylesheet`` group found in *text*, or ``None``."""
    match = STYLESHEET_RE.search(text)
    return match.group(0) if match else None


def count_tags(text, limit=40):
    """Return the *limit* most common RTF control words as (tag, count) pairs."""
    return Counter(TAG_RE.findall(text)).most_common(limit)


def table_preview(text, length=300):
    """Return up to *length* characters starting at the first ``\\trowd``.

    Returns ``None`` when *text* contains no ``\\trowd``. ``str.find`` reports
    a miss as -1, so a marker located at offset 0 is a valid hit.
    """
    start = text.find(TABLE_MARKER)
    if start == -1:
        return None
    return text[start:start + length]


def fetch_dekurs(record_id=RECORD_ID):
    """Fetch the DEKURS column of the row with the given ID as text.

    Returns ``None`` when the row does not exist or the column is NULL.
    The connection is closed even if the query or the read fails.
    """
    conn = fdb.connect(
        dsn=DSN,
        user=DB_USER, password=DB_PASSWORD, charset=DB_CHARSET
    )
    try:
        cur = conn.cursor()
        cur.execute('SELECT DEKURS FROM DEKURS WHERE ID=?', (record_id,))
        row = cur.fetchone()
        if row is None or row[0] is None:
            return None
        # Convert while the connection is still open: the value may be a
        # stream-like object that has to be read before closing.
        return to_text(row[0])
    finally:
        conn.close()


def print_report(text):
    """Print the colour table, stylesheet, tag statistics and table sample."""
    # colortbl
    colortbl = find_colortbl(text)
    if colortbl:
        print('COLORTBL:', colortbl)

    # stylesheet
    stylesheet = find_stylesheet(text)
    if stylesheet:
        print('\nSTYLESHEET:', stylesheet[:600])

    # Find the various styles used in the text
    print('\n--- Použité RTF tagy (unikátní) ---')
    for tag, count in count_tags(text):
        print(f" {tag:<20} {count}x")

    # Table sample
    preview = table_preview(text)
    if preview is not None:
        print('\n--- Začátek tabulky ---')
        print(preview)


def main(record_id=RECORD_ID):
    """Load the record and print the RTF report; exit with a message if absent."""
    text = fetch_dekurs(record_id)
    if text is None:
        raise SystemExit(f'DEKURS record ID={record_id} not found or empty')
    print_report(text)


if __name__ == '__main__':
    main()