notebook
This commit is contained in:
@@ -85,11 +85,14 @@ def safe_rtf_to_text(x):
|
|||||||
|
|
||||||
df["DEKURS"] = df["DEKURS"].apply(safe_rtf_to_text)
|
df["DEKURS"] = df["DEKURS"].apply(safe_rtf_to_text)
|
||||||
|
|
||||||
df.replace({r'(\r\n|\r|\n)': r'\r\n'}, regex=True, inplace=True)
|
# --- Normalize and clean line breaks more robustly ---
|
||||||
df.replace({r'[\ud800-\udfff\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]+': ''},
|
df["DEKURS"] = (
|
||||||
regex=True, inplace=True)
|
df["DEKURS"]
|
||||||
df.replace({r'(\r\n){2,}': r'\r\n', r'(\r\n)+$': ''},
|
.str.replace(r'\r\n|\r', '\n', regex=True) # normalize to \n
|
||||||
regex=True, inplace=True)
|
.str.replace(r'[ \t\xa0]*\n[ \t\xa0]*', '\n', regex=True) # strip spaces around newlines
|
||||||
|
.str.replace(r'\n{2,}', '\n', regex=True) # collapse 2+ newlines
|
||||||
|
.str.strip() # remove leading/trailing newlines/spaces
|
||||||
|
)
|
||||||
|
|
||||||
df["PACIENT"] = df["PRIJMENI"].fillna("") + ", " + df["JMENO"].fillna("")
|
df["PACIENT"] = df["PRIJMENI"].fillna("") + ", " + df["JMENO"].fillna("")
|
||||||
df.drop(columns=["PRIJMENI", "JMENO"], inplace=True)
|
df.drop(columns=["PRIJMENI", "JMENO"], inplace=True)
|
||||||
|
|||||||
Reference in New Issue
Block a user