This commit is contained in:
2025-11-23 22:05:21 +01:00
parent d4894fde95
commit 5fe221ea94
3 changed files with 62 additions and 4 deletions

View File

@@ -26,9 +26,22 @@ def ocr_page(page):
def extract_rodne_cislo(text):
    """Extract a rodné číslo from *text* and normalize it to 10 digits.

    Recognized forms:
      - 6 digits + slash + 4 digits, e.g. ``655527/1910``
      - 6 digits + slash + 3 digits, e.g. ``655527/910``
      - the same digits written without the slash, e.g. ``6555271910``

    Args:
        text: Text to search (typically OCR output). Empty or ``None``
            input yields ``None``.

    Returns:
        The first match as a 10-character digit string without the slash,
        or ``None`` when no match is found. A 3-digit suffix is left-padded
        with a zero so the result is always 10 digits long.
    """
    if not text:
        return None

    # The optional slash lets a single pattern cover both the slashed and
    # the plain-digit spellings; the word boundaries keep it from matching
    # inside a longer run of digits.
    match = re.search(r"\b(\d{6})/?(\d{3,4})\b", text)
    if match is None:
        return None

    prefix, suffix = match.groups()
    # Pad a 3-digit suffix to 4 digits so the output length is uniform.
    return prefix + suffix.zfill(4)
def extract_date(text):