Z230
This commit is contained in:
@@ -26,9 +26,22 @@ def ocr_page(page):
|
||||
|
||||
|
||||
def extract_rodne_cislo(text):
    """Extract a rodné číslo (birth number) from text, normalized to 10 digits.

    Recognized formats:
      - 6 digits + slash + 4 digits  -> "655527/1910"
      - 6 digits + slash + 3 digits  -> "655527/910"
      - 9 or 10 digits without slash -> "6555271910"

    The part after the first six digits is left-padded with zeros to four
    digits, so "655527/910" becomes "6555270910".

    Args:
        text: String to search; only the first match is used.

    Returns:
        The 10-digit number without a slash, or None when no match is found.
    """
    # Word boundaries keep the pattern from matching inside longer digit runs.
    match = re.search(r"\b(\d{6})/?(\d{3,4})\b", text)
    if not match:
        return None

    prefix = match.group(1)
    # A 3-digit suffix is zero-padded on the left so the result is always
    # exactly 10 digits long.
    suffix = match.group(2).zfill(4)

    return prefix + suffix
|
||||
|
||||
|
||||
def extract_date(text):
|
||||
|
||||
Reference in New Issue
Block a user