Handle Korean numeric dates in TTS

Convert common date and count numerals to Korean readings so MMS outputs month/day and attendee counts correctly.
2026-01-30 20:42:36 +09:00
parent 35dae473ec
commit e240153e3f
1 changed files with 63 additions and 0 deletions
--- a/server/tts_service.py
+++ b/server/tts_service.py
@@ -41,6 +41,18 @@ _PHRASE_MAP = [
    ("Naver Blog", "네이버 블로그"),
    ("Brother Korea", "브라더 코리아"),
 ]
+# Sino-Korean digit names keyed by the integer digit 0-9; the position of each
+# syllable in the string below is the digit it names.
+_NUM_KO = dict(enumerate("영일이삼사오육칠팔구"))


 def _get_mms():
@@ -127,10 +139,61 @@ def _spell_abbrev(match: re.Match) -> str:
    return " ".join(_LETTER_KO.get(ch, ch) for ch in match.group(0))


+def _sino_korean(num: int) -> str:
+    """Return the Sino-Korean reading of a non-negative integer.
+
+    The number is read in groups of four digits, each group followed by its
+    myriad unit (만, 억, 조, 경).  A leading "일" is dropped before 십/백/천
+    (1000 -> "천") and before 만 (10000 -> "만"), but kept before the larger
+    units (10**8 -> "일억").
+
+    Args:
+        num: Integer to convert.
+
+    Returns:
+        The reading as a Hangul string without spaces.  Values too large for
+        the named units are read digit by digit rather than failing.
+
+    Raises:
+        ValueError: If ``num`` is negative.
+    """
+    if num < 0:
+        raise ValueError(f"cannot read a negative number: {num}")
+    if num == 0:
+        return _NUM_KO[0]
+
+    myriad_units = ("", "만", "억", "조", "경")
+    if num >= 10 ** (4 * len(myriad_units)):
+        # No unit name is available; a digit-by-digit reading is still speakable.
+        return "".join(_NUM_KO[int(digit)] for digit in str(num))
+
+    groups = []  # readings of each four-digit group, least significant first
+    for myriad in myriad_units:
+        if not num:
+            break
+        num, chunk = divmod(num, 10000)
+        if not chunk:
+            # An all-zero group contributes nothing, not even its unit.
+            continue
+        if chunk == 1 and myriad == "만":
+            groups.append(myriad)
+            continue
+        parts = []
+        for value, unit in ((1000, "천"), (100, "백"), (10, "십")):
+            digit, chunk = divmod(chunk, value)
+            if digit:
+                if digit > 1:
+                    parts.append(_NUM_KO[digit])
+                parts.append(unit)
+        if chunk:
+            parts.append(_NUM_KO[chunk])
+        groups.append("".join(parts) + myriad)
+    return "".join(reversed(groups))
+
+
+def _replace_numbers(text: str) -> str:
+    """Rewrite digits in dates and head counts as Hangul readings.
+
+    Handles ``YYYY년``, ``M월 D일``, ``N여`` and ``N명``; every other digit run
+    is left untouched.  For example ``"2026년 6월 3일에"`` becomes
+    ``"이천이십육년 유월 삼일에"``.
+
+    Args:
+        text: Input sentence, possibly containing Arabic numerals.
+
+    Returns:
+        The text with the recognised numeric expressions spelled out.
+    """
+    # A number may directly follow Hangul ("약100명"), but it must not be the
+    # tail of a longer number such as "12026", "3.5" or "1,000".
+    start = r"(?<!\d)(?<!\d[.,])"
+    # 6월 and 10월 are read irregularly as 유월 and 시월.
+    irregular_months = {6: "유", 10: "시"}
+
+    def _year(match: re.Match) -> str:
+        return f"{_sino_korean(int(match.group(1)))}년"
+
+    def _month_day(match: re.Match) -> str:
+        month_num = int(match.group(1))
+        month = irregular_months.get(month_num)
+        if month is None:
+            month = _sino_korean(month_num)
+        day = _sino_korean(int(match.group(2)))
+        return f"{month}월 {day}일"
+
+    def _approx(match: re.Match) -> str:
+        return f"{_sino_korean(int(match.group(1)))}여"
+
+    def _count(match: re.Match) -> str:
+        # NOTE(review): small head counts are usually spoken with native
+        # numerals ("세 명"); the Sino-Korean reading is kept here - confirm.
+        return f"{_sino_korean(int(match.group(1)))}명"
+
+    # Hangul syllables are word characters, so a trailing \b would reject a
+    # suffix followed by an attached particle ("2026년에", "3명이").  The
+    # suffix is therefore matched without a trailing boundary when it is
+    # attached to the digits.
+    text = re.sub(start + r"(\d{4})\s*년", _year, text)
+    text = re.sub(start + r"(\d{1,2})\s*월\s*(\d{1,2})\s*일", _month_day, text)
+    # When whitespace separates the digits from 여/명, still require a word
+    # boundary after the suffix so that "3 명령" or "100 여자" are not touched.
+    text = re.sub(start + r"(\d+)(?:여|\s+여\b)", _approx, text)
+    text = re.sub(start + r"(\d+)(?:명|\s+명\b)", _count, text)
+    return text
+
+
 def _preprocess_text(text: str) -> str:
    # 영어 약어/브랜드 발음 보정
    for src, dst in _PHRASE_MAP:
        text = re.sub(rf"\b{re.escape(src)}\b", dst, text, flags=re.IGNORECASE)
+    text = _replace_numbers(text)
    text = re.sub(r"\b[A-Z]{2,6}\b", _spell_abbrev, text)
    # 괄호/구두점으로 인한 끊김을 완화
    text = text.replace("(", " ").replace(")", " ")