Handle Korean numeric dates in TTS

Convert common date and count numerals to Korean readings so MMS outputs month/day and attendee counts correctly.
2026-01-30 20:42:36 +09:00
parent 35dae473ec
commit e240153e3f
1 changed files with 63 additions and 0 deletions
--- a/server/tts_service.py
+++ b/server/tts_service.py
@@ -41,6 +41,18 @@ _PHRASE_MAP = [
    ("Naver Blog", "네이버 블로그"),
    ("Brother Korea", "브라더 코리아"),
 ]
 _NUM_KO = {
    0: "영",
    1: "일",
    2: "이",
    3: "삼",
    4: "사",
    5: "오",
    6: "육",
    7: "칠",
    8: "팔",
    9: "구",
 }
 def _get_mms():
@@ -127,10 +139,61 @@ def _spell_abbrev(match: re.Match) -> str:
    return " ".join(_LETTER_KO.get(ch, ch) for ch in match.group(0))
 def _sino_korean(num: int) -> str:
    """Return the Sino-Korean reading of an integer.

    Within each four-digit group a leading "일" is dropped before 천/백/십
    (1000 -> "천", 110 -> "백십").  Groups are joined with the myriad units
    만 / 억 / 조, so values of 10000 and above are supported instead of
    failing on a digit-table lookup.

    Args:
        num: The value to read.

    Returns:
        The reading as a single unspaced string.  ``0`` reads "영".  Negative
        values yield an empty string.  Values too large for the named units
        (10**16 and above) are read digit by digit.
    """
    if num == 0:
        return _NUM_KO[0]
    if num < 0:
        # No reading is defined for negative values; callers only pass
        # digit runs, so keep returning an empty string for them.
        return ""

    small_units = ((1000, "천"), (100, "백"), (10, "십"))
    big_units = ("", "만", "억", "조")

    if num >= 10000 ** len(big_units):
        # Past the largest named unit: fall back to spelling out the digits.
        return "".join(_NUM_KO[int(digit)] for digit in str(num))

    def _group(value: int) -> str:
        # Reading of 1..9999 without any myriad unit attached.
        parts = []
        for base, unit in small_units:
            digit, value = divmod(value, base)
            if digit:
                if digit > 1:
                    parts.append(_NUM_KO[digit])
                parts.append(unit)
        if value:
            parts.append(_NUM_KO[value])
        return "".join(parts)

    groups = []
    index = 0
    while num:
        num, chunk = divmod(num, 10000)
        if chunk:
            unit = big_units[index]
            # A leading 10000 is read "만" rather than "일만"; 억 and 조 keep
            # their "일" ("일억"), as does a non-leading 만 ("일억일만").
            if chunk == 1 and unit == "만" and num == 0:
                groups.append(unit)
            else:
                groups.append(_group(chunk) + unit)
        index += 1
    return "".join(reversed(groups))
 def _replace_numbers(text: str) -> str:
    """Rewrite numeric dates and counts in *text* as Sino-Korean readings.

    Handled forms (whitespace between the number and its suffix is optional
    and is dropped from the output):

    * four-digit year + 년        "2026년에"   -> "이천이십육년에"
    * month + 월 and day + 일     "6월 1일"    -> "유월 일일"
    * approximate figure + 여     "100여명"    -> "백여명"
    * head count + 명             "1,200명이"  -> "천이백명이"

    A suffix may be followed directly by more Hangul (particles, counters),
    and the number may directly follow Hangul.  A digit run that continues a
    longer figure (preceded by a digit, "." or ",") is not touched, so
    decimals are not half-converted.  Counts may use "," as a thousands
    separator.

    Args:
        text: Input sentence(s).

    Returns:
        The text with the recognised numerals replaced; everything else is
        returned unchanged.
    """
    # Hangul counts as a word character in str patterns, so a word-boundary
    # assertion would reject "30명이" or "총30명".  Guard only against starting
    # in the middle of a longer figure.
    lead = r"(?<![\d.,])"
    # Plain digit run, or a figure grouped with thousands separators.
    figure = r"(\d{1,3}(?:,\d{3})+|\d+)"
    # 6월 and 10월 have irregular readings (유월, 시월).
    irregular_months = {6: "유", 10: "시"}

    def _year(match: re.Match) -> str:
        return f"{_sino_korean(int(match.group(1)))}년"

    def _month_day(match: re.Match) -> str:
        month_no = int(match.group(1))
        month = irregular_months.get(month_no) or _sino_korean(month_no)
        day = _sino_korean(int(match.group(2)))
        return f"{month}월 {day}일"

    def _with_suffix(suffix: str):
        # Build a re.sub callback that reads group 1 and appends *suffix*.
        def _sub(match: re.Match) -> str:
            try:
                reading = _sino_korean(int(match.group(1).replace(",", "")))
            except (KeyError, ValueError):
                # The figure could not be converted; keep the original text
                # rather than aborting the whole preprocessing pass.
                return match.group(0)
            return f"{reading}{suffix}"

        return _sub

    text = re.sub(lead + r"(\d{4})\s*년", _year, text)
    text = re.sub(lead + r"(\d{1,2})\s*월\s*(\d{1,2})\s*일", _month_day, text)
    text = re.sub(lead + figure + r"\s*여", _with_suffix("여"), text)
    text = re.sub(lead + figure + r"\s*명", _with_suffix("명"), text)
    return text
 def _preprocess_text(text: str) -> str:
    # 영어 약어/브랜드 발음 보정
    for src, dst in _PHRASE_MAP:
        text = re.sub(rf"\b{re.escape(src)}\b", dst, text, flags=re.IGNORECASE)
    text = _replace_numbers(text)
    text = re.sub(r"\b[A-Z]{2,6}\b", _spell_abbrev, text)
    # 괄호/구두점으로 인한 끊김을 완화
    text = text.replace("(", " ").replace(")", " ")