Normalize English terms for Korean TTS

Preprocess text before synthesis: spell out uppercase abbreviations letter by letter in Korean and replace common English phrases with their Korean pronunciation.
2026-01-30 20:17:17 +09:00
parent 1e8ff342c7
commit 8cdfa1bf4f
1 changed file with 48 additions and 0 deletions
--- a/server/tts_service.py
+++ b/server/tts_service.py
@@ -1,4 +1,5 @@
 import os
+import re
 import subprocess
 import tempfile
 from pathlib import Path
@@ -7,6 +8,39 @@ from typing import Optional, Tuple
 import pyttsx3

 _MMS_CACHE: Optional[Tuple[object, object]] = None  # NOTE(review): presumably filled lazily by _get_mms() — confirm
+_LETTER_KO = {  # Hangul reading of each uppercase Latin letter name; used to spell out abbreviations
+    "A": "에이",
+    "B": "비",
+    "C": "씨",
+    "D": "디",
+    "E": "이",
+    "F": "에프",
+    "G": "지",
+    "H": "에이치",
+    "I": "아이",
+    "J": "제이",
+    "K": "케이",
+    "L": "엘",
+    "M": "엠",
+    "N": "엔",
+    "O": "오",
+    "P": "피",
+    "Q": "큐",
+    "R": "알",
+    "S": "에스",
+    "T": "티",
+    "U": "유",
+    "V": "브이",
+    "W": "더블유",
+    "X": "엑스",
+    "Y": "와이",
+    "Z": "지",  # NOTE(review): identical to the reading for "G" — confirm this is intended
+}
+_PHRASE_MAP = [  # (English phrase, Korean pronunciation) pairs; applied in list order, case-insensitively
+    ("Automatic Document Feeder", "오토매틱 도큐먼트 피더"),
+    ("Naver Blog", "네이버 블로그"),
+    ("Brother Korea", "브라더 코리아"),
+]


 def _get_mms():
@@ -73,10 +107,24 @@ def _select_korean_voice(engine: pyttsx3.Engine) -> None:
                continue


+def _spell_abbrev(match: re.Match) -> str:
+    """Return the matched text with each letter replaced by its Korean reading.
+
+    Intended as a replacement callback for ``re.sub``. Characters that have no
+    entry in ``_LETTER_KO`` are kept as they are, and the resulting readings
+    are joined with single spaces.
+    """
+    readings = [_LETTER_KO.get(letter, letter) for letter in match.group(0)]
+    return " ".join(readings)
+
+
+def _preprocess_text(text: str) -> str:
+    """Rewrite English phrases and abbreviations as Korean pronunciation.
+
+    Each ``_PHRASE_MAP`` entry is replaced case-insensitively, in list order;
+    afterwards every remaining run of 2-6 uppercase ASCII letters is spelled
+    out letter by letter via ``_spell_abbrev``.
+
+    Args:
+        text: Input text, possibly mixing Korean and English.
+
+    Returns:
+        The text with known phrases and abbreviations replaced.
+    """
+    # re.ASCII restricts \b to ASCII word characters. By default Hangul
+    # syllables count as word characters, so there is no boundary between an
+    # English token and an attached Korean particle (e.g. "USB를") and the
+    # token would be left unconverted.
+    for src, dst in _PHRASE_MAP:
+        text = re.sub(
+            rf"\b{re.escape(src)}\b", dst, text, flags=re.IGNORECASE | re.ASCII
+        )
+    text = re.sub(r"\b[A-Z]{2,6}\b", _spell_abbrev, text, flags=re.ASCII)
+    return text
+
+
 def text_to_mp3(text: str, mp3_path: str) -> None:
    if not text:
        raise RuntimeError("텍스트가 비어 있습니다.")

+    text = _preprocess_text(text)
+
    mp3_target = Path(mp3_path)
    mp3_target.parent.mkdir(parents=True, exist_ok=True)