Normalize English terms for Korean TTS

Preprocess text to spell abbreviations and map common English phrases to Korean pronunciation.
This commit is contained in:
dsyoon
2026-01-30 20:17:17 +09:00
parent 1e8ff342c7
commit 8cdfa1bf4f

View File

@@ -1,4 +1,5 @@
import os
import re
import subprocess
import tempfile
from pathlib import Path
@@ -7,6 +8,39 @@ from typing import Optional, Tuple
import pyttsx3
# Module-level cache slot: starts empty (None) and is typed to hold a pair of objects.
# NOTE(review): presumably filled in by _get_mms(), whose body is not visible here — confirm.
_MMS_CACHE: Optional[Tuple[object, object]] = None
# Korean pronunciation of each uppercase Latin letter, used to spell out
# abbreviations one letter at a time. Every letter must map to a non-empty
# string: an empty entry makes the letter disappear from the spoken output.
_LETTER_KO = {
    "A": "에이",
    "B": "비",
    "C": "씨",
    "D": "디",
    "E": "이",
    "F": "에프",
    "G": "지",
    "H": "에이치",
    "I": "아이",
    "J": "제이",
    "K": "케이",
    "L": "엘",
    "M": "엠",
    "N": "엔",
    "O": "오",
    "P": "피",
    "Q": "큐",
    "R": "알",
    "S": "에스",
    "T": "티",
    "U": "유",
    "V": "브이",
    "W": "더블유",
    "X": "엑스",
    "Y": "와이",
    # "제트" rather than "지" so that Z stays audibly distinct from G.
    "Z": "제트",
}
# (English phrase, Korean pronunciation) pairs. _preprocess_text replaces each
# phrase case-insensitively, in list order, before abbreviations are spelled out.
_PHRASE_MAP = [
    ("Automatic Document Feeder", "오토매틱 도큐먼트 피더"),
    ("Naver Blog", "네이버 블로그"),
    ("Brother Korea", "브라더 코리아"),
]
def _get_mms():
@@ -73,10 +107,24 @@ def _select_korean_voice(engine: pyttsx3.Engine) -> None:
continue
def _spell_abbrev(match: re.Match) -> str:
    """Spell a matched abbreviation letter by letter in Korean.

    Intended as a replacement callback for ``re.sub``.

    Args:
        match: Regex match whose full text (group 0) is the abbreviation.

    Returns:
        The per-letter pronunciations joined by single spaces. A letter with
        no usable entry in ``_LETTER_KO`` is kept as-is.
    """
    # `or ch` instead of a .get() default: the fallback must also apply when
    # the table holds an empty string, otherwise that letter would be dropped
    # and leave a stray space in the output.
    return " ".join(_LETTER_KO.get(ch) or ch for ch in match.group(0))
def _preprocess_text(text: str) -> str:
    """Rewrite English phrases and abbreviations into Korean pronunciation.

    First replaces every phrase listed in ``_PHRASE_MAP`` (case-insensitive),
    then spells out each remaining run of 2-6 uppercase ASCII letters via
    ``_spell_abbrev``.

    Args:
        text: Input text to be spoken.

    Returns:
        The text with known phrases and abbreviations substituted.
    """
    # Correct the pronunciation of English abbreviations and brand names.
    # re.ASCII restricts \b to ASCII word characters. Without it Hangul counts
    # as \w, so a particle attached directly to a term ("USB를",
    # "Naver Blog에서") removes the word boundary and the term never matches.
    for src, dst in _PHRASE_MAP:
        text = re.sub(
            rf"\b{re.escape(src)}\b",
            dst,
            text,
            flags=re.IGNORECASE | re.ASCII,
        )
    text = re.sub(r"\b[A-Z]{2,6}\b", _spell_abbrev, text, flags=re.ASCII)
    return text
def text_to_mp3(text: str, mp3_path: str) -> None:
if not text:
raise RuntimeError("텍스트가 비어 있습니다.")
text = _preprocess_text(text)
mp3_target = Path(mp3_path)
mp3_target.parent.mkdir(parents=True, exist_ok=True)