From 35dae473ec72e84eebbbdd0136fed90e95a24974 Mon Sep 17 00:00:00 2001 From: dsyoon Date: Fri, 30 Jan 2026 20:32:24 +0900 Subject: [PATCH] Apply uroman preprocessing for MMS Romanize Korean input for MMS, guard empty tokens, and add uroman dependency. --- requirements.txt | 1 + server/tts_service.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/requirements.txt b/requirements.txt index 15e56eb..d0b3ec6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ jinja2 torch transformers soundfile +uroman diff --git a/server/tts_service.py b/server/tts_service.py index d28f709..14a681e 100644 --- a/server/tts_service.py +++ b/server/tts_service.py @@ -75,7 +75,22 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None: raise RuntimeError("MMS TTS 사용을 위해 soundfile 설치가 필요합니다.") from exc model, tokenizer = _get_mms() + + text = text.strip() + if not text: + raise RuntimeError("MMS 입력 텍스트가 비어 있습니다.") + + # 한국어 입력은 uroman 전처리가 필요할 수 있음 + try: + from uroman import uroman + + text = uroman(text) + except Exception: + pass + inputs = tokenizer(text, return_tensors="pt") + if inputs["input_ids"].shape[1] == 0: + raise RuntimeError("MMS 토크나이저 입력이 비어 있습니다.") with torch.no_grad(): audio = model(**inputs).waveform.squeeze().cpu().numpy()