From 35dae473ec72e84eebbbdd0136fed90e95a24974 Mon Sep 17 00:00:00 2001
From: dsyoon <dosangyoon@gmail.com>
Date: Fri, 30 Jan 2026 20:32:24 +0900
Subject: [PATCH] Apply uroman preprocessing for MMS

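Romanize the input text with uroman before MMS tokenization, which
Korean input may require. The step is best-effort: if uroman cannot be
imported or fails, the original text is used unchanged. Also reject
empty input text and empty tokenizer output with a RuntimeError, and
add uroman to requirements.txt.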
---
 requirements.txt      |  1 +
 server/tts_service.py | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 15e56eb..d0b3ec6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ jinja2
 torch
 transformers
 soundfile
+uroman
diff --git a/server/tts_service.py b/server/tts_service.py
index d28f709..14a681e 100644
--- a/server/tts_service.py
+++ b/server/tts_service.py
@@ -75,7 +75,22 @@ def _text_to_wav_mms(text: str, wav_path: str) -> None:
         raise RuntimeError("MMS TTS 사용을 위해 soundfile 설치가 필요합니다.") from exc
 
     model, tokenizer = _get_mms()
+
+    text = text.strip()
+    if not text:
+        raise RuntimeError("MMS 입력 텍스트가 비어 있습니다.")
+
+    # 한국어 입력은 uroman 전처리가 필요할 수 있음
+    try:
+        from uroman import uroman
+
+        text = uroman(text)
+    except Exception:
+        pass
+
     inputs = tokenizer(text, return_tensors="pt")
+    if inputs["input_ids"].shape[1] == 0:
+        raise RuntimeError("MMS 토크나이저 입력이 비어 있습니다.")
     with torch.no_grad():
         audio = model(**inputs).waveform.squeeze().cpu().numpy()