Add OpenAI Whisper test script with ffmpeg fallback via imageio-ffmpeg

- test.py: patch whisper.audio.run to use the bundled ffmpeg when ffmpeg is not on PATH; apply expanduser to the input/output paths
- requirements.txt: add imageio-ffmpeg for optional local transcription tooling

Made-with: Cursor
2026-03-23 10:43:30 +09:00
parent 7adaa73102
commit 78244da09f
2 changed files with 84 additions and 0 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,5 +3,6 @@ uvicorn[standard]
 python-multipart
 pydantic
 faster-whisper
+imageio-ffmpeg
 psycopg[binary]
 python-dotenv
--- a/test.py
+++ b/test.py
@@ -0,0 +1,83 @@
+# test.py — OpenAI Whisper CLI (ffmpeg is required to decode m4a/mp3 and similar formats)
+from __future__ import annotations
+
+import os
+import shutil
+import sys
+
+import whisper
+import whisper.audio as whisper_audio
+
+
+def _resolve_ffmpeg_exe() -> str:
+    """Return the path of an ffmpeg executable, exiting if none is available.
+
+    Lookup order:
+      1. ``ffmpeg`` found on PATH via ``shutil.which``.
+      2. The binary reported by ``imageio_ffmpeg.get_ffmpeg_exe()``.
+
+    Returns:
+        Path to the ffmpeg executable.
+
+    Raises:
+        SystemExit: With status 1, after printing install instructions to
+            stderr, when neither lookup yields a binary.
+    """
+    path = shutil.which("ffmpeg")
+    if path:
+        return path
+    try:
+        import imageio_ffmpeg
+
+        return imageio_ffmpeg.get_ffmpeg_exe()
+    except (ImportError, RuntimeError):
+        # ImportError: imageio-ffmpeg is not installed.
+        # RuntimeError: the package is installed but get_ffmpeg_exe() could
+        # not locate a usable binary.
+        # Either way, fall through to the install instructions instead of
+        # surfacing a raw traceback.
+        pass
+    print(
+        "오류: ffmpeg를 찾을 수 없습니다. Whisper는 m4a/mp3 등을 ffmpeg로 디코딩합니다.\n\n"
+        "설치 방법(택 1):\n"
+        "  • Homebrew:  brew install ffmpeg\n"
+        "  • conda:     conda install -c conda-forge ffmpeg\n"
+        "  • pip 번들:  pip install imageio-ffmpeg\n"
+        "              (이 프로젝트 requirements.txt에 포함되어 있으면 pip install -r requirements.txt)\n",
+        file=sys.stderr,
+    )
+    sys.exit(1)
+
+
+def _patch_whisper_ffmpeg() -> None:
+    """Make whisper.audio launch the resolved ffmpeg binary.
+
+    whisper.audio uses only the bare command name 'ffmpeg', so its ``run``
+    is replaced by a wrapper that substitutes the actual executable path
+    whenever the first element of a list/tuple command is "ffmpeg".
+    Every other call is forwarded untouched.
+    """
+    resolved_exe = _resolve_ffmpeg_exe()
+    original_run = whisper_audio.run
+
+    def _run_with_resolved_ffmpeg(cmd, *args, **kwargs):
+        is_ffmpeg_argv = (
+            isinstance(cmd, (list, tuple)) and bool(cmd) and cmd[0] == "ffmpeg"
+        )
+        if not is_ffmpeg_argv:
+            return original_run(cmd, *args, **kwargs)
+        # Keep every argument after the program name as-is.
+        return original_run([resolved_exe, *cmd[1:]], *args, **kwargs)
+
+    whisper_audio.run = _run_with_resolved_ffmpeg  # type: ignore[method-assign]
+
+
+def main() -> None:
+    """Transcribe an audio file with Whisper and write the text to a file.
+
+    Reads two positional arguments from ``sys.argv``: the input audio path
+    and the output text path (``~`` is expanded in both). Loads the
+    "medium" model, transcribes with ``language="ko"`` and ``fp16=False``,
+    prints the first 500 characters as a preview, and writes the full text
+    to the output path as UTF-8.
+
+    Raises:
+        SystemExit: With status 1 on a wrong argument count, a missing
+            input file, or a missing output directory.
+    """
+    if len(sys.argv) != 3:
+        print("사용법: python test.py <입력파일> <출력파일>", file=sys.stderr)
+        sys.exit(1)
+
+    input_file = os.path.expanduser(sys.argv[1])
+    output_file = os.path.expanduser(sys.argv[2])
+
+    if not os.path.exists(input_file):
+        print(f"오류: 입력 파일이 존재하지 않습니다: {input_file}", file=sys.stderr)
+        sys.exit(1)
+
+    # Check the output location up front: failing only at the final write
+    # would throw away the whole (slow) transcription.
+    output_dir = os.path.dirname(output_file) or "."
+    if not os.path.isdir(output_dir):
+        print(f"오류: 출력 디렉터리가 존재하지 않습니다: {output_dir}", file=sys.stderr)
+        sys.exit(1)
+
+    _patch_whisper_ffmpeg()
+
+    print("모델 로드 중... (medium, 한글 최적화)")
+    model = whisper.load_model("medium")
+
+    print(f"변환 시작: {input_file}")
+    result = model.transcribe(
+        input_file,
+        language="ko",
+        fp16=False,
+    )
+    text = result["text"]
+
+    # Preview: first 500 characters only, with an ellipsis when truncated.
+    # Built as one string so print() does not insert a separator space.
+    print("\n===== 변환 결과 미리보기 =====\n")
+    preview = text[:500] + ("..." if len(text) > 500 else "")
+    print(preview)
+
+    # Save the full transcription.
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.write(text)
+
+    print(f"\n변환 완료. 출력 파일: {output_file}")
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()