Roll back web speaker diarization; add HF token helper for whisper_stt

- Remove app/diarize.py and pyannote from requirements.txt; web uses faster-whisper only
- Revert main.py job/transcribe flow and index.html meta/hints
- Add app/pyannote_auth.py for Pipeline.from_pretrained(..., token=...) used by whisper_stt
- Expand whisper_stt / README / requirements-whisper-stt for gated repos (community-1, 403)

Made-with: Cursor
This commit is contained in:
dosangyoon
2026-03-23 13:31:38 +09:00
parent 2e503d1a56
commit 13d1f75b34
8 changed files with 60 additions and 220 deletions

26
app/pyannote_auth.py Normal file
View File

@@ -0,0 +1,26 @@
"""pyannote 파이프라인은 config가 허브의 gated 하위 모델(segmentation·embedding 등)을 가리킬 수 있어 토큰이 필요하다."""
from __future__ import annotations
import os
from pathlib import Path
from typing import Any
def hf_token_for_pyannote() -> str | bool:
    """Return an explicit Hugging Face token, or ``True`` when none is set.

    The environment variables ``HF_TOKEN``, ``HUGGING_FACE_HUB_TOKEN`` and
    ``PYANNOTE_AUTH_TOKEN`` are consulted in that order; the first one whose
    value is non-empty after stripping whitespace wins. ``True`` is the
    fallback, meant to stand for the login cached by ``huggingface-cli``.
    """
    env_names = ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "PYANNOTE_AUTH_TOKEN")
    # Lazily strip each candidate so lookup order (and precedence) is kept.
    stripped_values = (os.environ.get(name, "").strip() for name in env_names)
    return next((value for value in stripped_values if value), True)
def load_pyannote_pipeline(model_dir: str | Path) -> Any:
    """Load a pyannote ``Pipeline`` with Hugging Face credentials attached.

    Args:
        model_dir: Value handed to ``Pipeline.from_pretrained`` after being
            converted with ``str()``.

    Returns:
        Whatever ``Pipeline.from_pretrained`` returns.

    Raises:
        TypeError: Re-raised unchanged when it is not the "unexpected keyword
            argument" error for ``token``; such an error comes from the load
            itself, not from the keyword name.
    """
    # Imported here so that importing this module does not require
    # pyannote.audio to be installed.
    from pyannote.audio import Pipeline

    auth = hf_token_for_pyannote()
    path = str(model_dir)
    try:
        return Pipeline.from_pretrained(path, token=auth)
    except TypeError as exc:
        message = str(exc)
        # Only fall back when this release rejects the `token` keyword.
        # Any other TypeError is a genuine loading failure; retrying would
        # repeat the load and hide the real cause behind a second error.
        if "unexpected keyword argument" not in message or "token" not in message:
            raise
    # Releases that do not accept `token` take `use_auth_token` instead.
    return Pipeline.from_pretrained(path, use_auth_token=auth)