Initial commit after re-install
This commit is contained in:
86
app/stt.py
Normal file
86
app/stt.py
Normal file
@@ -0,0 +1,86 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Iterable, Tuple
|
||||
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SegmentOut:
    """Immutable record describing one transcribed segment.

    The class intentionally keeps a regular instance dictionary (no
    ``__slots__``) so an instance can be turned into a plain mapping of
    ``start`` / ``end`` / ``text`` through its ``__dict__``.
    """

    # Segment start offset (presumably seconds -- confirm against faster-whisper).
    start: float
    # Segment end offset, same unit as ``start``.
    end: float
    # Transcribed text of the segment.
    text: str
|
||||
|
||||
|
||||
# Module-level cache for the model instance: stays None until _get_model()
# constructs the model on first use, after which the same object is reused.
_MODEL: WhisperModel | None = None
|
||||
|
||||
|
||||
def _get_model() -> WhisperModel:
    """Return the shared ``WhisperModel``, constructing it on the first call.

    Construction settings are read from the environment only when the model
    is first built:

    * ``APP_WHISPER_MODEL`` -- model name, default ``"small"``
    * ``APP_WHISPER_DEVICE`` -- device, default ``"cpu"``
    * ``APP_WHISPER_COMPUTE_TYPE`` -- compute type, default ``"int8"``

    Subsequent calls return the cached instance without re-reading the
    environment. If construction raises, nothing is cached and the next call
    tries again.
    """
    global _MODEL
    if _MODEL is None:
        # WhisperModel download/cache handled by faster-whisper internally.
        _MODEL = WhisperModel(
            os.environ.get("APP_WHISPER_MODEL", "small"),
            device=os.environ.get("APP_WHISPER_DEVICE", "cpu"),
            compute_type=os.environ.get("APP_WHISPER_COMPUTE_TYPE", "int8"),
        )
    return _MODEL
|
||||
|
||||
|
||||
def transcribe_iter(
    audio_path: str,
    *,
    language: str | None = None,
    vad_filter: bool = True,
    beam_size: int = 5,
) -> Tuple[Iterable[Any], Any]:
    """Run the shared model on *audio_path* and return ``(segments, info)``.

    Args:
        audio_path: Path of the audio file handed to the model.
        language: Language passed through to the model; ``None`` is
            forwarded unchanged.
        vad_filter: Forwarded to the model's ``vad_filter`` option.
        beam_size: Forwarded to the model's ``beam_size`` option.

    Returns:
        The two values produced by ``model.transcribe``: an iterable of
        segment objects and an info object. The iterable is presumably
        lazy (decoding happens while it is consumed) -- confirm against the
        faster-whisper documentation.
    """
    decode_options = {
        "language": language,
        "vad_filter": vad_filter,
        "beam_size": beam_size,
    }
    segments_iter, info = _get_model().transcribe(audio_path, **decode_options)
    return segments_iter, info
|
||||
|
||||
|
||||
def transcribe_file(
    audio_path: str,
    *,
    language: str | None = None,
    vad_filter: bool = True,
    beam_size: int = 5,
) -> dict[str, Any]:
    """Transcribe *audio_path* completely and return a plain-dict summary.

    Args:
        audio_path: Path of the audio file to transcribe.
        language: Forwarded to ``transcribe_iter``.
        vad_filter: Forwarded to ``transcribe_iter``.
        beam_size: Forwarded to ``transcribe_iter``.

    Returns:
        A dict with these keys:

        * ``"text"`` -- texts of the kept segments joined with newlines.
        * ``"segments"`` -- one ``start`` / ``end`` / ``text`` mapping per
          kept segment.
        * ``"detected_language"``, ``"language_probability"``,
          ``"duration_sec"`` -- read from the info object's ``language``,
          ``language_probability`` and ``duration`` attributes, or ``None``
          when the attribute is missing.

        Segments whose text is empty after stripping whitespace are dropped.
    """
    raw_segments, info = transcribe_iter(
        audio_path,
        language=language,
        vad_filter=vad_filter,
        beam_size=beam_size,
    )

    kept: list[SegmentOut] = []
    for raw in _iter_segments(raw_segments):
        # Build the record before filtering so the numeric conversion of
        # start/end is applied to every segment, including dropped ones.
        candidate = SegmentOut(
            start=float(raw.start),
            end=float(raw.end),
            text=(raw.text or "").strip(),
        )
        if not candidate.text:
            continue
        kept.append(candidate)

    return {
        "text": "\n".join(item.text for item in kept).strip(),
        "segments": [vars(item) for item in kept],
        "detected_language": getattr(info, "language", None),
        "language_probability": getattr(info, "language_probability", None),
        "duration_sec": getattr(info, "duration", None),
    }
|
||||
|
||||
|
||||
def _iter_segments(segments_iter: Iterable[Any]) -> Iterable[Any]:
|
||||
for s in segments_iter:
|
||||
yield s
|
||||
|
||||
Reference in New Issue
Block a user