Update mic meter and speech handling

dsyoon
2026-02-01 10:54:28 +09:00
parent a8a51a19c1
commit 3e3cafe6ea
5 changed files with 342 additions and 151 deletions

View File

@@ -42,6 +42,16 @@
font-size: 16px;
}
.interim-box {
background: #f3f4f6;
border: 1px solid #e5e7eb;
border-radius: 8px;
padding: 10px 12px;
font-size: 14px;
color: #374151;
min-height: 42px;
}
.transcript-panel {
flex: 3 1 0;
min-height: 0;
@@ -103,6 +113,7 @@
.controls {
display: flex;
gap: 12px;
align-items: center;
}
button {
@@ -114,8 +125,8 @@ button {
}
.record-btn {
-  background: #e5e7eb;
-  color: #111827;
+  background: #fca5a5;
+  color: #7f1d1d;
}
.record-btn.recording {
@@ -129,11 +140,34 @@ button {
color: #fff;
}
.save-btn {
  background: #2563eb;
  color: #fff;
}
.mic-meter {
position: relative;
width: 140px;
height: 10px;
background: #e5e7eb;
border-radius: 999px;
overflow: hidden;
}
.mic-meter-bar {
height: 100%;
background: #ef4444;
width: 0%;
transition: width 80ms linear;
}
.mic-meter-label {
position: absolute;
right: 8px;
top: 50%;
transform: translateY(-50%);
font-size: 10px;
font-weight: 600;
color: #111827;
pointer-events: none;
}
.meeting-list {
flex: 1;
border: 1px solid #f0f0f0;
@@ -143,6 +177,25 @@ button {
background: #fafafa;
}
.meeting-list-toolbar {
display: flex;
justify-content: flex-end;
margin-bottom: 8px;
}
.select-all-btn {
background: #e5e7eb;
color: #111827;
padding: 6px 12px;
border-radius: 999px;
font-size: 12px;
}
.select-all-btn.active {
background: #111827;
color: #fff;
}
.meeting-item {
margin-bottom: 10px;
}

View File

@@ -2,14 +2,7 @@ import { useEffect, useMemo, useRef, useState } from 'react'
import './App.css'
import TranscriptPanel from './components/TranscriptPanel'
import MeetingList from './components/MeetingList'
-import {
-  createMeeting,
-  deleteMeetings,
-  endMeeting,
-  fetchMeeting,
-  fetchMeetings,
-  saveUtterance,
-} from './lib/api'
+import { createMeeting, deleteMeetings, endMeeting, fetchMeeting, fetchMeetings } from './lib/api'
function App() {
const [isRecording, setIsRecording] = useState(false)
@@ -25,20 +18,31 @@ function App() {
new Set()
)
const [errorMessage, setErrorMessage] = useState<string | null>(null)
-  const recognitionRef = useRef<SpeechRecognition | null>(null)
-  const liveTextRef = useRef('')
+  const [micLevel, setMicLevel] = useState(0)
   const lineIdRef = useRef(1)
   const meetingIdRef = useRef<number | null>(null)
-  const pendingUtterancesRef = useRef<{ ts: string; text: string }[]>([])
+  const recognitionRef = useRef<SpeechRecognition | null>(null)
   const isRecordingRef = useRef(false)
-  const lastResultAtRef = useRef<number>(Date.now())
-  const restartLockRef = useRef(false)
+  const isStartingRef = useRef(false)
+  const stopRequestedRef = useRef(false)
+  const maxLinesRef = useRef(500)
+  const micLevelRef = useRef(0)
+  const pendingTranscriptRef = useRef('')
+  const lastAutoFinalAtRef = useRef(0)
+  const lastAutoFinalTextRef = useRef('')
+  const lastVoiceAtRef = useRef(0)
+  const lastSilenceFinalAtRef = useRef(0)
+  const lastResultAtRef = useRef(0)
+  const noResultTimerRef = useRef<number | null>(null)
+  const resetPendingRef = useRef(false)
+  const lastFinalTextRef = useRef('')
+  const lastFinalAtRef = useRef(0)
+  const audioContextRef = useRef<AudioContext | null>(null)
   const hasSpeechRecognition = useMemo(() => {
     return 'SpeechRecognition' in window || 'webkitSpeechRecognition' in window
   }, [])
+  const analyserRef = useRef<AnalyserNode | null>(null)
+  const mediaStreamRef = useRef<MediaStream | null>(null)
+  const meterRafRef = useRef<number | null>(null)
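
Note: recording state is deliberately mirrored into refs (isRecording / isRecordingRef, micLevel / micLevelRef). The recognition callbacks, timers, and the requestAnimationFrame loop are created once and would otherwise close over stale state. A minimal sketch of the pattern, with illustrative names:

import { useRef, useState } from 'react'

// Keep a ref in sync with state: callbacks created once (timers, rAF
// loops, event handlers) read ref.current; renders read the state value.
function useStateWithRef<T>(initial: T) {
  const [value, setValue] = useState(initial)
  const ref = useRef(initial)
  const set = (next: T) => {
    ref.current = next // visible to callbacks immediately
    setValue(next) // triggers a re-render
  }
  return [value, ref, set] as const
}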
useEffect(() => {
fetchMeetings()
@@ -46,169 +50,250 @@ function App() {
.catch((err) => setErrorMessage(err.message))
}, [])
-  useEffect(() => {
-    if (!isRecording) return
-    const intervalId = window.setInterval(() => {
-      if (!isRecordingRef.current) return
-      const now = Date.now()
-      if (now - lastResultAtRef.current > 4000) {
-        void safeRestartRecognition()
-      }
-    }, 2000)
-    return () => window.clearInterval(intervalId)
-  }, [isRecording])
-
-  const persistFinal = async (ts: string, text: string) => {
-    if (!meetingIdRef.current) {
-      pendingUtterancesRef.current.push({ ts, text })
-      return
-    }
-    try {
-      await saveUtterance(meetingIdRef.current, text, ts)
-    } catch (err) {
-      setErrorMessage((err as Error).message)
-    }
-  }
+  const clearNoResultTimer = () => {
+    if (noResultTimerRef.current !== null) {
+      window.clearTimeout(noResultTimerRef.current)
+      noResultTimerRef.current = null
+    }
+  }
+
+  const scheduleNoResultReset = () => {
+    clearNoResultTimer()
+    if (!isRecordingRef.current || stopRequestedRef.current) return
+    noResultTimerRef.current = window.setTimeout(() => {
+      noResultTimerRef.current = null
+      if (!isRecordingRef.current || stopRequestedRef.current) return
+      const now = Date.now()
+      if (now - lastResultAtRef.current < 1500) return
+      if (resetPendingRef.current) return
+      resetPendingRef.current = true
+      try {
+        recognitionRef.current?.stop()
+      } catch {
+        // ignore stop errors
+      }
+      window.setTimeout(() => {
+        resetPendingRef.current = false
+        if (isRecordingRef.current && !stopRequestedRef.current) {
+          startRecognition()
+        }
+      }, 300)
+    }, 1500)
+  }
-  const updateTranscript = (text: string, isFinal: boolean) => {
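
Note: this replaces the old 2 s polling interval with a debounced watchdog. Every recognition result re-arms a 1.5 s timer; if it fires with no recent result, recognition is stopped and restarted shortly after, and resetPendingRef keeps overlapping resets from stacking up. The same idea as a standalone sketch (the restart callback is a placeholder):

// Debounced stall watchdog: re-arm on every result; if nothing arrives
// before the timeout fires, run the restart callback exactly once.
function createStallWatchdog(restart: () => void, timeoutMs = 1500) {
  let timer: number | null = null
  let lastResultAt = 0
  return {
    onResult() {
      lastResultAt = Date.now()
      if (timer !== null) window.clearTimeout(timer)
      timer = window.setTimeout(() => {
        timer = null
        if (Date.now() - lastResultAt >= timeoutMs) restart()
      }, timeoutMs)
    },
    cancel() {
      if (timer !== null) window.clearTimeout(timer)
      timer = null
    },
  }
}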
+  const appendFinalLine = (text: string) => {
     const trimmed = text.trim()
     if (!trimmed) return
+    const tokenCount = trimmed.split(/\s+/).filter(Boolean).length
+    if (tokenCount < 2) return
+    const now = Date.now()
+    if (now - lastFinalAtRef.current < 1200) {
+      const last = lastFinalTextRef.current
+      if (last && (last.includes(trimmed) || trimmed.includes(last))) {
+        return
+      }
+    }
     const ts = new Date().toISOString()
-    liveTextRef.current = isFinal ? '' : trimmed
     setTranscriptLines((prev) => {
-      const last = prev[prev.length - 1]
-      if (last && !last.isFinal) {
-        return [
-          ...prev.slice(0, -1),
-          { ...last, text: trimmed, ts, isFinal },
-        ]
-      }
-      if (last && last.isFinal && isFinal && last.text.trim() === trimmed) {
-        return prev
-      }
-      return [...prev, { id: lineIdRef.current++, ts, text: trimmed, isFinal }]
+      const next = [...prev, { id: lineIdRef.current++, ts, text: trimmed, isFinal: true }]
+      const overflow = next.length - maxLinesRef.current
+      return overflow > 0 ? next.slice(overflow) : next
     })
-    if (isFinal) {
-      void persistFinal(ts, trimmed)
-    }
+    lastFinalTextRef.current = trimmed
+    lastFinalAtRef.current = now
   }
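
Note: appendFinalLine filters the duplicate finals Chrome tends to emit around session restarts: single-word fragments are dropped, and within a 1.2 s window any final that contains, or is contained by, the previous one is discarded. The predicate could be pulled out as a pure function for testing; a sketch, not part of this commit:

// True when `next` duplicates `prev`: overlapping text that arrives
// within the suppression window after the previous final line.
function isDuplicateFinal(
  next: string,
  prev: string,
  msSinceLastFinal: number,
  windowMs = 1200,
): boolean {
  if (msSinceLastFinal >= windowMs) return false
  if (!prev) return false
  return prev.includes(next) || next.includes(prev)
}

// isDuplicateFinal('hello everyone', 'hello', 400)  -> true
// isDuplicateFinal('hello everyone', 'hello', 2000) -> false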
const startRecognition = () => {
const SpeechRecognitionConstructor =
window.SpeechRecognition || window.webkitSpeechRecognition
if (!SpeechRecognitionConstructor) {
setErrorMessage('STT is not supported in this browser. Please use Chrome.')
return
}
-    const recognition = new SpeechRecognitionConstructor()
+    const recognition = recognitionRef.current ?? new SpeechRecognitionConstructor()
recognition.lang = 'ko-KR'
recognition.interimResults = true
recognition.continuous = true
-    recognition.maxAlternatives = 3
+    recognition.maxAlternatives = 1
recognition.onresult = (event) => {
lastResultAtRef.current = Date.now()
for (let i = event.resultIndex; i < event.results.length; i += 1) {
const result = event.results[i]
-        const text = result[0].transcript
-        updateTranscript(text, result.isFinal)
+        if (!result || !result[0]) continue
+        const transcript = result[0].transcript
+        if (!transcript) continue
+        pendingTranscriptRef.current = transcript
+        if (result.isFinal) {
+          appendFinalLine(transcript)
+        }
      }
+      scheduleNoResultReset()
}
-    recognition.onerror = () => {
+    recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
+      const errorCode = event?.error
+      if (errorCode === 'aborted' || errorCode === 'no-speech') {
+        return
+      }
setErrorMessage('An error occurred during speech recognition.')
}
recognition.onend = () => {
-      liveTextRef.current = ''
-      if (isRecordingRef.current) {
+      if (isRecordingRef.current && !stopRequestedRef.current) {
        window.setTimeout(() => {
-          void safeRestartRecognition()
+          startRecognition()
}, 200)
} else {
setIsRecording(false)
}
}
recognitionRef.current = recognition
+    if (!isStartingRef.current) {
+      isStartingRef.current = true
+      try {
        recognition.start()
+      } catch {
+        // ignore start errors
+      } finally {
+        window.setTimeout(() => {
+          isStartingRef.current = false
+        }, 200)
+      }
+    }
+    scheduleNoResultReset()
}
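
Note: the isStartingRef guard exists because SpeechRecognition.start() throws an InvalidStateError when a session is already active, and onend-driven restarts can race a manual start. Condensed, the guard looks like this (illustrative sketch):

// start() throws if a session is already active, so swallow the error
// and debounce re-entry with a short-lived flag.
let starting = false
function safeStart(recognition: SpeechRecognition) {
  if (starting) return
  starting = true
  try {
    recognition.start()
  } catch {
    // already started; ignore
  } finally {
    window.setTimeout(() => {
      starting = false
    }, 200)
  }
}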
useEffect(() => {
return () => {
stopMeter()
}
}, [])
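
Note: this cleanup only releases the meter. Because onend restarts recognition while isRecordingRef is true, unmounting mid-recording would leave a session running. A possible follow-up, not in this commit, is to flag the stop here as well:

useEffect(() => {
  return () => {
    stopMeter()
    // hypothetical addition: keep onend from restarting after unmount
    stopRequestedRef.current = true
    isRecordingRef.current = false
    recognitionRef.current?.stop()
  }
}, [])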
const startMeter = async () => {
if (meterRafRef.current !== null) return
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
mediaStreamRef.current = stream
const AudioCtx = window.AudioContext || (window as typeof window & {
webkitAudioContext?: typeof AudioContext
}).webkitAudioContext
if (!AudioCtx) return
const audioContext = new AudioCtx()
audioContextRef.current = audioContext
const analyser = audioContext.createAnalyser()
analyser.fftSize = 2048
analyserRef.current = analyser
const source = audioContext.createMediaStreamSource(stream)
source.connect(analyser)
const data = new Uint8Array(analyser.fftSize)
const loop = () => {
if (!analyserRef.current) return
analyserRef.current.getByteTimeDomainData(data)
let sum = 0
for (let i = 0; i < data.length; i += 1) {
const v = (data[i] - 128) / 128
sum += v * v
}
const rms = Math.sqrt(sum / data.length)
const scaled = Math.log10(1 + rms * 120) / Math.log10(121)
const level = Math.min(1, Math.max(0, scaled))
const smooth = level * 0.5 + micLevelRef.current * 0.5
micLevelRef.current = smooth
setMicLevel(smooth)
if (isRecordingRef.current) {
const percent = smooth * 100
const now = Date.now()
if (percent >= 8) {
lastVoiceAtRef.current = now
}
const silenceMs = now - lastVoiceAtRef.current
if (
silenceMs >= 900 &&
pendingTranscriptRef.current &&
now - lastSilenceFinalAtRef.current > 1200 &&
pendingTranscriptRef.current !== lastAutoFinalTextRef.current
) {
appendFinalLine(pendingTranscriptRef.current)
lastSilenceFinalAtRef.current = now
lastAutoFinalAtRef.current = now
lastAutoFinalTextRef.current = pendingTranscriptRef.current
pendingTranscriptRef.current = ''
}
}
meterRafRef.current = window.requestAnimationFrame(loop)
}
meterRafRef.current = window.requestAnimationFrame(loop)
} catch (err) {
setErrorMessage((err as Error).message)
}
}
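
Note: the meter takes the RMS of one time-domain frame (bytes centered on 128, mapped to [-1, 1]), lifts quiet input with the log curve log10(1 + 120·rms) / log10(121), which maps 0 to 0 and 1 to 1, and then smooths 50/50 with the previous level. The same loop doubles as a crude voice-activity detector: a level of 8% or more counts as voice, and roughly 900 ms without voice promotes the pending interim transcript to a final line. The level math in isolation (illustrative helper, not part of the commit):

// Map one analyser frame to a 0..1 meter level.
function levelFromFrame(frame: Uint8Array, previous: number): number {
  let sum = 0
  for (let i = 0; i < frame.length; i += 1) {
    const v = (frame[i] - 128) / 128 // byte -> [-1, 1]
    sum += v * v
  }
  const rms = Math.sqrt(sum / frame.length)
  // Log curve: boosts low levels, still maps 0 -> 0 and 1 -> 1.
  const scaled = Math.log10(1 + rms * 120) / Math.log10(121)
  const clamped = Math.min(1, Math.max(0, scaled))
  return clamped * 0.5 + previous * 0.5 // simple exponential smoothing
}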
const stopMeter = () => {
if (meterRafRef.current !== null) {
window.cancelAnimationFrame(meterRafRef.current)
meterRafRef.current = null
}
if (analyserRef.current) {
analyserRef.current.disconnect()
analyserRef.current = null
}
if (audioContextRef.current) {
audioContextRef.current.close().catch(() => undefined)
audioContextRef.current = null
}
if (mediaStreamRef.current) {
mediaStreamRef.current.getTracks().forEach((track) => track.stop())
mediaStreamRef.current = null
}
setMicLevel(0)
micLevelRef.current = 0
pendingTranscriptRef.current = ''
lastAutoFinalAtRef.current = 0
lastAutoFinalTextRef.current = ''
lastVoiceAtRef.current = 0
lastSilenceFinalAtRef.current = 0
clearNoResultTimer()
}
const handleStart = async () => {
setErrorMessage(null)
lineIdRef.current = 1
-    pendingUtterancesRef.current = []
setTranscriptLines([])
meetingIdRef.current = null
setCurrentMeetingId(null)
setIsRecording(true)
isRecordingRef.current = true
-    lastResultAtRef.current = Date.now()
+    stopRequestedRef.current = false
+    lastResultAtRef.current = 0
+    resetPendingRef.current = false
+    lastVoiceAtRef.current = 0
+    lastSilenceFinalAtRef.current = 0
+    scheduleNoResultReset()
+    pendingTranscriptRef.current = ''
+    lastAutoFinalAtRef.current = 0
+    lastAutoFinalTextRef.current = ''
if (hasSpeechRecognition) {
startRecognition()
}
+    void startMeter()
try {
const result = await createMeeting(new Date().toISOString())
meetingIdRef.current = result.id
setCurrentMeetingId(result.id)
-      const pending = [...pendingUtterancesRef.current]
-      pendingUtterancesRef.current = []
-      await Promise.all(
-        pending.map((item) => saveUtterance(result.id, item.text, item.ts))
-      )
} catch (err) {
setErrorMessage((err as Error).message)
}
}
const handleStop = async () => {
-    if (!meetingIdRef.current) return
setErrorMessage(null)
-    recognitionRef.current?.stop()
-    liveTextRef.current = ''
setIsRecording(false)
isRecordingRef.current = false
+    stopRequestedRef.current = true
+    lastVoiceAtRef.current = 0
+    lastSilenceFinalAtRef.current = 0
+    clearNoResultTimer()
+    pendingTranscriptRef.current = ''
+    recognitionRef.current?.stop()
+    stopMeter()
try {
+      if (meetingIdRef.current) {
        await endMeeting(meetingIdRef.current, new Date().toISOString())
+      }
const list = await fetchMeetings()
setMeetingsList(list)
} catch (err) {
setErrorMessage((err as Error).message)
}
}
-  const safeRestartRecognition = async () => {
-    if (!recognitionRef.current || restartLockRef.current) return
-    restartLockRef.current = true
-    try {
-      recognitionRef.current.stop()
-      recognitionRef.current.start()
-      lastResultAtRef.current = Date.now()
-    } catch {
-      // ignore restart errors
-    } finally {
-      window.setTimeout(() => {
-        restartLockRef.current = false
-      }, 500)
-    }
-  }
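
Note: the removed safeRestartRecognition called stop() and start() back to back, but stop() is asynchronous; the session only really ends when onend fires, so the immediate start() raced the teardown. The replacement lets onend (or the watchdog) drive the next start after a short delay:

declare const recognition: SpeechRecognition
let keepListening = true // cleared by the stop button

// Chain the next start() from onend instead of stop();start() in sequence.
recognition.onend = () => {
  if (keepListening) {
    window.setTimeout(() => recognition.start(), 200)
  }
}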
const handleSave = async () => {
if (!meetingIdRef.current) return
setErrorMessage(null)
try {
await endMeeting(meetingIdRef.current, new Date().toISOString())
} catch (err) {
setErrorMessage((err as Error).message)
}
@@ -253,6 +338,15 @@ function App() {
setSelectedMeetingIds(next)
}
const handleToggleAll = () => {
if (meetingsList.length === 0) return
if (selectedMeetingIds.size === meetingsList.length) {
setSelectedMeetingIds(new Set())
return
}
setSelectedMeetingIds(new Set(meetingsList.map((meeting) => meeting.id)))
}
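
Note: handleToggleAll is all-or-none: a partial selection promotes to everything selected, and only a complete selection clears. The same rule as a reusable helper (illustrative, not in this commit):

// All-or-none: partial selections promote to "all", full selections clear.
function toggleAll<T>(selected: Set<T>, all: T[]): Set<T> {
  return selected.size === all.length ? new Set<T>() : new Set(all)
}

// toggleAll(new Set([1]), [1, 2, 3])       -> Set {1, 2, 3}
// toggleAll(new Set([1, 2, 3]), [1, 2, 3]) -> Set {}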
const handleDelete = async () => {
if (selectedMeetingIds.size === 0) return
setErrorMessage(null)
@@ -277,7 +371,11 @@ function App() {
<div className="app">
<div className="left-panel">
{errorMessage && <div className="error-banner">{errorMessage}</div>}
-        <TranscriptPanel transcriptLines={transcriptLines} />
+        <TranscriptPanel
+          transcriptLines={transcriptLines}
+          interimText=""
+          isRecording={isRecording}
+        />
<div className="controls">
<button
type="button"
@@ -290,13 +388,14 @@ function App() {
<button type="button" className="stop-btn" onClick={handleStop} disabled={!isRecording}>
</button>
<button type="button" className="save-btn" onClick={handleSave} disabled={!currentMeetingId}>
</button>
<div className="mic-meter" aria-hidden="true">
<div
className="mic-meter-bar"
style={{ width: `${Math.round(micLevel * 100)}%` }}
/>
<span className="mic-meter-label">{Math.round(micLevel * 100)}%</span>
</div>
</div>
{!hasSpeechRecognition && (
<div className="hint">Chrome에서만 Web Speech API가 .</div>
)}
</div>
<div className="right-panel">
<div className="panel-title"> </div>
@@ -304,7 +403,9 @@ function App() {
meetings={meetingsList}
isEditMode={isEditMode}
selectedIds={selectedMeetingIds}
allSelected={meetingsList.length > 0 && selectedMeetingIds.size === meetingsList.length}
onToggleSelect={handleToggleSelect}
onToggleAll={handleToggleAll}
onSelectMeeting={handleSelectMeeting}
/>
<div className="list-controls">

View File

@@ -4,7 +4,9 @@ type Props = {
meetings: MeetingSummary[]
isEditMode: boolean
selectedIds: Set<number>
allSelected: boolean
onToggleSelect: (id: number) => void
onToggleAll: () => void
onSelectMeeting: (id: number) => void
}
@@ -24,11 +26,24 @@ export default function MeetingList({
meetings,
isEditMode,
selectedIds,
allSelected,
onToggleSelect,
onToggleAll,
onSelectMeeting,
}: Props) {
return (
<div className="meeting-list">
{isEditMode && (
<div className="meeting-list-toolbar">
<button
type="button"
className={`select-all-btn ${allSelected ? 'active' : ''}`.trim()}
onClick={onToggleAll}
>
</button>
</div>
)}
{meetings.length === 0 && (
<div className="placeholder"> .</div>
)}

View File

@@ -7,12 +7,17 @@ type TranscriptLine = {
type Props = {
transcriptLines: TranscriptLine[]
interimText: string
isRecording: boolean
}
-export default function TranscriptPanel({ transcriptLines }: Props) {
+export default function TranscriptPanel({ transcriptLines, interimText, isRecording }: Props) {
return (
<div className="panel transcript-panel">
<div className="panel-title">/STT</div>
<div className="interim-box">
{interimText ? interimText : ''}
</div>
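
Note: App currently passes interimText="", so this interim box always renders empty; partial results only live in pendingTranscriptRef. If the box is meant to show live partials, one possible wiring, not part of this commit, is to mirror the latest result into state:

import { useState } from 'react'
declare const recognition: SpeechRecognition

// Hypothetical follow-up: surface the pending interim transcript so
// <TranscriptPanel interimText={interimText} ... /> shows live partials.
const [interimText, setInterimText] = useState('')
recognition.onresult = (event) => {
  const last = event.results[event.results.length - 1]
  const transcript = last?.[0]?.transcript ?? ''
  setInterimText(last?.isFinal ? '' : transcript)
}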
<div className="transcript-content">
{transcriptLines.length === 0 && (
<div className="placeholder"> STT .</div>

View File

@@ -1,18 +1,34 @@
export {}
declare global {
interface SpeechRecognitionEvent extends Event {
resultIndex: number
results: SpeechRecognitionResultList
}
interface SpeechRecognitionErrorEvent extends Event {
error:
| 'no-speech'
| 'aborted'
| 'audio-capture'
| 'network'
| 'not-allowed'
| 'service-not-allowed'
| string
}
interface SpeechRecognition extends EventTarget {
continuous: boolean
interimResults: boolean
lang: string
maxAlternatives: number
onstart: (() => void) | null
onresult: ((event: SpeechRecognitionEvent) => void) | null
-  onerror: ((event: Event) => void) | null
+  onerror: ((event: SpeechRecognitionErrorEvent) => void) | null
onend: (() => void) | null
start: () => void
stop: () => void
abort: () => void
}
interface SpeechRecognitionConstructor {
@@ -23,3 +39,4 @@ interface Window {
SpeechRecognition?: SpeechRecognitionConstructor
webkitSpeechRecognition?: SpeechRecognitionConstructor
}
}
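
Note: these globals are needed because TypeScript's bundled DOM declarations do not include the SpeechRecognition API, and Chrome additionally exposes the constructor under a webkit prefix. With them in place, the feature detection in App.tsx type-checks without casts:

// With the declarations above, detection and wiring are fully typed.
const Ctor = window.SpeechRecognition ?? window.webkitSpeechRecognition
if (Ctor) {
  const recognition = new Ctor()
  recognition.lang = 'ko-KR'
  recognition.interimResults = true
  recognition.onerror = (event) => {
    // event.error narrows against the declared union
    if (event.error === 'not-allowed') {
      console.warn('Microphone permission was denied')
    }
  }
}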