Update mic meter and speech handling

dsyoon
2026-02-01 10:54:28 +09:00
parent a8a51a19c1
commit 3e3cafe6ea
5 changed files with 342 additions and 151 deletions

View File

@@ -42,6 +42,16 @@
font-size: 16px;
}
.interim-box {
background: #f3f4f6;
border: 1px solid #e5e7eb;
border-radius: 8px;
padding: 10px 12px;
font-size: 14px;
color: #374151;
min-height: 42px;
}
.transcript-panel {
flex: 3 1 0;
min-height: 0;
@@ -103,6 +113,7 @@
.controls {
display: flex;
gap: 12px;
align-items: center;
}
button {
@@ -114,8 +125,8 @@ button {
}
.record-btn {
-  background: #e5e7eb;
-  color: #111827;
+  background: #fca5a5;
+  color: #7f1d1d;
}
.record-btn.recording {
@@ -129,11 +140,34 @@ button {
color: #fff;
}
.save-btn {
  background: #2563eb;
  color: #fff;
}
.mic-meter {
position: relative;
width: 140px;
height: 10px;
background: #e5e7eb;
border-radius: 999px;
overflow: hidden;
}
.mic-meter-bar {
height: 100%;
background: #ef4444;
width: 0%;
transition: width 80ms linear;
}
.mic-meter-label {
position: absolute;
right: 8px;
top: 50%;
transform: translateY(-50%);
font-size: 10px;
font-weight: 600;
color: #111827;
pointer-events: none;
}
.meeting-list {
flex: 1;
border: 1px solid #f0f0f0;
@@ -143,6 +177,25 @@ button {
background: #fafafa;
}
.meeting-list-toolbar {
display: flex;
justify-content: flex-end;
margin-bottom: 8px;
}
.select-all-btn {
background: #e5e7eb;
color: #111827;
padding: 6px 12px;
border-radius: 999px;
font-size: 12px;
}
.select-all-btn.active {
background: #111827;
color: #fff;
}
.meeting-item {
margin-bottom: 10px;
}

View File

@@ -2,14 +2,7 @@ import { useEffect, useMemo, useRef, useState } from 'react'
import './App.css'
import TranscriptPanel from './components/TranscriptPanel'
import MeetingList from './components/MeetingList'
-import {
-  createMeeting,
-  deleteMeetings,
-  endMeeting,
-  fetchMeeting,
-  fetchMeetings,
-  saveUtterance,
-} from './lib/api'
+import { createMeeting, deleteMeetings, endMeeting, fetchMeeting, fetchMeetings } from './lib/api'
function App() {
const [isRecording, setIsRecording] = useState(false)
@@ -25,20 +18,31 @@ function App() {
new Set()
)
const [errorMessage, setErrorMessage] = useState<string | null>(null)
-  const recognitionRef = useRef<SpeechRecognition | null>(null)
-  const liveTextRef = useRef('')
+  const [micLevel, setMicLevel] = useState(0)
   const lineIdRef = useRef(1)
   const meetingIdRef = useRef<number | null>(null)
-  const pendingUtterancesRef = useRef<{ ts: string; text: string }[]>([])
+  const recognitionRef = useRef<SpeechRecognition | null>(null)
   const isRecordingRef = useRef(false)
-  const lastResultAtRef = useRef<number>(Date.now())
-  const restartLockRef = useRef(false)
+  const isStartingRef = useRef(false)
+  const stopRequestedRef = useRef(false)
+  const maxLinesRef = useRef(500)
+  const micLevelRef = useRef(0)
+  const pendingTranscriptRef = useRef('')
+  const lastAutoFinalAtRef = useRef(0)
+  const lastAutoFinalTextRef = useRef('')
+  const lastVoiceAtRef = useRef(0)
+  const lastSilenceFinalAtRef = useRef(0)
+  const lastResultAtRef = useRef(0)
+  const noResultTimerRef = useRef<number | null>(null)
+  const resetPendingRef = useRef(false)
+  const lastFinalTextRef = useRef('')
+  const lastFinalAtRef = useRef(0)
+  const audioContextRef = useRef<AudioContext | null>(null)
   const hasSpeechRecognition = useMemo(() => {
     return 'SpeechRecognition' in window || 'webkitSpeechRecognition' in window
   }, [])
+  const analyserRef = useRef<AnalyserNode | null>(null)
+  const mediaStreamRef = useRef<MediaStream | null>(null)
+  const meterRafRef = useRef<number | null>(null)
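
Note: recording state is deliberately mirrored into refs (isRecording / isRecordingRef, micLevel / micLevelRef). The recognition callbacks, timers, and the requestAnimationFrame loop are created once and would otherwise close over stale state. A minimal sketch of the pattern, with illustrative names:

import { useRef, useState } from 'react'

// Keep a ref in sync with state: callbacks created once (timers, rAF
// loops, event handlers) read ref.current; renders read the state value.
function useStateWithRef<T>(initial: T) {
  const [value, setValue] = useState(initial)
  const ref = useRef(initial)
  const set = (next: T) => {
    ref.current = next // visible to callbacks immediately
    setValue(next) // triggers a re-render
  }
  return [value, ref, set] as const
}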
useEffect(() => {
fetchMeetings()
@@ -46,169 +50,250 @@ function App() {
.catch((err) => setErrorMessage(err.message))
}, [])
-  useEffect(() => {
-    if (!isRecording) return
-    const intervalId = window.setInterval(() => {
-      if (!isRecordingRef.current) return
-      const now = Date.now()
-      if (now - lastResultAtRef.current > 4000) {
-        void safeRestartRecognition()
-      }
-    }, 2000)
-    return () => window.clearInterval(intervalId)
-  }, [isRecording])
-
-  const persistFinal = async (ts: string, text: string) => {
-    if (!meetingIdRef.current) {
-      pendingUtterancesRef.current.push({ ts, text })
-      return
-    }
-    try {
-      await saveUtterance(meetingIdRef.current, text, ts)
-    } catch (err) {
-      setErrorMessage((err as Error).message)
-    }
-  }
+  const clearNoResultTimer = () => {
+    if (noResultTimerRef.current !== null) {
+      window.clearTimeout(noResultTimerRef.current)
+      noResultTimerRef.current = null
+    }
+  }
+
+  const scheduleNoResultReset = () => {
+    clearNoResultTimer()
+    if (!isRecordingRef.current || stopRequestedRef.current) return
+    noResultTimerRef.current = window.setTimeout(() => {
+      noResultTimerRef.current = null
+      if (!isRecordingRef.current || stopRequestedRef.current) return
+      const now = Date.now()
+      if (now - lastResultAtRef.current < 1500) return
+      if (resetPendingRef.current) return
+      resetPendingRef.current = true
+      try {
+        recognitionRef.current?.stop()
+      } catch {
+        // ignore stop errors
+      }
+      window.setTimeout(() => {
+        resetPendingRef.current = false
+        if (isRecordingRef.current && !stopRequestedRef.current) {
+          startRecognition()
+        }
+      }, 300)
+    }, 1500)
+  }
-  const updateTranscript = (text: string, isFinal: boolean) => {
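
Note: this replaces the old 2 s polling interval with a debounced watchdog. Every recognition result re-arms a 1.5 s timer; if it fires with no recent result, recognition is stopped and restarted shortly after, and resetPendingRef keeps overlapping resets from stacking up. The same idea as a standalone sketch (the restart callback is a placeholder):

// Debounced stall watchdog: re-arm on every result; if nothing arrives
// before the timeout fires, run the restart callback exactly once.
function createStallWatchdog(restart: () => void, timeoutMs = 1500) {
  let timer: number | null = null
  let lastResultAt = 0
  return {
    onResult() {
      lastResultAt = Date.now()
      if (timer !== null) window.clearTimeout(timer)
      timer = window.setTimeout(() => {
        timer = null
        if (Date.now() - lastResultAt >= timeoutMs) restart()
      }, timeoutMs)
    },
    cancel() {
      if (timer !== null) window.clearTimeout(timer)
      timer = null
    },
  }
}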
+  const appendFinalLine = (text: string) => {
     const trimmed = text.trim()
     if (!trimmed) return
+    const tokenCount = trimmed.split(/\s+/).filter(Boolean).length
+    if (tokenCount < 2) return
+    const now = Date.now()
+    if (now - lastFinalAtRef.current < 1200) {
+      const last = lastFinalTextRef.current
+      if (last && (last.includes(trimmed) || trimmed.includes(last))) {
+        return
+      }
+    }
     const ts = new Date().toISOString()
-    liveTextRef.current = isFinal ? '' : trimmed
     setTranscriptLines((prev) => {
-      const last = prev[prev.length - 1]
-      if (last && !last.isFinal) {
-        return [
-          ...prev.slice(0, -1),
-          { ...last, text: trimmed, ts, isFinal },
-        ]
-      }
-      if (last && last.isFinal && isFinal && last.text.trim() === trimmed) {
-        return prev
-      }
-      return [...prev, { id: lineIdRef.current++, ts, text: trimmed, isFinal }]
+      const next = [...prev, { id: lineIdRef.current++, ts, text: trimmed, isFinal: true }]
+      const overflow = next.length - maxLinesRef.current
+      return overflow > 0 ? next.slice(overflow) : next
     })
-    if (isFinal) {
-      void persistFinal(ts, trimmed)
-    }
+    lastFinalTextRef.current = trimmed
+    lastFinalAtRef.current = now
   }
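
Note: appendFinalLine filters the duplicate finals Chrome tends to emit around session restarts: single-word fragments are dropped, and within a 1.2 s window any final that contains, or is contained by, the previous one is discarded. The predicate could be pulled out as a pure function for testing; a sketch, not part of this commit:

// True when `next` duplicates `prev`: overlapping text that arrives
// within the suppression window after the previous final line.
function isDuplicateFinal(
  next: string,
  prev: string,
  msSinceLastFinal: number,
  windowMs = 1200,
): boolean {
  if (msSinceLastFinal >= windowMs) return false
  if (!prev) return false
  return prev.includes(next) || next.includes(prev)
}

// isDuplicateFinal('hello everyone', 'hello', 400)  -> true
// isDuplicateFinal('hello everyone', 'hello', 2000) -> false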
const startRecognition = () => {
const SpeechRecognitionConstructor =
window.SpeechRecognition || window.webkitSpeechRecognition
if (!SpeechRecognitionConstructor) {
setErrorMessage('STT is not supported in this browser. Please use Chrome.')
return
}
-    const recognition = new SpeechRecognitionConstructor()
+    const recognition = recognitionRef.current ?? new SpeechRecognitionConstructor()
recognition.lang = 'ko-KR'
recognition.interimResults = true
recognition.continuous = true
-    recognition.maxAlternatives = 3
+    recognition.maxAlternatives = 1
recognition.onresult = (event) => {
lastResultAtRef.current = Date.now()
for (let i = event.resultIndex; i < event.results.length; i += 1) {
const result = event.results[i]
-        const text = result[0].transcript
-        updateTranscript(text, result.isFinal)
+        if (!result || !result[0]) continue
+        const transcript = result[0].transcript
+        if (!transcript) continue
+        pendingTranscriptRef.current = transcript
+        if (result.isFinal) {
+          appendFinalLine(transcript)
+        }
      }
+      scheduleNoResultReset()
}
-    recognition.onerror = () => {
+    recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
+      const errorCode = event?.error
+      if (errorCode === 'aborted' || errorCode === 'no-speech') {
+        return
+      }
setErrorMessage('An error occurred during speech recognition.')
}
recognition.onend = () => {
-      liveTextRef.current = ''
-      if (isRecordingRef.current) {
+      if (isRecordingRef.current && !stopRequestedRef.current) {
        window.setTimeout(() => {
-          void safeRestartRecognition()
+          startRecognition()
}, 200)
} else {
setIsRecording(false)
}
}
recognitionRef.current = recognition
+    if (!isStartingRef.current) {
+      isStartingRef.current = true
+      try {
        recognition.start()
+      } catch {
+        // ignore start errors
+      } finally {
+        window.setTimeout(() => {
+          isStartingRef.current = false
+        }, 200)
+      }
+    }
+    scheduleNoResultReset()
}
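
Note: the isStartingRef guard exists because SpeechRecognition.start() throws an InvalidStateError when a session is already active, and onend-driven restarts can race a manual start. Condensed, the guard looks like this (illustrative sketch):

// start() throws if a session is already active, so swallow the error
// and debounce re-entry with a short-lived flag.
let starting = false
function safeStart(recognition: SpeechRecognition) {
  if (starting) return
  starting = true
  try {
    recognition.start()
  } catch {
    // already started; ignore
  } finally {
    window.setTimeout(() => {
      starting = false
    }, 200)
  }
}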
useEffect(() => {
return () => {
stopMeter()
}
}, [])
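
Note: this cleanup only releases the meter. Because onend restarts recognition while isRecordingRef is true, unmounting mid-recording would leave a session running. A possible follow-up, not in this commit, is to flag the stop here as well:

useEffect(() => {
  return () => {
    stopMeter()
    // hypothetical addition: keep onend from restarting after unmount
    stopRequestedRef.current = true
    isRecordingRef.current = false
    recognitionRef.current?.stop()
  }
}, [])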
const startMeter = async () => {
if (meterRafRef.current !== null) return
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
mediaStreamRef.current = stream
const AudioCtx = window.AudioContext || (window as typeof window & {
webkitAudioContext?: typeof AudioContext
}).webkitAudioContext
if (!AudioCtx) return
const audioContext = new AudioCtx()
audioContextRef.current = audioContext
const analyser = audioContext.createAnalyser()
analyser.fftSize = 2048
analyserRef.current = analyser
const source = audioContext.createMediaStreamSource(stream)
source.connect(analyser)
const data = new Uint8Array(analyser.fftSize)
const loop = () => {
if (!analyserRef.current) return
analyserRef.current.getByteTimeDomainData(data)
let sum = 0
for (let i = 0; i < data.length; i += 1) {
const v = (data[i] - 128) / 128
sum += v * v
}
const rms = Math.sqrt(sum / data.length)
const scaled = Math.log10(1 + rms * 120) / Math.log10(121)
const level = Math.min(1, Math.max(0, scaled))
const smooth = level * 0.5 + micLevelRef.current * 0.5
micLevelRef.current = smooth
setMicLevel(smooth)
if (isRecordingRef.current) {
const percent = smooth * 100
const now = Date.now()
if (percent >= 8) {
lastVoiceAtRef.current = now
}
const silenceMs = now - lastVoiceAtRef.current
if (
silenceMs >= 900 &&
pendingTranscriptRef.current &&
now - lastSilenceFinalAtRef.current > 1200 &&
pendingTranscriptRef.current !== lastAutoFinalTextRef.current
) {
appendFinalLine(pendingTranscriptRef.current)
lastSilenceFinalAtRef.current = now
lastAutoFinalAtRef.current = now
lastAutoFinalTextRef.current = pendingTranscriptRef.current
pendingTranscriptRef.current = ''
}
}
meterRafRef.current = window.requestAnimationFrame(loop)
}
meterRafRef.current = window.requestAnimationFrame(loop)
} catch (err) {
setErrorMessage((err as Error).message)
}
}
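
Note: the meter takes the RMS of one time-domain frame (bytes centered on 128, mapped to [-1, 1]), lifts quiet input with the log curve log10(1 + 120·rms) / log10(121), which maps 0 to 0 and 1 to 1, and then smooths 50/50 with the previous level. The same loop doubles as a crude voice-activity detector: a level of 8% or more counts as voice, and roughly 900 ms without voice promotes the pending interim transcript to a final line. The level math in isolation (illustrative helper, not part of the commit):

// Map one analyser frame to a 0..1 meter level.
function levelFromFrame(frame: Uint8Array, previous: number): number {
  let sum = 0
  for (let i = 0; i < frame.length; i += 1) {
    const v = (frame[i] - 128) / 128 // byte -> [-1, 1]
    sum += v * v
  }
  const rms = Math.sqrt(sum / frame.length)
  // Log curve: boosts low levels, still maps 0 -> 0 and 1 -> 1.
  const scaled = Math.log10(1 + rms * 120) / Math.log10(121)
  const clamped = Math.min(1, Math.max(0, scaled))
  return clamped * 0.5 + previous * 0.5 // simple exponential smoothing
}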
const stopMeter = () => {
if (meterRafRef.current !== null) {
window.cancelAnimationFrame(meterRafRef.current)
meterRafRef.current = null
}
if (analyserRef.current) {
analyserRef.current.disconnect()
analyserRef.current = null
}
if (audioContextRef.current) {
audioContextRef.current.close().catch(() => undefined)
audioContextRef.current = null
}
if (mediaStreamRef.current) {
mediaStreamRef.current.getTracks().forEach((track) => track.stop())
mediaStreamRef.current = null
}
setMicLevel(0)
micLevelRef.current = 0
pendingTranscriptRef.current = ''
lastAutoFinalAtRef.current = 0
lastAutoFinalTextRef.current = ''
lastVoiceAtRef.current = 0
lastSilenceFinalAtRef.current = 0
clearNoResultTimer()
}
const handleStart = async () => {
setErrorMessage(null)
lineIdRef.current = 1
-    pendingUtterancesRef.current = []
setTranscriptLines([])
meetingIdRef.current = null
setCurrentMeetingId(null)
setIsRecording(true)
isRecordingRef.current = true
-    lastResultAtRef.current = Date.now()
+    stopRequestedRef.current = false
+    lastResultAtRef.current = 0
+    resetPendingRef.current = false
+    lastVoiceAtRef.current = 0
+    lastSilenceFinalAtRef.current = 0
+    scheduleNoResultReset()
+    pendingTranscriptRef.current = ''
+    lastAutoFinalAtRef.current = 0
+    lastAutoFinalTextRef.current = ''
if (hasSpeechRecognition) {
startRecognition()
}
+    void startMeter()
try {
const result = await createMeeting(new Date().toISOString())
meetingIdRef.current = result.id
setCurrentMeetingId(result.id)
-      const pending = [...pendingUtterancesRef.current]
-      pendingUtterancesRef.current = []
-      await Promise.all(
-        pending.map((item) => saveUtterance(result.id, item.text, item.ts))
-      )
} catch (err) {
setErrorMessage((err as Error).message)
}
}
const handleStop = async () => {
-    if (!meetingIdRef.current) return
setErrorMessage(null)
-    recognitionRef.current?.stop()
-    liveTextRef.current = ''
setIsRecording(false)
isRecordingRef.current = false
+    stopRequestedRef.current = true
+    lastVoiceAtRef.current = 0
+    lastSilenceFinalAtRef.current = 0
+    clearNoResultTimer()
+    pendingTranscriptRef.current = ''
+    recognitionRef.current?.stop()
+    stopMeter()
try {
+      if (meetingIdRef.current) {
        await endMeeting(meetingIdRef.current, new Date().toISOString())
+      }
const list = await fetchMeetings()
setMeetingsList(list)
} catch (err) {
setErrorMessage((err as Error).message)
}
}
-  const safeRestartRecognition = async () => {
-    if (!recognitionRef.current || restartLockRef.current) return
-    restartLockRef.current = true
-    try {
-      recognitionRef.current.stop()
-      recognitionRef.current.start()
-      lastResultAtRef.current = Date.now()
-    } catch {
-      // ignore restart errors
-    } finally {
-      window.setTimeout(() => {
-        restartLockRef.current = false
-      }, 500)
-    }
-  }
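
Note: the removed safeRestartRecognition called stop() and start() back to back, but stop() is asynchronous; the session only really ends when onend fires, so the immediate start() raced the teardown. The replacement lets onend (or the watchdog) drive the next start after a short delay:

declare const recognition: SpeechRecognition
let keepListening = true // cleared by the stop button

// Chain the next start() from onend instead of stop();start() in sequence.
recognition.onend = () => {
  if (keepListening) {
    window.setTimeout(() => recognition.start(), 200)
  }
}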
const handleSave = async () => {
if (!meetingIdRef.current) return
setErrorMessage(null)
try {
await endMeeting(meetingIdRef.current, new Date().toISOString())
} catch (err) {
setErrorMessage((err as Error).message)
}
@@ -253,6 +338,15 @@ function App() {
setSelectedMeetingIds(next)
}
const handleToggleAll = () => {
if (meetingsList.length === 0) return
if (selectedMeetingIds.size === meetingsList.length) {
setSelectedMeetingIds(new Set())
return
}
setSelectedMeetingIds(new Set(meetingsList.map((meeting) => meeting.id)))
}
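
Note: handleToggleAll is all-or-none: a partial selection promotes to everything selected, and only a complete selection clears. The same rule as a reusable helper (illustrative, not in this commit):

// All-or-none: partial selections promote to "all", full selections clear.
function toggleAll<T>(selected: Set<T>, all: T[]): Set<T> {
  return selected.size === all.length ? new Set<T>() : new Set(all)
}

// toggleAll(new Set([1]), [1, 2, 3])       -> Set {1, 2, 3}
// toggleAll(new Set([1, 2, 3]), [1, 2, 3]) -> Set {}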
const handleDelete = async () => {
if (selectedMeetingIds.size === 0) return
setErrorMessage(null)
@@ -277,7 +371,11 @@ function App() {
<div className="app">
<div className="left-panel">
{errorMessage && <div className="error-banner">{errorMessage}</div>}
-        <TranscriptPanel transcriptLines={transcriptLines} />
+        <TranscriptPanel
+          transcriptLines={transcriptLines}
+          interimText=""
+          isRecording={isRecording}
+        />
<div className="controls">
<button
type="button"
@@ -290,13 +388,14 @@ function App() {
<button type="button" className="stop-btn" onClick={handleStop} disabled={!isRecording}>
</button>
<button type="button" className="save-btn" onClick={handleSave} disabled={!currentMeetingId}>
</button>
<div className="mic-meter" aria-hidden="true">
<div
className="mic-meter-bar"
style={{ width: `${Math.round(micLevel * 100)}%` }}
/>
<span className="mic-meter-label">{Math.round(micLevel * 100)}%</span>
</div>
</div>
{!hasSpeechRecognition && (
<div className="hint">Chrome에서만 Web Speech API가 .</div>
)}
</div>
<div className="right-panel">
<div className="panel-title"> </div>
@@ -304,7 +403,9 @@ function App() {
meetings={meetingsList}
isEditMode={isEditMode}
selectedIds={selectedMeetingIds}
allSelected={meetingsList.length > 0 && selectedMeetingIds.size === meetingsList.length}
onToggleSelect={handleToggleSelect}
onToggleAll={handleToggleAll}
onSelectMeeting={handleSelectMeeting}
/>
<div className="list-controls">

View File

@@ -4,7 +4,9 @@ type Props = {
meetings: MeetingSummary[]
isEditMode: boolean
selectedIds: Set<number>
allSelected: boolean
onToggleSelect: (id: number) => void
onToggleAll: () => void
onSelectMeeting: (id: number) => void
}
@@ -24,11 +26,24 @@ export default function MeetingList({
meetings,
isEditMode,
selectedIds,
allSelected,
onToggleSelect,
onToggleAll,
onSelectMeeting,
}: Props) {
return (
<div className="meeting-list">
{isEditMode && (
<div className="meeting-list-toolbar">
<button
type="button"
className={`select-all-btn ${allSelected ? 'active' : ''}`.trim()}
onClick={onToggleAll}
>
</button>
</div>
)}
{meetings.length === 0 && (
<div className="placeholder"> .</div>
)}

View File

@@ -7,12 +7,17 @@ type TranscriptLine = {
type Props = {
transcriptLines: TranscriptLine[]
interimText: string
isRecording: boolean
}
-export default function TranscriptPanel({ transcriptLines }: Props) {
+export default function TranscriptPanel({ transcriptLines, interimText, isRecording }: Props) {
return (
<div className="panel transcript-panel">
<div className="panel-title">/STT</div>
<div className="interim-box">
{interimText ? interimText : ''}
</div>
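
Note: App currently passes interimText="", so this interim box always renders empty; partial results only live in pendingTranscriptRef. If the box is meant to show live partials, one possible wiring, not part of this commit, is to mirror the latest result into state:

import { useState } from 'react'
declare const recognition: SpeechRecognition

// Hypothetical follow-up: surface the pending interim transcript so
// <TranscriptPanel interimText={interimText} ... /> shows live partials.
const [interimText, setInterimText] = useState('')
recognition.onresult = (event) => {
  const last = event.results[event.results.length - 1]
  const transcript = last?.[0]?.transcript ?? ''
  setInterimText(last?.isFinal ? '' : transcript)
}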
<div className="transcript-content">
{transcriptLines.length === 0 && (
<div className="placeholder"> STT .</div>

View File

@@ -1,18 +1,34 @@
export {}
declare global {
interface SpeechRecognitionEvent extends Event {
resultIndex: number
results: SpeechRecognitionResultList
}
interface SpeechRecognitionErrorEvent extends Event {
error:
| 'no-speech'
| 'aborted'
| 'audio-capture'
| 'network'
| 'not-allowed'
| 'service-not-allowed'
| string
}
interface SpeechRecognition extends EventTarget {
continuous: boolean
interimResults: boolean
lang: string
maxAlternatives: number
onstart: (() => void) | null
onresult: ((event: SpeechRecognitionEvent) => void) | null
-  onerror: ((event: Event) => void) | null
+  onerror: ((event: SpeechRecognitionErrorEvent) => void) | null
onend: (() => void) | null
start: () => void
stop: () => void
abort: () => void
}
interface SpeechRecognitionConstructor {
@@ -23,3 +39,4 @@ interface Window {
SpeechRecognition?: SpeechRecognitionConstructor
webkitSpeechRecognition?: SpeechRecognitionConstructor
}
}
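
Note: these globals are needed because TypeScript's bundled DOM declarations do not include the SpeechRecognition API, and Chrome additionally exposes the constructor under a webkit prefix. With them in place, the feature detection in App.tsx type-checks without casts:

// With the declarations above, detection and wiring are fully typed.
const Ctor = window.SpeechRecognition ?? window.webkitSpeechRecognition
if (Ctor) {
  const recognition = new Ctor()
  recognition.lang = 'ko-KR'
  recognition.interimResults = true
  recognition.onerror = (event) => {
    // event.error narrows against the declared union
    if (event.error === 'not-allowed') {
      console.warn('Microphone permission was denied')
    }
  }
}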