diff --git a/client/src/App.css b/client/src/App.css index c2f805b..96207ff 100644 --- a/client/src/App.css +++ b/client/src/App.css @@ -42,6 +42,16 @@ font-size: 16px; } +.interim-box { + background: #f3f4f6; + border: 1px solid #e5e7eb; + border-radius: 8px; + padding: 10px 12px; + font-size: 14px; + color: #374151; + min-height: 42px; +} + .transcript-panel { flex: 3 1 0; min-height: 0; @@ -103,6 +113,7 @@ .controls { display: flex; gap: 12px; + align-items: center; } button { @@ -114,8 +125,8 @@ button { } .record-btn { - background: #e5e7eb; - color: #111827; + background: #fca5a5; + color: #7f1d1d; } .record-btn.recording { @@ -129,11 +140,34 @@ button { color: #fff; } -.save-btn { - background: #2563eb; - color: #fff; +.mic-meter { + position: relative; + width: 140px; + height: 10px; + background: #e5e7eb; + border-radius: 999px; + overflow: hidden; } +.mic-meter-bar { + height: 100%; + background: #ef4444; + width: 0%; + transition: width 80ms linear; +} + +.mic-meter-label { + position: absolute; + right: 8px; + top: 50%; + transform: translateY(-50%); + font-size: 10px; + font-weight: 600; + color: #111827; + pointer-events: none; +} + + .meeting-list { flex: 1; border: 1px solid #f0f0f0; @@ -143,6 +177,25 @@ button { background: #fafafa; } +.meeting-list-toolbar { + display: flex; + justify-content: flex-end; + margin-bottom: 8px; +} + +.select-all-btn { + background: #e5e7eb; + color: #111827; + padding: 6px 12px; + border-radius: 999px; + font-size: 12px; +} + +.select-all-btn.active { + background: #111827; + color: #fff; +} + .meeting-item { margin-bottom: 10px; } diff --git a/client/src/App.tsx b/client/src/App.tsx index 2964d82..228da92 100644 --- a/client/src/App.tsx +++ b/client/src/App.tsx @@ -2,14 +2,7 @@ import { useEffect, useMemo, useRef, useState } from 'react' import './App.css' import TranscriptPanel from './components/TranscriptPanel' import MeetingList from './components/MeetingList' -import { - createMeeting, - deleteMeetings, - endMeeting, - fetchMeeting, - fetchMeetings, - saveUtterance, -} from './lib/api' +import { createMeeting, deleteMeetings, endMeeting, fetchMeeting, fetchMeetings } from './lib/api' function App() { const [isRecording, setIsRecording] = useState(false) @@ -25,20 +18,31 @@ function App() { new Set() ) const [errorMessage, setErrorMessage] = useState(null) - - const recognitionRef = useRef(null) - const liveTextRef = useRef('') + const [micLevel, setMicLevel] = useState(0) const lineIdRef = useRef(1) const meetingIdRef = useRef(null) - const pendingUtterancesRef = useRef<{ ts: string; text: string }[]>([]) + const recognitionRef = useRef(null) const isRecordingRef = useRef(false) - const lastResultAtRef = useRef(Date.now()) - const restartLockRef = useRef(false) - const isStartingRef = useRef(false) - + const stopRequestedRef = useRef(false) + const maxLinesRef = useRef(500) + const micLevelRef = useRef(0) + const pendingTranscriptRef = useRef('') + const lastAutoFinalAtRef = useRef(0) + const lastAutoFinalTextRef = useRef('') + const lastVoiceAtRef = useRef(0) + const lastSilenceFinalAtRef = useRef(0) + const lastResultAtRef = useRef(0) + const noResultTimerRef = useRef(null) + const resetPendingRef = useRef(false) + const lastFinalTextRef = useRef('') + const lastFinalAtRef = useRef(0) + const audioContextRef = useRef(null) const hasSpeechRecognition = useMemo(() => { return 'SpeechRecognition' in window || 'webkitSpeechRecognition' in window }, []) + const analyserRef = useRef(null) + const mediaStreamRef = useRef(null) + const meterRafRef = useRef(null) useEffect(() => { fetchMeetings() @@ -46,169 +50,250 @@ function App() { .catch((err) => setErrorMessage(err.message)) }, []) - useEffect(() => { - if (!isRecording) return - const intervalId = window.setInterval(() => { - if (!isRecordingRef.current) return - const now = Date.now() - if (now - lastResultAtRef.current > 4000) { - void safeRestartRecognition() - } - }, 2000) - return () => window.clearInterval(intervalId) - }, [isRecording]) - - const persistFinal = async (ts: string, text: string) => { - if (!meetingIdRef.current) { - pendingUtterancesRef.current.push({ ts, text }) - return - } - try { - await saveUtterance(meetingIdRef.current, text, ts) - } catch (err) { - setErrorMessage((err as Error).message) + const clearNoResultTimer = () => { + if (noResultTimerRef.current !== null) { + window.clearTimeout(noResultTimerRef.current) + noResultTimerRef.current = null } } - const updateTranscript = (text: string, isFinal: boolean) => { + const scheduleNoResultReset = () => { + clearNoResultTimer() + if (!isRecordingRef.current || stopRequestedRef.current) return + noResultTimerRef.current = window.setTimeout(() => { + noResultTimerRef.current = null + if (!isRecordingRef.current || stopRequestedRef.current) return + const now = Date.now() + if (now - lastResultAtRef.current < 1500) return + if (resetPendingRef.current) return + resetPendingRef.current = true + try { + recognitionRef.current?.stop() + } catch { + // ignore stop errors + } + window.setTimeout(() => { + resetPendingRef.current = false + if (isRecordingRef.current && !stopRequestedRef.current) { + startRecognition() + } + }, 300) + }, 1500) + } + const appendFinalLine = (text: string) => { const trimmed = text.trim() if (!trimmed) return - const ts = new Date().toISOString() - liveTextRef.current = isFinal ? '' : trimmed - setTranscriptLines((prev) => { - const last = prev[prev.length - 1] - if (last && !last.isFinal) { - return [ - ...prev.slice(0, -1), - { ...last, text: trimmed, ts, isFinal }, - ] + const tokenCount = trimmed.split(/\s+/).filter(Boolean).length + if (tokenCount < 2) return + const now = Date.now() + if (now - lastFinalAtRef.current < 1200) { + const last = lastFinalTextRef.current + if (last && (last.includes(trimmed) || trimmed.includes(last))) { + return } - if (last && last.isFinal && isFinal && last.text.trim() === trimmed) { - return prev - } - return [...prev, { id: lineIdRef.current++, ts, text: trimmed, isFinal }] - }) - if (isFinal) { - void persistFinal(ts, trimmed) } + const ts = new Date().toISOString() + setTranscriptLines((prev) => { + const next = [...prev, { id: lineIdRef.current++, ts, text: trimmed, isFinal: true }] + const overflow = next.length - maxLinesRef.current + return overflow > 0 ? next.slice(overflow) : next + }) + lastFinalTextRef.current = trimmed + lastFinalAtRef.current = now } const startRecognition = () => { const SpeechRecognitionConstructor = window.SpeechRecognition || window.webkitSpeechRecognition - if (!SpeechRecognitionConstructor) { setErrorMessage('이 브라우저에서는 STT를 지원하지 않습니다. Chrome을 사용해 주세요.') return } - - const recognition = new SpeechRecognitionConstructor() + const recognition = recognitionRef.current ?? new SpeechRecognitionConstructor() recognition.lang = 'ko-KR' recognition.interimResults = true recognition.continuous = true - recognition.maxAlternatives = 3 + recognition.maxAlternatives = 1 recognition.onresult = (event) => { lastResultAtRef.current = Date.now() for (let i = event.resultIndex; i < event.results.length; i += 1) { const result = event.results[i] - const text = result[0].transcript - updateTranscript(text, result.isFinal) + if (!result || !result[0]) continue + const transcript = result[0].transcript + if (!transcript) continue + pendingTranscriptRef.current = transcript + if (result.isFinal) { + appendFinalLine(transcript) + } } + scheduleNoResultReset() } - recognition.onerror = () => { + recognition.onerror = (event: SpeechRecognitionErrorEvent) => { + const errorCode = event?.error + if (errorCode === 'aborted' || errorCode === 'no-speech') { + return + } setErrorMessage('음성 인식 중 오류가 발생했습니다.') } recognition.onend = () => { - liveTextRef.current = '' - if (isRecordingRef.current) { + if (isRecordingRef.current && !stopRequestedRef.current) { window.setTimeout(() => { - void safeRestartRecognition() + startRecognition() }, 200) - } else { - setIsRecording(false) } } recognitionRef.current = recognition - if (!isStartingRef.current) { - isStartingRef.current = true - try { - recognition.start() - } catch { - // ignore start errors - } finally { - window.setTimeout(() => { - isStartingRef.current = false - }, 200) - } + try { + recognition.start() + } catch { + // ignore start errors } + scheduleNoResultReset() + } + + useEffect(() => { + return () => { + stopMeter() + } + }, []) + + const startMeter = async () => { + if (meterRafRef.current !== null) return + try { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }) + mediaStreamRef.current = stream + const AudioCtx = window.AudioContext || (window as typeof window & { + webkitAudioContext?: typeof AudioContext + }).webkitAudioContext + if (!AudioCtx) return + const audioContext = new AudioCtx() + audioContextRef.current = audioContext + const analyser = audioContext.createAnalyser() + analyser.fftSize = 2048 + analyserRef.current = analyser + const source = audioContext.createMediaStreamSource(stream) + source.connect(analyser) + const data = new Uint8Array(analyser.fftSize) + const loop = () => { + if (!analyserRef.current) return + analyserRef.current.getByteTimeDomainData(data) + let sum = 0 + for (let i = 0; i < data.length; i += 1) { + const v = (data[i] - 128) / 128 + sum += v * v + } + const rms = Math.sqrt(sum / data.length) + const scaled = Math.log10(1 + rms * 120) / Math.log10(121) + const level = Math.min(1, Math.max(0, scaled)) + const smooth = level * 0.5 + micLevelRef.current * 0.5 + micLevelRef.current = smooth + setMicLevel(smooth) + if (isRecordingRef.current) { + const percent = smooth * 100 + const now = Date.now() + if (percent >= 8) { + lastVoiceAtRef.current = now + } + const silenceMs = now - lastVoiceAtRef.current + if ( + silenceMs >= 900 && + pendingTranscriptRef.current && + now - lastSilenceFinalAtRef.current > 1200 && + pendingTranscriptRef.current !== lastAutoFinalTextRef.current + ) { + appendFinalLine(pendingTranscriptRef.current) + lastSilenceFinalAtRef.current = now + lastAutoFinalAtRef.current = now + lastAutoFinalTextRef.current = pendingTranscriptRef.current + pendingTranscriptRef.current = '' + } + } + meterRafRef.current = window.requestAnimationFrame(loop) + } + meterRafRef.current = window.requestAnimationFrame(loop) + } catch (err) { + setErrorMessage((err as Error).message) + } + } + + const stopMeter = () => { + if (meterRafRef.current !== null) { + window.cancelAnimationFrame(meterRafRef.current) + meterRafRef.current = null + } + if (analyserRef.current) { + analyserRef.current.disconnect() + analyserRef.current = null + } + if (audioContextRef.current) { + audioContextRef.current.close().catch(() => undefined) + audioContextRef.current = null + } + if (mediaStreamRef.current) { + mediaStreamRef.current.getTracks().forEach((track) => track.stop()) + mediaStreamRef.current = null + } + setMicLevel(0) + micLevelRef.current = 0 + pendingTranscriptRef.current = '' + lastAutoFinalAtRef.current = 0 + lastAutoFinalTextRef.current = '' + lastVoiceAtRef.current = 0 + lastSilenceFinalAtRef.current = 0 + clearNoResultTimer() } const handleStart = async () => { setErrorMessage(null) lineIdRef.current = 1 - pendingUtterancesRef.current = [] setTranscriptLines([]) meetingIdRef.current = null setCurrentMeetingId(null) setIsRecording(true) isRecordingRef.current = true - lastResultAtRef.current = Date.now() - startRecognition() + stopRequestedRef.current = false + lastResultAtRef.current = 0 + resetPendingRef.current = false + lastVoiceAtRef.current = 0 + lastSilenceFinalAtRef.current = 0 + scheduleNoResultReset() + pendingTranscriptRef.current = '' + lastAutoFinalAtRef.current = 0 + lastAutoFinalTextRef.current = '' + if (hasSpeechRecognition) { + startRecognition() + } + void startMeter() try { const result = await createMeeting(new Date().toISOString()) meetingIdRef.current = result.id setCurrentMeetingId(result.id) - const pending = [...pendingUtterancesRef.current] - pendingUtterancesRef.current = [] - await Promise.all( - pending.map((item) => saveUtterance(result.id, item.text, item.ts)) - ) } catch (err) { setErrorMessage((err as Error).message) } } const handleStop = async () => { - if (!meetingIdRef.current) return setErrorMessage(null) - recognitionRef.current?.stop() - liveTextRef.current = '' setIsRecording(false) isRecordingRef.current = false + stopRequestedRef.current = true + lastVoiceAtRef.current = 0 + lastSilenceFinalAtRef.current = 0 + clearNoResultTimer() + pendingTranscriptRef.current = '' + recognitionRef.current?.stop() + stopMeter() try { - await endMeeting(meetingIdRef.current, new Date().toISOString()) - const list = await fetchMeetings() - setMeetingsList(list) - } catch (err) { - setErrorMessage((err as Error).message) - } - } - - const safeRestartRecognition = async () => { - if (!recognitionRef.current || restartLockRef.current) return - restartLockRef.current = true - try { - recognitionRef.current.stop() - recognitionRef.current.start() - lastResultAtRef.current = Date.now() - } catch { - // ignore restart errors - } finally { - window.setTimeout(() => { - restartLockRef.current = false - }, 500) - } - } - - const handleSave = async () => { - if (!meetingIdRef.current) return - setErrorMessage(null) - try { - await endMeeting(meetingIdRef.current, new Date().toISOString()) + if (meetingIdRef.current) { + await endMeeting(meetingIdRef.current, new Date().toISOString()) + const list = await fetchMeetings() + setMeetingsList(list) + } } catch (err) { setErrorMessage((err as Error).message) } @@ -253,6 +338,15 @@ function App() { setSelectedMeetingIds(next) } + const handleToggleAll = () => { + if (meetingsList.length === 0) return + if (selectedMeetingIds.size === meetingsList.length) { + setSelectedMeetingIds(new Set()) + return + } + setSelectedMeetingIds(new Set(meetingsList.map((meeting) => meeting.id))) + } + const handleDelete = async () => { if (selectedMeetingIds.size === 0) return setErrorMessage(null) @@ -277,7 +371,11 @@ function App() {
{errorMessage &&
{errorMessage}
} - +
- + - {!hasSpeechRecognition && ( -
Chrome에서만 Web Speech API가 안정적으로 동작합니다.
- )}
대화 리스트
@@ -304,7 +403,9 @@ function App() { meetings={meetingsList} isEditMode={isEditMode} selectedIds={selectedMeetingIds} + allSelected={meetingsList.length > 0 && selectedMeetingIds.size === meetingsList.length} onToggleSelect={handleToggleSelect} + onToggleAll={handleToggleAll} onSelectMeeting={handleSelectMeeting} />
diff --git a/client/src/components/MeetingList.tsx b/client/src/components/MeetingList.tsx index ab835a7..bc7bd4d 100644 --- a/client/src/components/MeetingList.tsx +++ b/client/src/components/MeetingList.tsx @@ -4,7 +4,9 @@ type Props = { meetings: MeetingSummary[] isEditMode: boolean selectedIds: Set + allSelected: boolean onToggleSelect: (id: number) => void + onToggleAll: () => void onSelectMeeting: (id: number) => void } @@ -24,11 +26,24 @@ export default function MeetingList({ meetings, isEditMode, selectedIds, + allSelected, onToggleSelect, + onToggleAll, onSelectMeeting, }: Props) { return (
+ {isEditMode && ( +
+ +
+ )} {meetings.length === 0 && (
저장된 대화가 없습니다.
)} diff --git a/client/src/components/TranscriptPanel.tsx b/client/src/components/TranscriptPanel.tsx index 26e4209..a8204f6 100644 --- a/client/src/components/TranscriptPanel.tsx +++ b/client/src/components/TranscriptPanel.tsx @@ -7,12 +7,17 @@ type TranscriptLine = { type Props = { transcriptLines: TranscriptLine[] + interimText: string + isRecording: boolean } -export default function TranscriptPanel({ transcriptLines }: Props) { +export default function TranscriptPanel({ transcriptLines, interimText, isRecording }: Props) { return (
대화/STT
+
+ {interimText ? interimText : ''} +
{transcriptLines.length === 0 && (
대화를 시작하면 STT 로그가 표시됩니다.
diff --git a/client/src/types/speech.d.ts b/client/src/types/speech.d.ts index c7ceda5..a33f094 100644 --- a/client/src/types/speech.d.ts +++ b/client/src/types/speech.d.ts @@ -1,25 +1,42 @@ -interface SpeechRecognitionEvent extends Event { - resultIndex: number - results: SpeechRecognitionResultList -} +export {} -interface SpeechRecognition extends EventTarget { - continuous: boolean - interimResults: boolean - lang: string - maxAlternatives: number - onresult: ((event: SpeechRecognitionEvent) => void) | null - onerror: ((event: Event) => void) | null - onend: (() => void) | null - start: () => void - stop: () => void -} +declare global { + interface SpeechRecognitionEvent extends Event { + resultIndex: number + results: SpeechRecognitionResultList + } -interface SpeechRecognitionConstructor { - new (): SpeechRecognition -} + interface SpeechRecognitionErrorEvent extends Event { + error: + | 'no-speech' + | 'aborted' + | 'audio-capture' + | 'network' + | 'not-allowed' + | 'service-not-allowed' + | string + } -interface Window { - SpeechRecognition?: SpeechRecognitionConstructor - webkitSpeechRecognition?: SpeechRecognitionConstructor + interface SpeechRecognition extends EventTarget { + continuous: boolean + interimResults: boolean + lang: string + maxAlternatives: number + onstart: (() => void) | null + onresult: ((event: SpeechRecognitionEvent) => void) | null + onerror: ((event: SpeechRecognitionErrorEvent) => void) | null + onend: (() => void) | null + start: () => void + stop: () => void + abort: () => void + } + + interface SpeechRecognitionConstructor { + new (): SpeechRecognition + } + + interface Window { + SpeechRecognition?: SpeechRecognitionConstructor + webkitSpeechRecognition?: SpeechRecognitionConstructor + } }