
Commit 9a0fef2
fixing bad bug
jbilcke-hf committed Aug 6, 2024
1 parent: 5e9ff60
Showing 4 changed files with 53 additions and 15 deletions.
src/app/api/assistant/templates.ts (3 additions, 1 deletion)
@@ -58,8 +58,10 @@ Each item describes a different property (or facet) of the scene, based on its c
 - Remember, if the director is asking to edit the video project data structure, you MUST only return the item object, in JSON format.
 - If you don't understand how to modify, it's okay to say you don't understand and politely ask for clarification.
 - When you edit a JSON list, make sure to recopy the id for each field exactly like it is in the original, otherwise it breaks everything.
-- The director might give a query in English, French, Spanish.. but the movie scene is in English.
+- The director might give a query in French, English, Spanish.. but the movie scene is in English.
 - ALWAYS write the output in English: if the query is in another language, translate it to English.
+- When updating a scene (with UPDATE_STORY_AND_SCENE), never forget to update the updatedSceneSegments array!
+- Also, when updating scene segments, NEVER, EVER FORGET ABOUT THE CAMERA SEGMENTS! Because this is how we actually split our scene into separate shots!
 - Important: if the director is asking a QUESTION ("who is.. what is.. please analyze etc..") then DO NOT return JSON, but raw text instead`

export const examples = `
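To make the "recopy the id" and camera-segment rules above concrete, here is a hypothetical updatedSceneSegments fragment; the segment shape and field values are invented for illustration and are not taken from the repository's actual schema:

// Hypothetical sketch: the segment shape is assumed, not the repo's real schema.
// The invariant the rules describe: every edited segment keeps its original id verbatim.
const updatedSceneSegments = [
  // camera segments are what split the scene into separate shots
  { id: 'seg-001', category: 'camera', prompt: 'medium shot, slow push-in' },
  { id: 'seg-002', category: 'dialogue', prompt: 'JANE: We have to go. Now.' },
]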
src/services/assistant/useAssistant.ts (2 additions, 2 deletions)
@@ -37,8 +37,6 @@ import { parseRawInputToAction } from './parseRawInputToAction'
import { useAudio } from '../audio/useAudio'
import { updateStoryAndScene } from './updateStoryAndScene'

-const enableTextToSpeech = false
-
export const useAssistant = create<AssistantStore>((set, get) => ({
...getDefaultAssistantState(),

@@ -182,6 +180,8 @@ export const useAssistant = create<AssistantStore>((set, get) => ({
return
}

+console.log('processUserMessage():', input)
+
const { addEventToHistory, processActionOrMessage } = get()
const {
bufferedSegments: { activeSegments },
src/services/assistant/useVoiceAssistant.ts (2 additions, 4 deletions)
@@ -14,11 +14,9 @@ export function useVoiceAssistant() {
const stop = useMic((s) => s.stop)
const clear = useMic((s) => s.clear)

-const debouncedTranscript = useDebounce(transcript, 1200)
-
useEffect(() => {
-  processUserMessage(debouncedTranscript)
-}, [debouncedTranscript, processUserMessage])
+  processUserMessage(transcript)
+}, [transcript, processUserMessage])

return {
isSupported,
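The deleted useDebounce call presumably wrapped the standard timer pattern; a minimal sketch under that assumption (not necessarily the repo's removed implementation):

import { useEffect, useState } from 'react'

// Standard debounce hook: returns `value` only after it has stopped changing
// for `delayInMs`; each change resets the timer.
function useDebounce<T>(value: T, delayInMs: number): T {
  const [debounced, setDebounced] = useState(value)
  useEffect(() => {
    const timer = setTimeout(() => setDebounced(value), delayInMs)
    return () => clearTimeout(timer)
  }, [value, delayInMs])
  return debounced
}

With that helper gone, the hook forwards every transcript update as-is; the 1200 ms settling window now lives inside useMic (next file).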
src/services/mic/useMic.ts (46 additions, 8 deletions)
@@ -5,6 +5,8 @@ import { MicStore } from '@aitube/clapper-services'

import { getDefaultMicState } from './getDefaultMicState'

+const cutoffTimeInMs = 1200
+
export const useMic = create<MicStore>((set, get) => ({
...getDefaultMicState(),

@@ -28,20 +30,50 @@
})
}

-recognition.interimResults = interimResults
+recognition.interimResults = true
recognition.lang = lang
-recognition.continuous = continuous
+recognition.continuous = true

const speechRecognitionList = new window.webkitSpeechGrammarList()
speechRecognitionList.addFromString(grammar, grammarWeight)
recognition.grammars = speechRecognitionList

+let debounceTimer: NodeJS.Timeout | null = null
+let lastCompleteTranscript = ''
+let currentTranscript = ''
+let lastSpeechTime = Date.now()

const handleResult = (event: SpeechRecognitionEvent) => {
-let transcript = ''
-for (let i = 0; i < event.results.length; i++) {
-  transcript += event.results?.[i]?.[0]?.transcript || ''
+const currentTime = Date.now()
+
+// Check if it's been more than $cutoffTimeInMs since the last speech
+if (currentTime - lastSpeechTime > cutoffTimeInMs) {
+  lastCompleteTranscript = ''
+  currentTranscript = ''
+}
+
+lastSpeechTime = currentTime
+
+// Get the most recent result
+const latestResult = event.results[event.results.length - 1]
+currentTranscript = latestResult[0].transcript.trim()
+
+// If it's a final result, update lastCompleteTranscript
+if (latestResult.isFinal) {
+  lastCompleteTranscript = currentTranscript
+}
-set({ transcript })

+const fullTranscript = lastCompleteTranscript +
+  (currentTranscript !== lastCompleteTranscript ? ' ' + currentTranscript : '')
+
+if (debounceTimer) {
+  clearTimeout(debounceTimer)
+}
+
+debounceTimer = setTimeout(() => {
+  set({ transcript: fullTranscript.trim() })
+  debounceTimer = null
+}, cutoffTimeInMs)
}

const handleError = (event: SpeechRecognitionErrorEvent) => {
@@ -54,7 +86,13 @@
}

const handleEnd = () => {
-set({ isListening: false, transcript: '' })
+if (debounceTimer) {
+  clearTimeout(debounceTimer)
+  const fullTranscript = lastCompleteTranscript +
+    (currentTranscript !== lastCompleteTranscript ? ' ' + currentTranscript : '')
+  set({ transcript: fullTranscript.trim() })
+}
+set({ isListening: false })
}

recognition.addEventListener('result', handleResult)
@@ -86,4 +124,4 @@

if (typeof window !== 'undefined') {
useMic.getState().init()
-}
+}
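Net effect of the useMic changes: the store publishes transcript only once speech has settled for cutoffTimeInMs, and a pause longer than the cutoff starts a fresh transcript instead of appending forever. A minimal consumer sketch (the component name is invented for illustration):

import { useMic } from './useMic'

// Illustrative consumer: re-renders only when the debounced transcript
// settles, i.e. about 1200 ms after the user stops speaking.
export function TranscriptPreview() {
  const transcript = useMic((s) => s.transcript)
  return <p>{transcript}</p>
}

Centralizing the debounce in the store, rather than in each React hook via useDebounce, means every subscriber sees the same settled transcript and the merge of final and interim results happens in one place.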
