Skip to content

Commit

Permalink
Merge pull request #158 from makaveli10/cpu_usage
Browse files Browse the repository at this point in the history
fix: cpu usage issue.
  • Loading branch information
zoq authored Feb 28, 2024
2 parents ec13493 + 18b6168 commit 5e6c26c
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
13 changes: 10 additions & 3 deletions whisper_live/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,11 @@ def add_frames(self, frame_np):
if self.frames_np is not None and self.frames_np.shape[0] > 45*self.RATE:
self.frames_offset += 30.0
self.frames_np = self.frames_np[int(30*self.RATE):]
# Check the timestamp offset (it should be >= self.frames_offset).
# If it is less than frames_offset, the timestamp offset hasn't been updated,
# which basically means there was no speech; move it up to frames_offset.
if self.timestamp_offset < self.frames_offset:
self.timestamp_offset = self.frames_offset
if self.frames_np is None:
self.frames_np = frame_np.copy()
else:
Expand Down Expand Up @@ -796,7 +801,8 @@ def transcribe_audio(self, input_sample):
task=self.task,
vad_filter=self.use_vad,
vad_parameters=self.vad_parameters if self.use_vad else None)
if self.language is None:

if self.language is None and info is not None:
self.set_language(info)
return result

Expand Down Expand Up @@ -881,7 +887,9 @@ def speech_to_text(self):
input_sample = input_bytes.copy()
result = self.transcribe_audio(input_sample)

if self.language is None:
if result is None or self.language is None:
self.timestamp_offset += duration
time.sleep(0.25) # wait for voice activity, result is None when no voice activity
continue
self.handle_transcription_output(result, duration)

Expand Down Expand Up @@ -932,7 +940,6 @@ def update_segments(self, segments, duration):
"""
offset = None
self.current_out = ''
last_segment = None
# process complete segments
if len(segments) > 1:
for i, s in enumerate(segments[:-1]):
Expand Down
5 changes: 4 additions & 1 deletion whisper_live/transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def _get_feature_kwargs(self, model_path) -> dict:

return config

def transcribe(
def transcribe( # noqa: C901
self,
audio: Union[str, BinaryIO, np.ndarray],
language: Optional[str] = None,
Expand Down Expand Up @@ -315,6 +315,9 @@ def transcribe(
else:
speech_chunks = None

if audio.shape[0] == 0:
return None, None

features = self.feature_extractor(audio)

encoder_output = None
Expand Down

0 comments on commit 5e6c26c

Please sign in to comment.