diff --git a/vllm/engine/output_processor/stop_checker.py b/vllm/engine/output_processor/stop_checker.py index 316752a2abb87..a43c79b4ea8c9 100644 --- a/vllm/engine/output_processor/stop_checker.py +++ b/vllm/engine/output_processor/stop_checker.py @@ -101,11 +101,14 @@ def maybe_stop_sequence( # Check if the last ngram is repeated in the output text. last_token = seq.output_text[-new_char_count:] # start checking for repetition after the first 32 tokens - if seq.get_output_len() > 32 and self.check_ngram_repetition(seq, sampling_params, last_token): + if seq.get_output_len() > 32 and self.check_ngram_repetition( + seq, sampling_params, last_token): seq.status = SequenceStatus.FINISHED_REPEATED return - def check_ngram_repetition(self, seq: Sequence, sampling_params: SamplingParams, last_token: str) -> bool: + def check_ngram_repetition(self, seq: Sequence, + sampling_params: SamplingParams, + last_token: str) -> bool: """Check if the last ngram is repeated in the output text. """ @@ -164,8 +167,6 @@ def check_ngram_repetition(self, seq: Sequence, sampling_params: SamplingParams, return is_done - - @staticmethod def check_stop_strings( output_text: str,