diff --git a/dolomite_engine/hf_models/mixins/dense_TP/main.py b/dolomite_engine/hf_models/mixins/dense_TP/main.py index 782dc5da..2553c072 100644 --- a/dolomite_engine/hf_models/mixins/dense_TP/main.py +++ b/dolomite_engine/hf_models/mixins/dense_TP/main.py @@ -60,7 +60,6 @@ def forward( cu_seqlens: torch.Tensor | None = None, max_seqlen: torch.Tensor | None = None, ) -> tuple | CausalLMOutputWithPast: - assert not output_attentions assert return_dict input_ids, position_ids, token_type_ids, labels, cu_seqlens, max_seqlen = self.prepare_inputs_for_model(