diff --git a/lmms_eval/models/llava_hf.py b/lmms_eval/models/llava_hf.py index 0fab0e011..a2f304766 100644 --- a/lmms_eval/models/llava_hf.py +++ b/lmms_eval/models/llava_hf.py @@ -1,11 +1,15 @@ import warnings from typing import List, Optional, Tuple, Union +import numpy as np +import PIL import torch from accelerate import Accelerator, DistributedType from accelerate.state import AcceleratorState +from decord import VideoReader, cpu from tqdm import tqdm from transformers import ( + AutoConfig, AutoProcessor, LlavaForConditionalGeneration, LlavaNextForConditionalGeneration, @@ -21,10 +25,23 @@ from loguru import logger as eval_logger DEFAULT_IMAGE_TOKEN = "" +DEFAULT_VIDEO_TOKEN = "