From 9f8d1b40135e09bb9935750367d9cbe9f267edd1 Mon Sep 17 00:00:00 2001 From: Kaichen Zhang - NTU Date: Tue, 17 Sep 2024 14:48:19 +0800 Subject: [PATCH] Add support for llava_hf video, better loading logic for llava_hf ckpt (#260) --- lmms_eval/models/llava_hf.py | 65 +++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/lmms_eval/models/llava_hf.py b/lmms_eval/models/llava_hf.py index 0fab0e011..a2f304766 100644 --- a/lmms_eval/models/llava_hf.py +++ b/lmms_eval/models/llava_hf.py @@ -1,11 +1,15 @@ import warnings from typing import List, Optional, Tuple, Union +import numpy as np +import PIL import torch from accelerate import Accelerator, DistributedType from accelerate.state import AcceleratorState +from decord import VideoReader, cpu from tqdm import tqdm from transformers import ( + AutoConfig, AutoProcessor, LlavaForConditionalGeneration, LlavaNextForConditionalGeneration, @@ -21,10 +25,23 @@ from loguru import logger as eval_logger DEFAULT_IMAGE_TOKEN = "" +DEFAULT_VIDEO_TOKEN = "