Skip to content

Commit

Permalink
reformat code according to checking result
Browse files Browse the repository at this point in the history
  • Loading branch information
white2018 committed Sep 15, 2024
1 parent a76241d commit 8cb8865
Showing 1 changed file with 16 additions and 42 deletions.
58 changes: 16 additions & 42 deletions lmms_eval/models/minimonkey.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def __init__(
self._rank = self.accelerator.local_process_index
self._world_size = self.accelerator.num_processes
else:
#self.model.to(self._device)
# self.model.to(self._device)
self._rank = 0
self._word_size = 1

Expand Down Expand Up @@ -200,12 +200,9 @@ def _collate(x):
pixel_values2 = load_image2(image, min_num=3, max_num=7, target_aspect_ratio=target_aspect_ratio)
pixel_values = torch.cat([pixel_values2[:-1], pixel_values[:-1], pixel_values2[-1:]], 0).to(self._device).to(self.dtype)

response, history = self.model.chat(self.tokenizer, pixel_values,
target_aspect_ratio, prompt, gen_kwargs,
history=None, return_history=True)
response, history = self.model.chat(self.tokenizer, pixel_values, target_aspect_ratio, prompt, gen_kwargs, history=None, return_history=True)

context = [{"role": "user", "content": prompt},
{"role": "assistant", "content": response}]
context = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response}]
except Exception as e:
eval_logger.error(f"Error {e} in generating")
cont = ""
Expand All @@ -220,8 +217,8 @@ def _collate(x):


import numpy as np
from PIL import Image
import torchvision.transforms as T
from PIL import Image
from torchvision.transforms.functional import InterpolationMode

IMAGENET_MEAN = (0.485, 0.456, 0.406)
Expand All @@ -230,17 +227,12 @@ def _collate(x):

def build_transform(input_size):
    """Build the torchvision preprocessing pipeline for a single image tile.

    The pipeline, in order:
      1. converts the image to RGB if its mode is anything else,
      2. resizes it to ``(input_size, input_size)`` with bicubic interpolation,
      3. converts it to a tensor via ``T.ToTensor()``,
      4. normalizes it with ``IMAGENET_MEAN`` / ``IMAGENET_STD``.

    Args:
        input_size: Side length used for both dimensions of the resize.

    Returns:
        The composed ``T.Compose`` transform.
    """
    # Build the pipeline exactly once; the steps and their order are what
    # callers rely on.
    return T.Compose(
        [
            T.Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),
            T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
            T.ToTensor(),
            T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ]
    )


def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
best_ratio_diff = float('inf')
best_ratio_diff = float("inf")
best_ratio = (1, 1)
area = width * height
for ratio in target_ratios:
Expand All @@ -260,14 +252,11 @@ def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbna
aspect_ratio = orig_width / orig_height

# calculate the existing image aspect ratio
target_ratios = set(
(i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if
i * j <= max_num and i * j >= min_num)
target_ratios = set((i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if i * j <= max_num and i * j >= min_num)
target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])

# find the closest aspect ratio to the target
target_aspect_ratio = find_closest_aspect_ratio(
aspect_ratio, target_ratios, orig_width, orig_height, image_size)
target_aspect_ratio = find_closest_aspect_ratio(aspect_ratio, target_ratios, orig_width, orig_height, image_size)

# calculate the target width and height
target_width = image_size * target_aspect_ratio[0]
Expand All @@ -278,12 +267,7 @@ def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbna
resized_img = image.resize((target_width, target_height))
processed_images = []
for i in range(blocks):
box = (
(i % (target_width // image_size)) * image_size,
(i // (target_width // image_size)) * image_size,
((i % (target_width // image_size)) + 1) * image_size,
((i // (target_width // image_size)) + 1) * image_size
)
box = ((i % (target_width // image_size)) * image_size, (i // (target_width // image_size)) * image_size, ((i % (target_width // image_size)) + 1) * image_size, ((i // (target_width // image_size)) + 1) * image_size)
# split the image
split_img = resized_img.crop(box)
processed_images.append(split_img)
Expand All @@ -299,9 +283,7 @@ def dynamic_preprocess2(image, min_num=1, max_num=12, prior_aspect_ratio=None, i
aspect_ratio = orig_width / orig_height

# calculate the existing image aspect ratio
target_ratios = set(
(i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if
i * j <= max_num and i * j >= min_num)
target_ratios = set((i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if i * j <= max_num and i * j >= min_num)
target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
new_target_ratios = []
for i in target_ratios:
Expand All @@ -310,8 +292,7 @@ def dynamic_preprocess2(image, min_num=1, max_num=12, prior_aspect_ratio=None, i
else:
continue
# find the closest aspect ratio to the target
target_aspect_ratio = find_closest_aspect_ratio(
aspect_ratio, new_target_ratios, orig_width, orig_height, image_size)
target_aspect_ratio = find_closest_aspect_ratio(aspect_ratio, new_target_ratios, orig_width, orig_height, image_size)
# calculate the target width and height
target_width = image_size * target_aspect_ratio[0]
target_height = image_size * target_aspect_ratio[1]
Expand All @@ -321,12 +302,7 @@ def dynamic_preprocess2(image, min_num=1, max_num=12, prior_aspect_ratio=None, i
resized_img = image.resize((target_width, target_height))
processed_images = []
for i in range(blocks):
box = (
(i % (target_width // image_size)) * image_size,
(i // (target_width // image_size)) * image_size,
((i % (target_width // image_size)) + 1) * image_size,
((i // (target_width // image_size)) + 1) * image_size
)
box = ((i % (target_width // image_size)) * image_size, (i // (target_width // image_size)) * image_size, ((i % (target_width // image_size)) + 1) * image_size, ((i // (target_width // image_size)) + 1) * image_size)
# split the image
split_img = resized_img.crop(box)
processed_images.append(split_img)
Expand All @@ -338,20 +314,18 @@ def dynamic_preprocess2(image, min_num=1, max_num=12, prior_aspect_ratio=None, i


def load_image(image, input_size=448, min_num=1, max_num=12):
    """Tile an image with ``dynamic_preprocess`` and stack the transformed tiles.

    Args:
        image: Input image; it must provide ``convert("RGB")``
            (presumably a ``PIL.Image.Image`` -- confirm against callers).
        input_size: Tile side length, forwarded to ``build_transform`` and to
            ``dynamic_preprocess`` as ``image_size``.
        min_num: Forwarded to ``dynamic_preprocess`` as ``min_num``.
        max_num: Forwarded to ``dynamic_preprocess`` as ``max_num``.

    Returns:
        A ``(pixel_values, target_aspect_ratio)`` tuple: ``pixel_values`` is
        the ``torch.stack`` of every transformed tile, and
        ``target_aspect_ratio`` is passed through unchanged from
        ``dynamic_preprocess``.
    """
    rgb_image = image.convert("RGB")
    transform = build_transform(input_size=input_size)
    # Tile the image once; ``use_thumbnail=True`` is always requested here.
    tiles, target_aspect_ratio = dynamic_preprocess(
        rgb_image,
        image_size=input_size,
        use_thumbnail=True,
        min_num=min_num,
        max_num=max_num,
    )
    # A distinct loop variable avoids shadowing the ``image`` parameter.
    pixel_values = torch.stack([transform(tile) for tile in tiles])
    return pixel_values, target_aspect_ratio


def load_image2(image, input_size=448, min_num=1, max_num=12, target_aspect_ratio=None):
    """Tile an image with ``dynamic_preprocess2`` and stack the transformed tiles.

    Unlike ``load_image``, this variant forwards a previously chosen aspect
    ratio to the tiler and returns only the pixel tensor.

    Args:
        image: Input image; it must provide ``convert("RGB")``
            (presumably a ``PIL.Image.Image`` -- confirm against callers).
        input_size: Tile side length, forwarded to ``build_transform`` and to
            ``dynamic_preprocess2`` as ``image_size``.
        min_num: Forwarded to ``dynamic_preprocess2`` as ``min_num``.
        max_num: Forwarded to ``dynamic_preprocess2`` as ``max_num``.
        target_aspect_ratio: Forwarded to ``dynamic_preprocess2`` as
            ``prior_aspect_ratio``.

    Returns:
        The ``torch.stack`` of every transformed tile.
    """
    rgb_image = image.convert("RGB")
    transform = build_transform(input_size=input_size)
    # Tile the image once; ``use_thumbnail=True`` is always requested here.
    tiles = dynamic_preprocess2(
        rgb_image,
        image_size=input_size,
        use_thumbnail=True,
        min_num=min_num,
        max_num=max_num,
        prior_aspect_ratio=target_aspect_ratio,
    )
    # A distinct loop variable avoids shadowing the ``image`` parameter.
    return torch.stack([transform(tile) for tile in tiles])

0 comments on commit 8cb8865

Please sign in to comment.