Reformat code according to the check results

EvolvingLMMs-Lab · Sep 15, 2024 · 8cb8865 · 8cb8865
1 parent a76241d
commit 8cb8865
Showing 1 changed file with 16 additions and 42 deletions.
diff --git a/lmms_eval/models/minimonkey.py b/lmms_eval/models/minimonkey.py
@@ -70,7 +70,7 @@ def __init__(
             self._rank = self.accelerator.local_process_index
             self._world_size = self.accelerator.num_processes
         else:
-            #self.model.to(self._device)
+            # self.model.to(self._device)
             self._rank = 0
             self._word_size = 1
 
@@ -200,12 +200,9 @@ def _collate(x):
                 pixel_values2 = load_image2(image, min_num=3, max_num=7, target_aspect_ratio=target_aspect_ratio)
                 pixel_values = torch.cat([pixel_values2[:-1], pixel_values[:-1], pixel_values2[-1:]], 0).to(self._device).to(self.dtype)
 
-                response, history = self.model.chat(self.tokenizer, pixel_values,
-                                                    target_aspect_ratio, prompt, gen_kwargs,
-                                                    history=None, return_history=True)
+                response, history = self.model.chat(self.tokenizer, pixel_values, target_aspect_ratio, prompt, gen_kwargs, history=None, return_history=True)
 
-                context = [{"role": "user", "content": prompt},
-                           {"role": "assistant", "content": response}]
+                context = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response}]
             except Exception as e:
                 eval_logger.error(f"Error {e} in generating")
                 cont = ""
@@ -220,8 +217,8 @@ def _collate(x):
 
 
 import numpy as np
-from PIL import Image
 import torchvision.transforms as T
+from PIL import Image
 from torchvision.transforms.functional import InterpolationMode
 
 IMAGENET_MEAN = (0.485, 0.456, 0.406)
@@ -230,17 +227,12 @@ def _collate(x):
 
 def build_transform(input_size):
     MEAN, STD = IMAGENET_MEAN, IMAGENET_STD
-    transform = T.Compose([
-        T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
-        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
-        T.ToTensor(),
-        T.Normalize(mean=MEAN, std=STD)
-    ])
+    transform = T.Compose([T.Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img), T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC), T.ToTensor(), T.Normalize(mean=MEAN, std=STD)])
     return transform
 
 
 def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
-    best_ratio_diff = float('inf')
+    best_ratio_diff = float("inf")
     best_ratio = (1, 1)
     area = width * height
     for ratio in target_ratios:
@@ -260,14 +252,11 @@ def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbna
     aspect_ratio = orig_width / orig_height
 
     # calculate the existing image aspect ratio
-    target_ratios = set(
-        (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if
-        i * j <= max_num and i * j >= min_num)
+    target_ratios = set((i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if i * j <= max_num and i * j >= min_num)
     target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
 
     # find the closest aspect ratio to the target
-    target_aspect_ratio = find_closest_aspect_ratio(
-        aspect_ratio, target_ratios, orig_width, orig_height, image_size)
+    target_aspect_ratio = find_closest_aspect_ratio(aspect_ratio, target_ratios, orig_width, orig_height, image_size)
 
     # calculate the target width and height
     target_width = image_size * target_aspect_ratio[0]
@@ -278,12 +267,7 @@ def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbna
     resized_img = image.resize((target_width, target_height))
     processed_images = []
     for i in range(blocks):
-        box = (
-            (i % (target_width // image_size)) * image_size,
-            (i // (target_width // image_size)) * image_size,
-            ((i % (target_width // image_size)) + 1) * image_size,
-            ((i // (target_width // image_size)) + 1) * image_size
-        )
+        box = ((i % (target_width // image_size)) * image_size, (i // (target_width // image_size)) * image_size, ((i % (target_width // image_size)) + 1) * image_size, ((i // (target_width // image_size)) + 1) * image_size)
         # split the image
         split_img = resized_img.crop(box)
         processed_images.append(split_img)
@@ -299,9 +283,7 @@ def dynamic_preprocess2(image, min_num=1, max_num=12, prior_aspect_ratio=None, i
     aspect_ratio = orig_width / orig_height
 
     # calculate the existing image aspect ratio
-    target_ratios = set(
-        (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if
-        i * j <= max_num and i * j >= min_num)
+    target_ratios = set((i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if i * j <= max_num and i * j >= min_num)
     target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
     new_target_ratios = []
     for i in target_ratios:
@@ -310,8 +292,7 @@ def dynamic_preprocess2(image, min_num=1, max_num=12, prior_aspect_ratio=None, i
         else:
             continue
     # find the closest aspect ratio to the target
-    target_aspect_ratio = find_closest_aspect_ratio(
-        aspect_ratio, new_target_ratios, orig_width, orig_height, image_size)
+    target_aspect_ratio = find_closest_aspect_ratio(aspect_ratio, new_target_ratios, orig_width, orig_height, image_size)
     # calculate the target width and height
     target_width = image_size * target_aspect_ratio[0]
     target_height = image_size * target_aspect_ratio[1]
@@ -321,12 +302,7 @@ def dynamic_preprocess2(image, min_num=1, max_num=12, prior_aspect_ratio=None, i
     resized_img = image.resize((target_width, target_height))
     processed_images = []
     for i in range(blocks):
-        box = (
-            (i % (target_width // image_size)) * image_size,
-            (i // (target_width // image_size)) * image_size,
-            ((i % (target_width // image_size)) + 1) * image_size,
-            ((i // (target_width // image_size)) + 1) * image_size
-        )
+        box = ((i % (target_width // image_size)) * image_size, (i // (target_width // image_size)) * image_size, ((i % (target_width // image_size)) + 1) * image_size, ((i // (target_width // image_size)) + 1) * image_size)
         # split the image
         split_img = resized_img.crop(box)
         processed_images.append(split_img)
@@ -338,20 +314,18 @@ def dynamic_preprocess2(image, min_num=1, max_num=12, prior_aspect_ratio=None, i
 
 
 def load_image(image, input_size=448, min_num=1, max_num=12):
-    image = image.convert('RGB')
+    image = image.convert("RGB")
     transform = build_transform(input_size=input_size)
-    images, target_aspect_ratio = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True,
-                                                     min_num=min_num, max_num=max_num)
+    images, target_aspect_ratio = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, min_num=min_num, max_num=max_num)
     pixel_values = [transform(image) for image in images]
     pixel_values = torch.stack(pixel_values)
     return pixel_values, target_aspect_ratio
 
 
 def load_image2(image, input_size=448, min_num=1, max_num=12, target_aspect_ratio=None):
-    image = image.convert('RGB')
+    image = image.convert("RGB")
     transform = build_transform(input_size=input_size)
-    images = dynamic_preprocess2(image, image_size=input_size, use_thumbnail=True, min_num=min_num,
-                                 max_num=max_num, prior_aspect_ratio=target_aspect_ratio)
+    images = dynamic_preprocess2(image, image_size=input_size, use_thumbnail=True, min_num=min_num, max_num=max_num, prior_aspect_ratio=target_aspect_ratio)
     pixel_values = [transform(image) for image in images]
     pixel_values = torch.stack(pixel_values)
     return pixel_values