Commit

reformat
Timur Abishev committed Jan 5, 2024
1 parent 2145c38 commit f7924ef
Showing 8 changed files with 144 additions and 104 deletions.
@@ -58,11 +58,9 @@ def initialize(self, args):
         model_config = json.loads(args["model_config"])
         tokenizer_dir = os.environ["triton_tokenizer_repository"]
         tokenizer_type = model_config["parameters"]["tokenizer_type"]["string_value"]
-        self.add_special_tokens = model_config['parameters'].get(
-            'add_special_tokens',
-            {'string_value': "false"})['string_value'].lower() in [
-                'true', '1', 't', 'y', 'yes'
-            ]
+        self.add_special_tokens = model_config["parameters"].get(
+            "add_special_tokens", {"string_value": "false"}
+        )["string_value"].lower() in ["true", "1", "t", "y", "yes"]

         if tokenizer_type == "t5":
             self.tokenizer = T5Tokenizer(vocab_file=tokenizer_dir, padding_side="left")
@@ -189,25 +187,32 @@ def finalize(self):

     def _create_request(self, query):
         """
-        query : batch string (2D numpy array)
+        query : batch string (2D numpy array)
         """
         start_ids = [
             np.array(
                 self.tokenizer.encode(
-                    s[0].decode(),
-                    add_special_tokens=self.add_special_tokens)).astype(int)
+                    s[0].decode(), add_special_tokens=self.add_special_tokens
+                )
+            ).astype(int)
             for s in query
         ]
         start_lengths = np.array([[len(ids)] for ids in start_ids]).astype(int)

         max_len = 0
         for seq in start_ids:
             max_len = max(max_len, seq.shape[0])
-        start_ids = np.stack([
-            np.pad(seq, (0, max_len - seq.shape[0]),
-                   'constant',
-                   constant_values=(0, self.pad_id)) for seq in start_ids
-        ])
+        start_ids = np.stack(
+            [
+                np.pad(
+                    seq,
+                    (0, max_len - seq.shape[0]),
+                    "constant",
+                    constant_values=(0, self.pad_id),
+                )
+                for seq in start_ids
+            ]
+        )

         return start_ids, start_lengths
The remaining changed files contain the identical diff.
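As a reading aid (not part of the commit), here is a minimal standalone sketch of the two pieces of logic touched by this reformat: interpreting the add_special_tokens parameter string as a boolean, and right-padding the encoded sequences with the pad id. The parameter dict, the token ids, and pad_id = 0 below are illustrative assumptions, not values from the repository.

    import numpy as np

    # Hypothetical "parameters" fragment; in the real model it comes from
    # json.loads(args["model_config"])["parameters"] inside initialize().
    parameters = {"add_special_tokens": {"string_value": "True"}}

    # Same truthy-string test as in the diff; a missing parameter falls back to "false".
    add_special_tokens = parameters.get(
        "add_special_tokens", {"string_value": "false"}
    )["string_value"].lower() in ["true", "1", "t", "y", "yes"]
    print(add_special_tokens)  # True

    # Padding step from _create_request, with hand-written token ids standing in
    # for tokenizer.encode() output; pad_id = 0 is an assumption for illustration.
    pad_id = 0
    start_ids = [np.array([5, 17, 42]), np.array([8, 3])]
    start_lengths = np.array([[len(ids)] for ids in start_ids]).astype(int)  # [[3], [2]]

    max_len = max(seq.shape[0] for seq in start_ids)
    start_ids = np.stack(
        [
            np.pad(
                seq,
                (0, max_len - seq.shape[0]),
                "constant",
                constant_values=(0, pad_id),
            )
            for seq in start_ids
        ]
    )
    print(start_ids)
    # [[ 5 17 42]
    #  [ 8  3  0]]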

0 comments on commit f7924ef
