Commit

reformat
Timur Abishev committed Jan 5, 2024
1 parent 2145c38 commit f7924ef
Showing 8 changed files with 144 additions and 104 deletions.
@@ -58,11 +58,9 @@ def initialize(self, args):
         model_config = json.loads(args["model_config"])
         tokenizer_dir = os.environ["triton_tokenizer_repository"]
         tokenizer_type = model_config["parameters"]["tokenizer_type"]["string_value"]
-        self.add_special_tokens = model_config['parameters'].get(
-            'add_special_tokens',
-            {'string_value': "false"})['string_value'].lower() in [
-                'true', '1', 't', 'y', 'yes'
-            ]
+        self.add_special_tokens = model_config["parameters"].get(
+            "add_special_tokens", {"string_value": "false"}
+        )["string_value"].lower() in ["true", "1", "t", "y", "yes"]

         if tokenizer_type == "t5":
             self.tokenizer = T5Tokenizer(vocab_file=tokenizer_dir, padding_side="left")
@@ -189,25 +187,32 @@ def finalize(self):

     def _create_request(self, query):
         """
-        query : batch string (2D numpy array)
+        query : batch string (2D numpy array)
         """
         start_ids = [
             np.array(
                 self.tokenizer.encode(
-                    s[0].decode(),
-                    add_special_tokens=self.add_special_tokens)).astype(int)
+                    s[0].decode(), add_special_tokens=self.add_special_tokens
+                )
+            ).astype(int)
             for s in query
         ]
         start_lengths = np.array([[len(ids)] for ids in start_ids]).astype(int)

         max_len = 0
         for seq in start_ids:
             max_len = max(max_len, seq.shape[0])
-        start_ids = np.stack([
-            np.pad(seq, (0, max_len - seq.shape[0]),
-                   'constant',
-                   constant_values=(0, self.pad_id)) for seq in start_ids
-        ])
+        start_ids = np.stack(
+            [
+                np.pad(
+                    seq,
+                    (0, max_len - seq.shape[0]),
+                    "constant",
+                    constant_values=(0, self.pad_id),
+                )
+                for seq in start_ids
+            ]
+        )

         return start_ids, start_lengths
The remaining changed files contain the identical diff.
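As a reading aid (not part of the commit), here is a minimal standalone sketch of the two pieces of logic touched by this reformat: interpreting the add_special_tokens parameter string as a boolean, and right-padding the encoded sequences with the pad id. The parameter dict, the token ids, and pad_id = 0 below are illustrative assumptions, not values from the repository.

    import numpy as np

    # Hypothetical "parameters" fragment; in the real model it comes from
    # json.loads(args["model_config"])["parameters"] inside initialize().
    parameters = {"add_special_tokens": {"string_value": "True"}}

    # Same truthy-string test as in the diff; a missing parameter falls back to "false".
    add_special_tokens = parameters.get(
        "add_special_tokens", {"string_value": "false"}
    )["string_value"].lower() in ["true", "1", "t", "y", "yes"]
    print(add_special_tokens)  # True

    # Padding step from _create_request, with hand-written token ids standing in
    # for tokenizer.encode() output; pad_id = 0 is an assumption for illustration.
    pad_id = 0
    start_ids = [np.array([5, 17, 42]), np.array([8, 3])]
    start_lengths = np.array([[len(ids)] for ids in start_ids]).astype(int)  # [[3], [2]]

    max_len = max(seq.shape[0] for seq in start_ids)
    start_ids = np.stack(
        [
            np.pad(
                seq,
                (0, max_len - seq.shape[0]),
                "constant",
                constant_values=(0, pad_id),
            )
            for seq in start_ids
        ]
    )
    print(start_ids)
    # [[ 5 17 42]
    #  [ 8  3  0]]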

0 comments on commit f7924ef
