diff --git a/friendli/schema/api/v1/codegen/chat_completions_pb2.py b/friendli/schema/api/v1/codegen/chat_completions_pb2.py
index 9a5484e5..523f2690 100644
--- a/friendli/schema/api/v1/codegen/chat_completions_pb2.py
+++ b/friendli/schema/api/v1/codegen/chat_completions_pb2.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023-present, FriendliAI Inc. All rights reserved.
+# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
 
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
@@ -17,7 +17,7 @@
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x16\x63hat_completions.proto"\xd3\x03\n\x18V1ChatCompletionsRequest\x12\x33\n\x08messages\x18\x01 \x03(\x0b\x32!.V1ChatCompletionsRequest.Message\x12\x1e\n\x11\x66requency_penalty\x18\x03 \x01(\x02H\x00\x88\x01\x01\x12\x17\n\nmax_tokens\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x01n\x18\x06 \x01(\x05H\x02\x88\x01\x01\x12\x1d\n\x10presence_penalty\x18\x07 \x01(\x02H\x03\x88\x01\x01\x12\x0c\n\x04stop\x18\x08 \x03(\t\x12\x13\n\x06stream\x18\t \x01(\x08H\x04\x88\x01\x01\x12\x18\n\x0btemperature\x18\n \x01(\x02H\x05\x88\x01\x01\x12\x12\n\x05top_p\x18\x0b \x01(\x02H\x06\x88\x01\x01\x12!\n\x14timeout_microseconds\x18\x1e \x01(\x05H\x07\x88\x01\x01\x1a(\n\x07Message\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12\x0c\n\x04role\x18\x02 \x01(\tB\x14\n\x12_frequency_penaltyB\r\n\x0b_max_tokensB\x04\n\x02_nB\x13\n\x11_presence_penaltyB\t\n\x07_streamB\x0e\n\x0c_temperatureB\x08\n\x06_top_pB\x17\n\x15_timeout_microsecondsb\x06proto3'
+    b'\n\x16\x63hat_completions.proto"\xf1\x03\n\x18V1ChatCompletionsRequest\x12\x33\n\x08messages\x18\x01 \x03(\x0b\x32!.V1ChatCompletionsRequest.Message\x12\x12\n\x05model\x18\x02 \x01(\tH\x00\x88\x01\x01\x12\x1e\n\x11\x66requency_penalty\x18\x03 \x01(\x02H\x01\x88\x01\x01\x12\x17\n\nmax_tokens\x18\x05 \x01(\x05H\x02\x88\x01\x01\x12\x0e\n\x01n\x18\x06 \x01(\x05H\x03\x88\x01\x01\x12\x1d\n\x10presence_penalty\x18\x07 \x01(\x02H\x04\x88\x01\x01\x12\x0c\n\x04stop\x18\x08 \x03(\t\x12\x13\n\x06stream\x18\t \x01(\x08H\x05\x88\x01\x01\x12\x18\n\x0btemperature\x18\n \x01(\x02H\x06\x88\x01\x01\x12\x12\n\x05top_p\x18\x0b \x01(\x02H\x07\x88\x01\x01\x12!\n\x14timeout_microseconds\x18\x1e \x01(\x05H\x08\x88\x01\x01\x1a(\n\x07Message\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12\x0c\n\x04role\x18\x02 \x01(\tB\x08\n\x06_modelB\x14\n\x12_frequency_penaltyB\r\n\x0b_max_tokensB\x04\n\x02_nB\x13\n\x11_presence_penaltyB\t\n\x07_streamB\x0e\n\x0c_temperatureB\x08\n\x06_top_pB\x17\n\x15_timeout_microsecondsb\x06proto3'
 )
 
 _globals = globals()
@@ -26,7 +26,7 @@
 if _descriptor._USE_C_DESCRIPTORS == False:
     DESCRIPTOR._options = None
     _globals["_V1CHATCOMPLETIONSREQUEST"]._serialized_start = 27
-    _globals["_V1CHATCOMPLETIONSREQUEST"]._serialized_end = 494
-    _globals["_V1CHATCOMPLETIONSREQUEST_MESSAGE"]._serialized_start = 328
-    _globals["_V1CHATCOMPLETIONSREQUEST_MESSAGE"]._serialized_end = 368
+    _globals["_V1CHATCOMPLETIONSREQUEST"]._serialized_end = 524
+    _globals["_V1CHATCOMPLETIONSREQUEST_MESSAGE"]._serialized_start = 348
+    _globals["_V1CHATCOMPLETIONSREQUEST_MESSAGE"]._serialized_end = 388
 # @@protoc_insertion_point(module_scope)
diff --git a/friendli/schema/api/v1/codegen/chat_completions_pb2.pyi b/friendli/schema/api/v1/codegen/chat_completions_pb2.pyi
index cbbf783b..6ca488f6 100644
--- a/friendli/schema/api/v1/codegen/chat_completions_pb2.pyi
+++ b/friendli/schema/api/v1/codegen/chat_completions_pb2.pyi
@@ -1,4 +1,4 @@
-# Copyright (c) 2023-present, FriendliAI Inc. All rights reserved.
+# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
 
 from __future__ import annotations
 
@@ -17,6 +17,7 @@ DESCRIPTOR: _descriptor.FileDescriptor
 class V1ChatCompletionsRequest(_message.Message):
     __slots__ = [
         "messages",
+        "model",
         "frequency_penalty",
         "max_tokens",
         "n",
@@ -38,6 +39,7 @@ class V1ChatCompletionsRequest(_message.Message):
             self, content: _Optional[str] = ..., role: _Optional[str] = ...
         ) -> None: ...
     MESSAGES_FIELD_NUMBER: _ClassVar[int]
+    MODEL_FIELD_NUMBER: _ClassVar[int]
     FREQUENCY_PENALTY_FIELD_NUMBER: _ClassVar[int]
     MAX_TOKENS_FIELD_NUMBER: _ClassVar[int]
     N_FIELD_NUMBER: _ClassVar[int]
@@ -50,6 +52,7 @@ class V1ChatCompletionsRequest(_message.Message):
     messages: _containers.RepeatedCompositeFieldContainer[
         V1ChatCompletionsRequest.Message
     ]
+    model: str
     frequency_penalty: float
     max_tokens: int
     n: int
@@ -64,6 +67,7 @@ class V1ChatCompletionsRequest(_message.Message):
         messages: _Optional[
             _Iterable[_Union[V1ChatCompletionsRequest.Message, _Mapping]]
         ] = ...,
+        model: _Optional[str] = ...,
         frequency_penalty: _Optional[float] = ...,
         max_tokens: _Optional[int] = ...,
         n: _Optional[int] = ...,
diff --git a/friendli/schema/api/v1/codegen/completions_pb2.py b/friendli/schema/api/v1/codegen/completions_pb2.py
index 890c880a..0480f640 100644
--- a/friendli/schema/api/v1/codegen/completions_pb2.py
+++ b/friendli/schema/api/v1/codegen/completions_pb2.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023-present, FriendliAI Inc. All rights reserved.
+# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
 
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
@@ -17,7 +17,7 @@
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x11\x63ompletions.proto"\xb8\r\n\x14V1CompletionsRequest\x12\x13\n\x06stream\x18\x01 \x01(\x08H\x00\x88\x01\x01\x12\x13\n\x06prompt\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x0e\n\x06tokens\x18\x04 \x03(\x05\x12!\n\x14timeout_microseconds\x18\x05 \x01(\x05H\x02\x88\x01\x01\x12\x17\n\nmax_tokens\x18\x06 \x01(\x05H\x03\x88\x01\x01\x12\x1d\n\x10max_total_tokens\x18\x07 \x01(\x05H\x04\x88\x01\x01\x12\x17\n\nmin_tokens\x18\x08 \x01(\x05H\x05\x88\x01\x01\x12\x1d\n\x10min_total_tokens\x18\t \x01(\x05H\x06\x88\x01\x01\x12\x0e\n\x01n\x18\n \x01(\x05H\x07\x88\x01\x01\x12\x16\n\tnum_beams\x18\x0b \x01(\x05H\x08\x88\x01\x01\x12\x1b\n\x0elength_penalty\x18\x0c \x01(\x02H\t\x88\x01\x01\x12\x1b\n\x0e\x65\x61rly_stopping\x18\x0f \x01(\x08H\n\x88\x01\x01\x12\x1c\n\x0fno_repeat_ngram\x18\x11 \x01(\x05H\x0b\x88\x01\x01\x12$\n\x17\x65ncoder_no_repeat_ngram\x18\x12 \x01(\x05H\x0c\x88\x01\x01\x12\x1f\n\x12repetition_penalty\x18\x13 \x01(\x02H\r\x88\x01\x01\x12\'\n\x1a\x65ncoder_repetition_penalty\x18" \x01(\x02H\x0e\x88\x01\x01\x12\x1e\n\x11\x66requency_penalty\x18\x35 \x01(\x02H\x0f\x88\x01\x01\x12\x1d\n\x10presence_penalty\x18\x36 \x01(\x02H\x10\x88\x01\x01\x12\x18\n\x0btemperature\x18\x14 \x01(\x02H\x11\x88\x01\x01\x12\x12\n\x05top_k\x18\x15 \x01(\x05H\x12\x88\x01\x01\x12\x12\n\x05top_p\x18\x16 \x01(\x02H\x13\x88\x01\x01\x12\x0c\n\x04stop\x18\x17 \x03(\t\x12\x38\n\x0bstop_tokens\x18\x18 \x03(\x0b\x32#.V1CompletionsRequest.TokenSequence\x12\x0c\n\x04seed\x18\x1a \x03(\x04\x12\x1e\n\x16token_index_to_replace\x18\x1b \x03(\x05\x12\x1c\n\x14\x65mbedding_to_replace\x18\x1c \x03(\x02\x12\x43\n\x10\x62\x65\x61m_search_type\x18\x1d \x01(\x0e\x32$.V1CompletionsRequest.BeamSearchTypeH\x14\x88\x01\x01\x12*\n\x1d\x62\x65\x61m_compat_pre_normalization\x18\x1e \x01(\x08H\x15\x88\x01\x01\x12.\n!beam_compat_no_post_normalization\x18\x1f \x01(\x08H\x16\x88\x01\x01\x12\x11\n\tbad_words\x18  \x03(\t\x12<\n\x0f\x62\x61\x64_word_tokens\x18! \x03(\x0b\x32#.V1CompletionsRequest.TokenSequence\x12"\n\x15include_output_logits\x18/ \x01(\x08H\x17\x88\x01\x01\x12$\n\x17include_output_logprobs\x18\x32 \x01(\x08H\x18\x88\x01\x01\x12\x1c\n\x14\x66orced_output_tokens\x18\x33 \x03(\x05\x12\x11\n\teos_token\x18. \x03(\x05\x1a\x1f\n\rTokenSequence\x12\x0e\n\x06tokens\x18\x01 \x03(\x05"G\n\x0e\x42\x65\x61mSearchType\x12\x11\n\rDETERMINISTIC\x10\x00\x12\x0e\n\nSTOCHASTIC\x10\x01\x12\x12\n\x0eNAIVE_SAMPLING\x10\x02\x42\t\n\x07_streamB\t\n\x07_promptB\x17\n\x15_timeout_microsecondsB\r\n\x0b_max_tokensB\x13\n\x11_max_total_tokensB\r\n\x0b_min_tokensB\x13\n\x11_min_total_tokensB\x04\n\x02_nB\x0c\n\n_num_beamsB\x11\n\x0f_length_penaltyB\x11\n\x0f_early_stoppingB\x12\n\x10_no_repeat_ngramB\x1a\n\x18_encoder_no_repeat_ngramB\x15\n\x13_repetition_penaltyB\x1d\n\x1b_encoder_repetition_penaltyB\x14\n\x12_frequency_penaltyB\x13\n\x11_presence_penaltyB\x0e\n\x0c_temperatureB\x08\n\x06_top_kB\x08\n\x06_top_pB\x13\n\x11_beam_search_typeB \n\x1e_beam_compat_pre_normalizationB$\n"_beam_compat_no_post_normalizationB\x18\n\x16_include_output_logitsB\x1a\n\x18_include_output_logprobsb\x06proto3'
+    b'\n\x11\x63ompletions.proto"\xd6\r\n\x14V1CompletionsRequest\x12\x13\n\x06stream\x18\x01 \x01(\x08H\x00\x88\x01\x01\x12\x12\n\x05model\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x13\n\x06prompt\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x0e\n\x06tokens\x18\x04 \x03(\x05\x12!\n\x14timeout_microseconds\x18\x05 \x01(\x05H\x03\x88\x01\x01\x12\x17\n\nmax_tokens\x18\x06 \x01(\x05H\x04\x88\x01\x01\x12\x1d\n\x10max_total_tokens\x18\x07 \x01(\x05H\x05\x88\x01\x01\x12\x17\n\nmin_tokens\x18\x08 \x01(\x05H\x06\x88\x01\x01\x12\x1d\n\x10min_total_tokens\x18\t \x01(\x05H\x07\x88\x01\x01\x12\x0e\n\x01n\x18\n \x01(\x05H\x08\x88\x01\x01\x12\x16\n\tnum_beams\x18\x0b \x01(\x05H\t\x88\x01\x01\x12\x1b\n\x0elength_penalty\x18\x0c \x01(\x02H\n\x88\x01\x01\x12\x1b\n\x0e\x65\x61rly_stopping\x18\x0f \x01(\x08H\x0b\x88\x01\x01\x12\x1c\n\x0fno_repeat_ngram\x18\x11 \x01(\x05H\x0c\x88\x01\x01\x12$\n\x17\x65ncoder_no_repeat_ngram\x18\x12 \x01(\x05H\r\x88\x01\x01\x12\x1f\n\x12repetition_penalty\x18\x13 \x01(\x02H\x0e\x88\x01\x01\x12\'\n\x1a\x65ncoder_repetition_penalty\x18" \x01(\x02H\x0f\x88\x01\x01\x12\x1e\n\x11\x66requency_penalty\x18\x35 \x01(\x02H\x10\x88\x01\x01\x12\x1d\n\x10presence_penalty\x18\x36 \x01(\x02H\x11\x88\x01\x01\x12\x18\n\x0btemperature\x18\x14 \x01(\x02H\x12\x88\x01\x01\x12\x12\n\x05top_k\x18\x15 \x01(\x05H\x13\x88\x01\x01\x12\x12\n\x05top_p\x18\x16 \x01(\x02H\x14\x88\x01\x01\x12\x0c\n\x04stop\x18\x17 \x03(\t\x12\x38\n\x0bstop_tokens\x18\x18 \x03(\x0b\x32#.V1CompletionsRequest.TokenSequence\x12\x0c\n\x04seed\x18\x1a \x03(\x04\x12\x1e\n\x16token_index_to_replace\x18\x1b \x03(\x05\x12\x1c\n\x14\x65mbedding_to_replace\x18\x1c \x03(\x02\x12\x43\n\x10\x62\x65\x61m_search_type\x18\x1d \x01(\x0e\x32$.V1CompletionsRequest.BeamSearchTypeH\x15\x88\x01\x01\x12*\n\x1d\x62\x65\x61m_compat_pre_normalization\x18\x1e \x01(\x08H\x16\x88\x01\x01\x12.\n!beam_compat_no_post_normalization\x18\x1f \x01(\x08H\x17\x88\x01\x01\x12\x11\n\tbad_words\x18  \x03(\t\x12<\n\x0f\x62\x61\x64_word_tokens\x18! \x03(\x0b\x32#.V1CompletionsRequest.TokenSequence\x12"\n\x15include_output_logits\x18/ \x01(\x08H\x18\x88\x01\x01\x12$\n\x17include_output_logprobs\x18\x32 \x01(\x08H\x19\x88\x01\x01\x12\x1c\n\x14\x66orced_output_tokens\x18\x33 \x03(\x05\x12\x11\n\teos_token\x18. \x03(\x05\x1a\x1f\n\rTokenSequence\x12\x0e\n\x06tokens\x18\x01 \x03(\x05"G\n\x0e\x42\x65\x61mSearchType\x12\x11\n\rDETERMINISTIC\x10\x00\x12\x0e\n\nSTOCHASTIC\x10\x01\x12\x12\n\x0eNAIVE_SAMPLING\x10\x02\x42\t\n\x07_streamB\x08\n\x06_modelB\t\n\x07_promptB\x17\n\x15_timeout_microsecondsB\r\n\x0b_max_tokensB\x13\n\x11_max_total_tokensB\r\n\x0b_min_tokensB\x13\n\x11_min_total_tokensB\x04\n\x02_nB\x0c\n\n_num_beamsB\x11\n\x0f_length_penaltyB\x11\n\x0f_early_stoppingB\x12\n\x10_no_repeat_ngramB\x1a\n\x18_encoder_no_repeat_ngramB\x15\n\x13_repetition_penaltyB\x1d\n\x1b_encoder_repetition_penaltyB\x14\n\x12_frequency_penaltyB\x13\n\x11_presence_penaltyB\x0e\n\x0c_temperatureB\x08\n\x06_top_kB\x08\n\x06_top_pB\x13\n\x11_beam_search_typeB \n\x1e_beam_compat_pre_normalizationB$\n"_beam_compat_no_post_normalizationB\x18\n\x16_include_output_logitsB\x1a\n\x18_include_output_logprobsb\x06proto3'
 )
 
 _globals = globals()
@@ -26,9 +26,9 @@
 if _descriptor._USE_C_DESCRIPTORS == False:
     DESCRIPTOR._options = None
     _globals["_V1COMPLETIONSREQUEST"]._serialized_start = 22
-    _globals["_V1COMPLETIONSREQUEST"]._serialized_end = 1742
-    _globals["_V1COMPLETIONSREQUEST_TOKENSEQUENCE"]._serialized_start = 1133
-    _globals["_V1COMPLETIONSREQUEST_TOKENSEQUENCE"]._serialized_end = 1164
-    _globals["_V1COMPLETIONSREQUEST_BEAMSEARCHTYPE"]._serialized_start = 1166
-    _globals["_V1COMPLETIONSREQUEST_BEAMSEARCHTYPE"]._serialized_end = 1237
+    _globals["_V1COMPLETIONSREQUEST"]._serialized_end = 1772
+    _globals["_V1COMPLETIONSREQUEST_TOKENSEQUENCE"]._serialized_start = 1153
+    _globals["_V1COMPLETIONSREQUEST_TOKENSEQUENCE"]._serialized_end = 1184
+    _globals["_V1COMPLETIONSREQUEST_BEAMSEARCHTYPE"]._serialized_start = 1186
+    _globals["_V1COMPLETIONSREQUEST_BEAMSEARCHTYPE"]._serialized_end = 1257
 # @@protoc_insertion_point(module_scope)
diff --git a/friendli/schema/api/v1/codegen/completions_pb2.pyi b/friendli/schema/api/v1/codegen/completions_pb2.pyi
index e7189b2c..5a79d36d 100644
--- a/friendli/schema/api/v1/codegen/completions_pb2.pyi
+++ b/friendli/schema/api/v1/codegen/completions_pb2.pyi
@@ -1,4 +1,4 @@
-# Copyright (c) 2023-present, FriendliAI Inc. All rights reserved.
+# Copyright (c) 2024-present, FriendliAI Inc. All rights reserved.
 
 from __future__ import annotations
 
@@ -18,6 +18,7 @@ DESCRIPTOR: _descriptor.FileDescriptor
 class V1CompletionsRequest(_message.Message):
     __slots__ = [
         "stream",
+        "model",
         "prompt",
         "tokens",
         "timeout_microseconds",
@@ -69,6 +70,7 @@ class V1CompletionsRequest(_message.Message):
         tokens: _containers.RepeatedScalarFieldContainer[int]
         def __init__(self, tokens: _Optional[_Iterable[int]] = ...) -> None: ...
     STREAM_FIELD_NUMBER: _ClassVar[int]
+    MODEL_FIELD_NUMBER: _ClassVar[int]
     PROMPT_FIELD_NUMBER: _ClassVar[int]
     TOKENS_FIELD_NUMBER: _ClassVar[int]
     TIMEOUT_MICROSECONDS_FIELD_NUMBER: _ClassVar[int]
@@ -104,6 +106,7 @@ class V1CompletionsRequest(_message.Message):
     FORCED_OUTPUT_TOKENS_FIELD_NUMBER: _ClassVar[int]
     EOS_TOKEN_FIELD_NUMBER: _ClassVar[int]
     stream: bool
+    model: str
     prompt: str
     tokens: _containers.RepeatedScalarFieldContainer[int]
     timeout_microseconds: int
@@ -145,6 +148,7 @@ class V1CompletionsRequest(_message.Message):
     def __init__(
         self,
         stream: bool = ...,
+        model: _Optional[str] = ...,
         prompt: _Optional[str] = ...,
         tokens: _Optional[_Iterable[int]] = ...,
         timeout_microseconds: _Optional[int] = ...,
diff --git a/friendli/schema/api/v1/codegen/text_to_image_pb2.py b/friendli/schema/api/v1/codegen/text_to_image_pb2.py
index 789ce218..2f06608b 100644
--- a/friendli/schema/api/v1/codegen/text_to_image_pb2.py
+++ b/friendli/schema/api/v1/codegen/text_to_image_pb2.py
@@ -17,7 +17,7 @@
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x13text_to_image.proto"\xba\x02\n\x14V1TextToImageRequest\x12\x0e\n\x06prompt\x18\x01 \x01(\t\x12\x1c\n\x0fnegative_prompt\x18\x02 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0bnum_outputs\x18\x03 \x01(\x05H\x01\x88\x01\x01\x12 \n\x13num_inference_steps\x18\x04 \x01(\x05H\x02\x88\x01\x01\x12\x1b\n\x0eguidance_scale\x18\x05 \x01(\x02H\x03\x88\x01\x01\x12\x11\n\x04seed\x18\x06 \x01(\x05H\x04\x88\x01\x01\x12\x1c\n\x0fresponse_format\x18\x07 \x01(\tH\x05\x88\x01\x01\x42\x12\n\x10_negative_promptB\x0e\n\x0c_num_outputsB\x16\n\x14_num_inference_stepsB\x11\n\x0f_guidance_scaleB\x07\n\x05_seedB\x12\n\x10_response_formatb\x06proto3'
+    b'\n\x13text_to_image.proto"\xd8\x02\n\x14V1TextToImageRequest\x12\x0e\n\x06prompt\x18\x01 \x01(\t\x12\x1c\n\x0fnegative_prompt\x18\x02 \x01(\tH\x00\x88\x01\x01\x12\x18\n\x0bnum_outputs\x18\x03 \x01(\x05H\x01\x88\x01\x01\x12 \n\x13num_inference_steps\x18\x04 \x01(\x05H\x02\x88\x01\x01\x12\x1b\n\x0eguidance_scale\x18\x05 \x01(\x02H\x03\x88\x01\x01\x12\x11\n\x04seed\x18\x06 \x01(\x05H\x04\x88\x01\x01\x12\x1c\n\x0fresponse_format\x18\x07 \x01(\tH\x05\x88\x01\x01\x12\x12\n\x05model\x18\x08 \x01(\tH\x06\x88\x01\x01\x42\x12\n\x10_negative_promptB\x0e\n\x0c_num_outputsB\x16\n\x14_num_inference_stepsB\x11\n\x0f_guidance_scaleB\x07\n\x05_seedB\x12\n\x10_response_formatB\x08\n\x06_modelb\x06proto3'
 )
 
 _globals = globals()
@@ -26,5 +26,5 @@
 if _descriptor._USE_C_DESCRIPTORS == False:
     DESCRIPTOR._options = None
     _globals["_V1TEXTTOIMAGEREQUEST"]._serialized_start = 24
-    _globals["_V1TEXTTOIMAGEREQUEST"]._serialized_end = 338
+    _globals["_V1TEXTTOIMAGEREQUEST"]._serialized_end = 368
 # @@protoc_insertion_point(module_scope)
diff --git a/friendli/schema/api/v1/codegen/text_to_image_pb2.pyi b/friendli/schema/api/v1/codegen/text_to_image_pb2.pyi
index ce6fcfad..ba2c6297 100644
--- a/friendli/schema/api/v1/codegen/text_to_image_pb2.pyi
+++ b/friendli/schema/api/v1/codegen/text_to_image_pb2.pyi
@@ -19,6 +19,7 @@ class V1TextToImageRequest(_message.Message):
         "guidance_scale",
         "seed",
         "response_format",
+        "model",
     ]
     PROMPT_FIELD_NUMBER: _ClassVar[int]
     NEGATIVE_PROMPT_FIELD_NUMBER: _ClassVar[int]
@@ -27,6 +28,7 @@ class V1TextToImageRequest(_message.Message):
     GUIDANCE_SCALE_FIELD_NUMBER: _ClassVar[int]
     SEED_FIELD_NUMBER: _ClassVar[int]
     RESPONSE_FORMAT_FIELD_NUMBER: _ClassVar[int]
+    MODEL_FIELD_NUMBER: _ClassVar[int]
     prompt: str
     negative_prompt: str
     num_outputs: int
@@ -34,6 +36,7 @@ class V1TextToImageRequest(_message.Message):
     guidance_scale: float
     seed: int
     response_format: str
+    model: str
     def __init__(
         self,
         prompt: _Optional[str] = ...,
@@ -43,4 +46,5 @@ class V1TextToImageRequest(_message.Message):
         guidance_scale: _Optional[float] = ...,
         seed: _Optional[int] = ...,
         response_format: _Optional[str] = ...,
+        model: _Optional[str] = ...,
     ) -> None: ...
diff --git a/friendli/schema/api/v1/proto/chat_completions.proto b/friendli/schema/api/v1/proto/chat_completions.proto
index 3d7791ad..c01b6bc3 100644
--- a/friendli/schema/api/v1/proto/chat_completions.proto
+++ b/friendli/schema/api/v1/proto/chat_completions.proto
@@ -11,6 +11,7 @@ message V1ChatCompletionsRequest {
     }
 
     repeated Message messages = 1;
+    optional string model = 2;
     optional float frequency_penalty = 3;
     optional int32 max_tokens = 5;
     optional int32 n = 6;
diff --git a/friendli/schema/api/v1/proto/completions.proto b/friendli/schema/api/v1/proto/completions.proto
index b63c2836..c48c4680 100644
--- a/friendli/schema/api/v1/proto/completions.proto
+++ b/friendli/schema/api/v1/proto/completions.proto
@@ -16,6 +16,7 @@ message V1CompletionsRequest {
     }
 
     optional bool stream = 1;
+    optional string model = 2;
     optional string prompt = 3;
     repeated int32 tokens = 4;
     optional int32 timeout_microseconds = 5;
diff --git a/friendli/schema/api/v1/proto/text_to_image.proto b/friendli/schema/api/v1/proto/text_to_image.proto
index 1c717246..896116a3 100644
--- a/friendli/schema/api/v1/proto/text_to_image.proto
+++ b/friendli/schema/api/v1/proto/text_to_image.proto
@@ -5,11 +5,12 @@
 syntax = "proto3";
 
 message V1TextToImageRequest {
-    string prompt = 1;  
+    string prompt = 1;
     optional string negative_prompt = 2;
     optional int32 num_outputs = 3;
     optional int32 num_inference_steps = 4;
     optional float guidance_scale = 5;
     optional int32 seed = 6;
     optional string response_format = 7;
+    optional string model = 8;
 }
diff --git a/friendli/sdk/api/chat/completions.py b/friendli/sdk/api/chat/completions.py
index 48ba4ec5..efe339ae 100644
--- a/friendli/sdk/api/chat/completions.py
+++ b/friendli/sdk/api/chat/completions.py
@@ -40,9 +40,7 @@ def _method(self) -> str:
 
     @property
     def _content_type(self) -> str:
-        return (
-            "application/protobuf" if self._use_protobuf is None else "application/json"
-        )
+        return "application/protobuf" if self._use_protobuf else "application/json"
 
     @property
     def _request_pb_cls(self) -> Type[V1ChatCompletionsRequest]:
@@ -150,9 +148,7 @@ def _method(self) -> str:
 
     @property
     def _content_type(self) -> str:
-        return (
-            "application/protobuf" if self._use_protobuf is None else "application/json"
-        )
+        return "application/protobuf" if self._use_protobuf else "application/json"
 
     @property
     def _request_pb_cls(self) -> Type[V1ChatCompletionsRequest]:
diff --git a/friendli/sdk/api/completions.py b/friendli/sdk/api/completions.py
index e122f4cc..35cf367b 100644
--- a/friendli/sdk/api/completions.py
+++ b/friendli/sdk/api/completions.py
@@ -41,9 +41,7 @@ def _method(self) -> str:
 
     @property
     def _content_type(self) -> str:
-        return (
-            "application/protobuf" if self._use_protobuf is None else "application/json"
-        )
+        return "application/protobuf" if self._use_protobuf else "application/json"
 
     @property
     def _request_pb_cls(self) -> Type[V1CompletionsRequest]:
@@ -338,9 +336,7 @@ def _method(self) -> str:
 
     @property
     def _content_type(self) -> str:
-        return (
-            "application/protobuf" if self._use_protobuf is None else "application/json"
-        )
+        return "application/protobuf" if self._use_protobuf else "application/json"
 
     @property
     def _request_pb_cls(self) -> Type[V1CompletionsRequest]:
diff --git a/pyproject.toml b/pyproject.toml
index 229bb7c6..4c8c546b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "friendli-client"
-version = "1.3.0"
+version = "1.3.1"
 description = "Client of Friendli Suite."
 license = "Apache-2.0"
 authors = ["FriendliAI teams <eng@friendli.ai>"]