make mlp_gated_linear_units a model config and fix num params count
cli99 committed Dec 19, 2023
1 parent 820e199 commit 5d2d2fc
Showing 2 changed files with 9 additions and 11 deletions.
17 changes: 7 additions & 10 deletions llm_analysis/analysis.py
@@ -292,7 +292,7 @@ def get_num_params_per_layer_mlp(self) -> int:
Returns:
int: the number of parameters in the two MLP linear layers
"""
- return 2 * self.model_config.hidden_dim * self.model_config.ffn_embed_dim * self.model_config.moe_num_experts
+ return (3 if self.model_config.mlp_gated_linear_units else 2) * self.model_config.hidden_dim * self.model_config.ffn_embed_dim * self.model_config.moe_num_experts

def get_num_params_per_layer_router(self) -> int:
if self.model_config.moe_num_experts > 1:
@@ -775,7 +775,6 @@ def get_activation_memory_per_layer(
mlp_gelu_input_quant_bits: int = None,
mlp_2linear_quant_bits: int = None,
mlp_recompute_gelu: bool = False,
- mlp_gated_linear_units: bool = False,
return_breakdown: bool = False,
) -> Union[float, tuple]:
"""Get the memory (in bytes) required to store the activations of a
@@ -798,7 +797,6 @@ def get_activation_memory_per_layer(
mlp_1linear_quant_bits (int, optional): number of bits to quantize the input activations of the first linear layer. Defaults to None.
mlp_gelu_input_quant_bits (int, optional): number of bits to quantize the GELU input activations. Defaults to None.
mlp_2linear_quant_bits (int, optional): number of bits to quantize the input activations of the second linear layer. Defaults to None. mlp_recompute_gelu (bool, optional): whether to recompute the gelu activation in the MLP backward pass. Defaults to False.
- mlp_gated_linear_units (bool, optional): whether to use gated linear units in the MLP. Defaults to False.
Returns:
Union[float, tuple]: the memory (in bytes) required to store the activations of a transformer layer or a tuple of its breakdown
"""
@@ -839,7 +837,7 @@ def get_activation_memory_per_layer(
mlp_gelu_input_quant_bits=mlp_gelu_input_quant_bits,
mlp_2linear_quant_bits=mlp_2linear_quant_bits,
recompute_gelu=mlp_recompute_gelu,
- gated_linear_units=mlp_gated_linear_units,
+ gated_linear_units=self.model_config.mlp_gated_linear_units,
))

activation_memory_per_layernorm = self.get_activation_memory_per_layernorm(
@@ -1521,6 +1519,7 @@ def output_summary_dict(
log_str = self.get_readable_summary_dict(summary_dict)
file_name = self.get_configs_desc(
) + output_file_suffix + "-summary-readable.txt"
+ file_name = output_file_suffix + "-summary-readable.txt"
with open(os.path.join(output_dir, file_name), "w") as f:
f.write(log_str)
logger.info(
@@ -1957,7 +1956,6 @@ def training(
mlp_gelu_input_quant_bits: int = None,
mlp_2linear_quant_bits: int = None,
mlp_recompute_gelu: bool = False,
- mlp_gated_linear_units: bool = False,
output_dir: str = None,
output_file_suffix: str = "",
) -> dict:
@@ -1981,7 +1979,6 @@ def training(
mlp_gelu_input_quant_bits (int, optional): number of bits to quantize the GELU input activations. Defaults to None.
mlp_2linear_quant_bits (int, optional): number of bits to quantize the input activations of the second linear layer. Defaults to None.
mlp_recompute_gelu (bool, optional): whether to recompute the gelu activation in the MLP backward pass. Defaults to False.
- mlp_gated_linear_units (bool, optional): whether to use gated linear units in the MLP. Defaults to False.
output_dir (str, optional): if set to a directory path, write the return summary dict out to the directory with the setup. Defaults to None.
Returns:
@@ -2078,7 +2075,6 @@ def training(
mlp_gelu_input_quant_bits=mlp_gelu_input_quant_bits,
mlp_2linear_quant_bits=mlp_2linear_quant_bits,
mlp_recompute_gelu=mlp_recompute_gelu,
- mlp_gated_linear_units=mlp_gated_linear_units,
return_breakdown=True,
)
]
@@ -2326,6 +2322,8 @@ def training(
mlp_activation_quant_bits,
"mlp_recompute_gelu":
mlp_recompute_gelu,
"mlp_gated_linear_units":
self.model_config.mlp_gated_linear_units,
"achieved_flops":
self.get_TFLOPS_per_gpu(),
"flops_efficiency":
@@ -2525,7 +2523,6 @@ def train(
mlp_gelu_input_quant_bits: int = None,
mlp_2linear_quant_bits: int = None,
mlp_recompute_gelu: bool = False,
- mlp_gated_linear_units: bool = False,
achieved_tflops: float = None,
flops_efficiency: float = None,
hbm_memory_efficiency: float = HBM_MEMORY_EFFICIENCY,
@@ -2568,7 +2565,6 @@ def train(
mlp_2linear_quant_bits (int, optional): number of bits to quantize the input activations of the second linear layer. Defaults to None.
mlp_activation_quant_bits (int, optional): number of bits for the quantized MLP activation. Defaults to None.
mlp_recompute_gelu (bool, optional): whether to recompute the GELU activation in the MLP backward pass. Defaults to False.
- mlp_gated_linear_units (bool, optional): whether to use gated linear units in the MLP. Defaults to False.
achieved_tflops (float, optional): achieved TFLOPS per GPU. Defaults to None.
flops_efficiency (float, optional): flops efficiency, ranging from 0 to 1. Defaults to None.
hbm_memory_efficiency (float, optional): GPU HBM memory efficiency, ranging from 0 to 1. Defaults to HBM_MEMORY_EFFICIENCY.
@@ -2603,6 +2599,8 @@ def train(
model_config = get_model_config_by_name(model_name)
gpu_config = get_gpu_config_by_name(gpu_name)
dtype_config = get_dtype_config_by_name(dtype_name)
+ # if model_config.moe_num_experts == 1:
+ # ep_size = 1
parallel_config = ParallelismConfig(
tp_size=tp_size,
pp_size=pp_size,
@@ -2641,7 +2639,6 @@ def train(
mlp_gelu_input_quant_bits=mlp_gelu_input_quant_bits,
mlp_2linear_quant_bits=mlp_2linear_quant_bits,
mlp_recompute_gelu=mlp_recompute_gelu,
- mlp_gated_linear_units=mlp_gated_linear_units,
output_dir=output_dir,
output_file_suffix=output_file_suffix,
)
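For context on the factor change in get_num_params_per_layer_mlp above: a gated MLP (SwiGLU/GEGLU style, as used by LLaMA-family models) adds a gate projection next to the usual up- and down-projections, so each expert carries three hidden_dim x ffn_embed_dim weight matrices instead of two. A minimal NumPy sketch of the idea (illustrative only, not code from this repository):

```python
import numpy as np

def gated_mlp_forward(x, w_gate, w_up, w_down):
    """SwiGLU-style gated MLP: a gate projection alongside the usual up/down projections."""
    def silu(z):
        return z / (1.0 + np.exp(-z))
    return (silu(x @ w_gate) * (x @ w_up)) @ w_down

# Tiny dimensions just to show the shapes; the parameter count scales the same way.
hidden_dim, ffn_embed_dim = 8, 16
rng = np.random.default_rng(0)
w_gate = rng.standard_normal((hidden_dim, ffn_embed_dim))
w_up = rng.standard_normal((hidden_dim, ffn_embed_dim))
w_down = rng.standard_normal((ffn_embed_dim, hidden_dim))
y = gated_mlp_forward(rng.standard_normal((2, hidden_dim)), w_gate, w_up, w_down)
print(y.shape)  # (2, 8)

# Three weight matrices of hidden_dim * ffn_embed_dim parameters each (biases ignored),
# hence the (3 if mlp_gated_linear_units else 2) factor above. At assumed LLaMA-7B-like
# sizes (hidden_dim=4096, ffn_embed_dim=11008): 3 * 4096 * 11008 = 135,266,304 per MLP.
print(3 * hidden_dim * ffn_embed_dim)  # 384 for this toy example
```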
3 changes: 2 additions & 1 deletion llm_analysis/config.py
@@ -58,12 +58,13 @@ class ModelConfig:
ffn_embed_dim: int = (
None # hidden dimension of FFN, default to 4 * hidden_dim
)
- expansion_ratio: int = None
+ expansion_ratio: float = None
model_type: str = (
None # model type as tagged on Hugging Face (e.g., gpt2, opt, llama.)
)
moe_num_experts: int = 1 # number of experts for mixture of experts model
moe_top_k: int = 1 # top k experts for mixture of experts model
+ mlp_gated_linear_units: bool = False # whether to use gated linear units for MLP

def __post_init__(self):
if self.ffn_embed_dim is None and self.expansion_ratio is None:
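On the config.py side, expansion_ratio becomes a float because gated MLPs commonly use non-integer expansion ratios (LLaMA-7B: 11008 / 4096 = 2.6875), and the new mlp_gated_linear_units field means callers now set the flag on the model config instead of passing it to training() or train(). A rough, self-contained sketch covering just the fields touched here; it is not the library's actual ModelConfig, which has additional required fields and may resolve defaults differently:

```python
from dataclasses import dataclass

@dataclass
class MiniModelConfig:
    # Subset of llm_analysis.config.ModelConfig fields shown in the diff above.
    hidden_dim: int
    ffn_embed_dim: int = None        # defaults to expansion_ratio * hidden_dim
    expansion_ratio: float = None    # now a float, e.g. 2.6875 for LLaMA-7B-style MLPs
    moe_num_experts: int = 1
    mlp_gated_linear_units: bool = False  # new field added by this commit

    def __post_init__(self):
        # Assumed resolution, following the comments in the diff:
        # explicit ffn_embed_dim, else expansion_ratio * hidden_dim, else 4 * hidden_dim.
        if self.ffn_embed_dim is None:
            ratio = self.expansion_ratio if self.expansion_ratio is not None else 4
            self.ffn_embed_dim = int(ratio * self.hidden_dim)

cfg = MiniModelConfig(hidden_dim=4096, expansion_ratio=2.6875,
                      mlp_gated_linear_units=True)
print(cfg.ffn_embed_dim)  # 11008

# Per-layer MLP parameter count with the corrected factor from analysis.py:
factor = 3 if cfg.mlp_gated_linear_units else 2
print(factor * cfg.hidden_dim * cfg.ffn_embed_dim * cfg.moe_num_experts)  # 135266304
```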
