Fix llama3 generation
satyaog committed Sep 11, 2024
1 parent 51cfb81 commit b03a424
Showing 4 changed files with 23 additions and 8 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/cloud-ci.yml
@@ -115,10 +115,10 @@ jobs:
RUN_ON="azure__a100"
EXCLUDE="$EXCLUDE,$_MULTI_GPUS,$_MULTI_NODES"
;;
# "2g")
# RUN_ON="azure__a100_x2"
# SELECT="$SELECT,$_MULTI_GPUS"
# ;;
"2g")
RUN_ON="azure__a100_x2"
SELECT="$SELECT,$_MULTI_GPUS"
;;
"4g")
RUN_ON="azure__a100_x4"
SELECT="$SELECT,$_MULTI_GPUS"
2 changes: 1 addition & 1 deletion benchmarks/llm/configs/llama3_70B_full.yaml
@@ -36,7 +36,7 @@ checkpointer:
_component_: torchtune.utils.FullModelHFCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
model-00001-of-00030.safetensors,
model-00002-of-00030.safetensors,
model-00003-of-00030.safetensors,
model-00004-of-00030.safetensors,
14 changes: 11 additions & 3 deletions benchmarks/llm/prepare.py
@@ -23,7 +23,6 @@
class Arguments:
recipe: str
config: str = None
no_pretrained: bool = False


@dataclass
@@ -100,12 +99,19 @@ def load_model(recipe, cfg):


def generate_weights(args, config):
is_done: Path = args.output_dir / "generated"
if is_done.exists():
print(f"{args.output_dir}/['*.safetensors'] or ['*consolidated.*.pth'] already generated")
return

if config.get("safetensors", False):
params_path = args.output_dir / "config.json"
model = LlamaForCausalLM(LlamaConfig(**json.loads(params_path.read_text())))
# Avoid saving this as part of the config.
del model.config._name_or_path
model.config.torch_dtype = torch.float16
# Even if the model is loaded with a config.torch_dtype == bf16, model.dtype
# seems to be f32. Force model.dtype to be bf16
model.to(model.config.torch_dtype)
model.save_pretrained(str(args.output_dir), safe_serialization=True)

else:
Expand Down Expand Up @@ -138,6 +144,8 @@ def generate_weights(args, config):
conn.send(True)
p.join()

is_done.touch()


def main():
parser = ArgumentParser()
@@ -154,7 +162,7 @@ def main():

#
huggingface_format = config.get("safetensors", False)
pretrained = not args.no_pretrained
pretrained = not config.get("no_pretrained", False)

if not pretrained:
# if we will generate the weights, do not download any weights
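For reference, here is a minimal sketch (not the actual prepare.py code) of how the pieces changed above fit together: a "generated" marker file makes weight generation idempotent across benchmark runs, and the no_pretrained switch is now read from the benchmark config rather than from a CLI flag. The function names, the example output directory, and the elided generation bodies below are placeholders.

from pathlib import Path

def generate_weights_sketch(output_dir: Path, config: dict) -> None:
    # Marker file written once generation finishes; if it already exists,
    # repeated prepare runs skip the expensive weight-generation step.
    is_done = output_dir / "generated"
    if is_done.exists():
        print(f"{output_dir}: weights already generated, skipping")
        return

    if config.get("safetensors", False):
        # Hugging Face-style *.safetensors shards would be produced here
        # (the real code builds a LlamaForCausalLM from config.json and
        # forces model.dtype before save_pretrained).
        ...
    else:
        # torchtune-style consolidated.*.pth checkpoints would be produced here.
        ...

    # Mark completion only after the weights were actually written.
    is_done.touch()

def prepare_sketch(config: dict) -> None:
    # The switch now comes from the benchmark config (see config/base.yaml
    # below) instead of a --no_pretrained command-line argument.
    pretrained = not config.get("no_pretrained", False)
    if not pretrained:
        # The weights will be generated locally, so nothing is downloaded.
        generate_weights_sketch(Path("/tmp/weights"), config)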
7 changes: 7 additions & 0 deletions config/base.yaml
@@ -534,6 +534,7 @@ llm-lora-single:
inherits: _llm
plan:
method: per_gpu

argv:
"{milabench_code}/recipes/lora_finetune_single_device.py": true
--config: "{milabench_code}/configs/llama3_8B_lora_single_device.yaml"
@@ -546,6 +547,7 @@ llm-lora-single:
repo_id="meta-llama/Meta-Llama-3.1-8B": true
batch_size=8: true
gradient_accumulation_steps=8: true
no_pretrained=True: true

llm-lora-ddp-gpus:
inherits: _llm
@@ -565,6 +567,7 @@ llm-lora-ddp-gpus:
repo_id="meta-llama/Meta-Llama-3.1-8B": true
batch_size=8: true
gradient_accumulation_steps=8: true
no_pretrained=True: true

llm-lora-ddp-nodes:
tags:
@@ -587,6 +590,7 @@ llm-lora-ddp-nodes:
repo_id="meta-llama/Meta-Llama-3.1-8B": true
batch_size=8: true
gradient_accumulation_steps=8: true
no_pretrained=True: true

num_machines: 2
requires_capabilities:
@@ -611,6 +615,7 @@ llm-lora-mp-gpus:
repo_id="meta-llama/Meta-Llama-3.1-70B": true
batch_size=8: true
gradient_accumulation_steps=1: true
no_pretrained=True: true

llm-full-mp-gpus:
inherits: _llm
@@ -631,6 +636,7 @@ llm-full-mp-gpus:
safetensors=true: true
batch_size=2: true
gradient_accumulation_steps=1: true
no_pretrained=True: true

llm-full-mp-nodes:
tags:
@@ -654,6 +660,7 @@ llm-full-mp-nodes:
safetensors=true: true
batch_size=2: true
gradient_accumulation_steps=1: true
no_pretrained=True: true

num_machines: 2
requires_capabilities:
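The no_pretrained=True entries added above are plain "key=value" argv items, so prepare.py can pick them up with config.get("no_pretrained", False) as shown in the prepare.py diff. The snippet below is a hypothetical illustration only — milabench's real argv handling is not part of this commit — of how such entries could be folded into that config dict.

def argv_to_config(argv_entries: dict) -> dict:
    # Hypothetical helper, not milabench code: turn enabled "key=value"
    # entries (mapped to true) into a config dict, parsing booleans such as
    # no_pretrained=True or safetensors=true.
    config = {}
    for entry, enabled in argv_entries.items():
        if not enabled or "=" not in entry:
            continue  # skip disabled entries and plain flags/paths
        name, raw = entry.split("=", 1)
        value = raw.strip('"')
        if value.lower() in ("true", "false"):
            config[name] = value.lower() == "true"
        else:
            config[name] = value
    return config

# Example: yields {"no_pretrained": True, "safetensors": True}
print(argv_to_config({"no_pretrained=True": True, "safetensors=true": True}))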
