diff --git a/evals/convert_to_hf.py b/evals/convert_to_hf.py
index 564216d..7594ca0 100644
--- a/evals/convert_to_hf.py
+++ b/evals/convert_to_hf.py
@@ -121,11 +121,11 @@ def main():
     # Convert output from GCS to HuggingFace format
     logging.info("Converting to HF format")
     params_paths: list[Path] = find_dirs_with_files(
-        download_dir, "*streaming_params*"
+        download_dir, "*streaming_params_2625"
     )
     pytorch_dir = Path(args.pytorch_dir)
     for params_path in params_paths:
-        if "llama" in str(params_path):
+        if "llama" in str(params_path) or "olmo" in str(params_path):
             experiment_name = params_path.parts[-2].replace(".", "-").split("--")[0]
         else:
             experiment_name = params_path.parent.stem.split("--")[0]
@@ -137,7 +137,7 @@
         convert_command = [
             "python",
             "-m",
-            "EasyLM.models.llama.convert_easylm_to_hf",
+            "EasyLM.models.olmo.convert_easylm_to_hf",
             f"--load_checkpoint=params::{params_path}",
             f"--tokenizer_path={args.tokenizer_path}",
             f"--model_size={args.model_size}",
@@ -265,7 +265,8 @@ def create_beaker_experiment_spec(
         result=ResultSpec(path="/output"),
         command=["/bin/sh", "-c"],
         arguments=[
-            "python scripts/run_rm.py --model /reward_model --tokenizer /reward_model --batch_size 8 --trust_remote_code --do_not_save"
+            # "python scripts/run_rm.py --model /reward_model --tokenizer /reward_model --batch_size 8 --trust_remote_code --do_not_save"
+            "python scripts/run_dpo.py --model /reward_model --tokenizer /reward_model --ref_model allenai/OLMo-7B-0424-hf --batch_size 8 --trust_remote_code --do_not_save"
         ],
         datasets=[
             DataMount(
diff --git a/evals/generate_eval_runs.py b/evals/generate_eval_runs.py
index 6d973c3..05cdb45 100644
--- a/evals/generate_eval_runs.py
+++ b/evals/generate_eval_runs.py
@@ -34,7 +34,7 @@ def get_args():
     parser.add_argument("--gcs_bucket", type=str, help="GCS bucket where the models are stored (NO need for gs:// prefix).")
     parser.add_argument("--gcs_dir_path", type=str, help="The directory path (or prefix) of models (e.g., human-preferences/rm_checkpoints/tulu2_13b_rm_human_datamodel_).")
     parser.add_argument("--prefix", type=str, help="Prefix to append to the eval runs.")
-    parser.add_argument("--is_reward_model", action="store_true", default="If set, will train a reward model.")
+    parser.add_argument("--is_reward_model", default=False, action="store_true", help="If set, will train a reward model.")
     parser.add_argument("--beaker_workspace", default="ai2/ljm-oe-adapt", help="Beaker workspace to upload datasets.")
     parser.add_argument("--cleanup", action="store_true", default=False, help="If set, will delete uncommitted datasets (make sure no other jobs are running!)")
     # fmt: on
@@ -87,6 +87,7 @@ def main():
         ]
         logging.info(f"Found {len(src_files) - len(diff)} datasets already done!")
         src_files = diff
+        src_files = [file for file in src_files if "2625" in file]

     logging.info(f"Generating experiment file for {len(src_files)} experiments.")
     spec = ExperimentSpec.from_file(args.template)
@@ -100,6 +101,7 @@ def main():
             task.arguments.extend(["--gcs_dir_path"] + [src_file])
             task.arguments.extend(["--prefix"] + [args.prefix])
             if args.is_reward_model:
+                # logging.info("Adding --is_reward_model flag")
                 task.arguments.extend(["--is_reward_model"])
             new_tasks.append(task)

diff --git a/evals/templates/template-olmo.yml b/evals/templates/template-olmo.yml
new file mode 100644
index 0000000..cf6d4dc
--- /dev/null
+++ b/evals/templates/template-olmo.yml
@@ -0,0 +1,40 @@
+version: v2
+budget: ai2/oe-adapt
+description: "Convert model to pytorch and launch a rewardbench eval job"
+tasks:
+  - name: template
+    image:
+      beaker: ljm/easylm-convert-olmo
+    command: ["python", "convert_to_hf.py"]
+    arguments:
+      - --gcs_bucket
+      - ljm-dev
+      - --batch_size
+      - 1
+      - --tokenizer_path
+      - allenai/OLMo-7B-0424-hf
+      - --model_size
+      - "17_7b"
+    result:
+      path: /output
+    resources:
+      gpuCount: 1
+    context:
+      priority: normal
+      preemptible: true
+    constraints:
+      cluster:
+        - ai2/ceres-cirrascale
+        - ai2/saturn-cirrascale
+        - ai2/jupiter-cirrascale-2
+    envVars:
+      - name: OPENAI_API_KEY
+        secret: OPENAI_API_KEY
+      - name: GOOGLE_SERVICE_ACCOUNT
+        secret: GOOGLE_SERVICE_ACCOUNT
+      - name: BEAKER_TOKEN
+        secret: BEAKER_TOKEN
+      - name: TOKENIZERS_PARALLELISM
+        value: "false"
+      - name: HF_TOKEN
+        secret: HF_TOKEN
diff --git a/scripts/submit_tpu_train_job.py b/scripts/submit_tpu_train_job.py
index 9ba27cf..f79a5b4 100644
--- a/scripts/submit_tpu_train_job.py
+++ b/scripts/submit_tpu_train_job.py
@@ -12,18 +12,18 @@
 )

 DPO_JOB_TEMPLATE = (
-    "python3 -m EasyLM.models.llama.llama_train_dpo "
+    "python3 -m EasyLM.models.olmo.olmo_train_dpo "
     "--mesh_dim='1,-1,4' "
     "--dtype='bf16' "
     "--num_epochs=3 "
     "--log_freq=50 "
     "--save_model_freq=1000 "
     "--save_milestone_freq=0 "
-    "--load_llama_config='13b' "
-    "--update_llama_config='' "
+    "--load_olmo_config='17_7b' "
+    "--update_olmo_config='' "
     "--load_dataset_state='' "
     "--load_checkpoint='params::{ckpt_gcs_path}' "
-    "--tokenizer.vocab_file='{vocab_gcs_path}' "
+    "--tokenizer='allenai/OLMo-7B-0424-hf' "
     "--optimizer.type='adamw' "
     "--optimizer.adamw_optimizer.weight_decay=0.0 "
     "--optimizer.adamw_optimizer.lr=5e-7 "
@@ -39,24 +39,24 @@
     "--logger.project='ljm-dev' "
     "--logger.entity='rlhf-llm-dev' "
     "--logger.prefix_to_id=True "
-    "--logger.prefix='tulu2_13b_dpo_{experiment_name}' "
+    "--logger.prefix=olmo1.7_7b_dpo_{experiment_name} "
     "--logger.output_dir='{output_gcs_path}/checkpoints/{dataset_name}'"
 )


 RM_JOB_TEMPLATE = (
-    "python3 -m EasyLM.models.llama.llama_train_rm "
+    "python3 -m EasyLM.models.olmo.olmo_train "
     "--mesh_dim=1,-1,8 "
     "--dtype=bf16 "
     "--num_epochs=1 "
     "--log_freq=50 "
     "--save_model_freq=1000 "
     "--save_milestone_freq=0 "
-    "--load_llama_config=8b31 "
-    "--update_llama_config='' "
+    "--load_olmo_config=17_7b "
+    "--update_olmo_config='' "
     "--load_dataset_state='' "
     "--load_checkpoint='params::{ckpt_gcs_path}' "
-    "--tokenizer='meta-llama/Llama-3.1-8B' "
+    "--tokenizer='allenai/OLMo-7B-0424-hf' "
     "--optimizer.type=adamw "
     "--optimizer.adamw_optimizer.weight_decay=0.0 "
     "--optimizer.adamw_optimizer.lr=1e-5 "
@@ -73,7 +73,7 @@
     "--logger.project=ljm-dev "
     "--logger.entity=rlhf-llm-dev "
     "--logger.prefix_to_id=True "
-    "--logger.prefix=llama3.1_8b_rm_{experiment_name} "
+    "--logger.prefix=olmo1.7_7b_rm_{experiment_name} "
     "--logger.output_dir='{output_gcs_path}/rm_checkpoints/{dataset_name}'"
 )

@@ -92,7 +92,7 @@ def get_args():
     parser.add_argument("--dataset_name", type=str, required=True, help="Dataset name for managing IO paths.")
     parser.add_argument("--input_gcs_path", type=str, default="gs://ljm-dev/human-preferences/train_data", help="Path to the GCS bucket containing the datasets.")
     parser.add_argument("--output_gcs_path", type=str, default="gs://ljm-dev/human-preferences", help="Path to the GCS bucket to save the models. Will create subdirectories for DPO or RM runs.")
-    parser.add_argument("--ckpt_gcs_path", type=str, default="gs://hamishi-east1/easylm/llama31/llama_3_1_8b", help="GCS filepath containing the parameter checkpoint for training.")
+    parser.add_argument("--ckpt_gcs_path", type=str, default="gs://hamishi-east1/easylm/olmo17/olmo_17_7b_hf", help="GCS filepath containing the parameter checkpoint for training.")
     parser.add_argument("--vocab_gcs_path", type=str, default="gs://hamishi-east1/easylm/llama/tokenizer.model", help="GCS filepath containing the tokenizer.")
     parser.add_argument("--train_dpo", action="store_true", default=False, help="If set, will train a DPO model instead of a Sequence Regression RM.")
     parser.add_argument("--timeout", type=int, default=300, help="Set timeout (in seconds) in between training runs.")