Add OLMo 1.7 7b experiments #38

Closed · wants to merge 6 commits
Changes from all commits
9 changes: 5 additions & 4 deletions evals/convert_to_hf.py
@@ -121,11 +121,11 @@ def main():
     # Convert output from GCS to HuggingFace format
     logging.info("Converting to HF format")
     params_paths: list[Path] = find_dirs_with_files(
-        download_dir, "*streaming_params*"
+        download_dir, "*streaming_params_2625"
     )
     pytorch_dir = Path(args.pytorch_dir)
     for params_path in params_paths:
-        if "llama" in str(params_path):
+        if "llama" in str(params_path) or "olmo" in str(params_path):
             experiment_name = params_path.parts[-2].replace(".", "-").split("--")[0]
         else:
             experiment_name = params_path.parent.stem.split("--")[0]
@@ -137,7 +137,7 @@ def main():
         convert_command = [
             "python",
             "-m",
-            "EasyLM.models.llama.convert_easylm_to_hf",
+            "EasyLM.models.olmo.convert_easylm_to_hf",
             f"--load_checkpoint=params::{params_path}",
             f"--tokenizer_path={args.tokenizer_path}",
             f"--model_size={args.model_size}",
@@ -265,7 +265,8 @@ def create_beaker_experiment_spec(
         result=ResultSpec(path="/output"),
         command=["/bin/sh", "-c"],
         arguments=[
-            "python scripts/run_rm.py --model /reward_model --tokenizer /reward_model --batch_size 8 --trust_remote_code --do_not_save"
+            # "python scripts/run_rm.py --model /reward_model --tokenizer /reward_model --batch_size 8 --trust_remote_code --do_not_save"
+            "python scripts/run_dpo.py --model /reward_model --tokenizer /reward_model --ref_model allenai/OLMo-7B-0424-hf --batch_size 8 --trust_remote_code --do_not_save"
         ],
         datasets=[
             DataMount(
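Note: narrowing the glob from "*streaming_params*" to "*streaming_params_2625" pins the conversion to the step-2625 checkpoint only, and the new "olmo" branch routes OLMo runs through the same experiment-name parsing as Llama runs. For context, a minimal sketch of what find_dirs_with_files is assumed to do (the repo's actual helper may differ):

# Assumed behavior of the find_dirs_with_files helper used above: return
# every directory under `root` that contains an entry matching `pattern`.
# This is a sketch, not the repo's actual implementation.
from pathlib import Path

def find_dirs_with_files(root: Path, pattern: str) -> list[Path]:
    # rglob matches the pattern recursively; collect unique parent directories
    return sorted({path.parent for path in Path(root).rglob(pattern)})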
4 changes: 3 additions & 1 deletion evals/generate_eval_runs.py
@@ -34,7 +34,7 @@ def get_args():
parser.add_argument("--gcs_bucket", type=str, help="GCS bucket where the models are stored (NO need for gs:// prefix).")
parser.add_argument("--gcs_dir_path", type=str, help="The directory path (or prefix) of models (e.g., human-preferences/rm_checkpoints/tulu2_13b_rm_human_datamodel_).")
parser.add_argument("--prefix", type=str, help="Prefix to append to the eval runs.")
parser.add_argument("--is_reward_model", action="store_true", default="If set, will train a reward model.")
parser.add_argument("--is_reward_model", default=False, action="store_true", help="If set, will train a reward model.")
parser.add_argument("--beaker_workspace", default="ai2/ljm-oe-adapt", help="Beaker workspace to upload datasets.")
parser.add_argument("--cleanup", action="store_true", default=False, help="If set, will delete uncommitted datasets (make sure no other jobs are running!)")
# fmt: on
@@ -87,6 +87,7 @@ def main():
     ]
     logging.info(f"Found {len(src_files) - len(diff)} datasets already done!")
     src_files = diff
+    src_files = [file for file in src_files if "2625" in file]
     logging.info(f"Generating experiment file for {len(src_files)} experiments.")

     spec = ExperimentSpec.from_file(args.template)
@@ -100,6 +101,7 @@
         task.arguments.extend(["--gcs_dir_path"] + [src_file])
         task.arguments.extend(["--prefix"] + [args.prefix])
         if args.is_reward_model:
+            # logging.info("Adding --is_reward_model flag")
             task.arguments.extend(["--is_reward_model"])
         new_tasks.append(task)
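The --is_reward_model fix is worth spelling out: with action="store_true", the old definition passed the help text as default, so the parsed attribute was a non-empty (and therefore truthy) string even when the flag was omitted. A minimal repro using only stdlib argparse:

# Repro of the bug fixed above: a misplaced help string used as `default`
# makes the flag effectively always on, since a non-empty string is truthy.
import argparse

buggy = argparse.ArgumentParser()
buggy.add_argument("--is_reward_model", action="store_true",
                   default="If set, will train a reward model.")
assert bool(buggy.parse_args([]).is_reward_model)  # True even without the flag!

fixed = argparse.ArgumentParser()
fixed.add_argument("--is_reward_model", action="store_true", default=False,
                   help="If set, will train a reward model.")
assert fixed.parse_args([]).is_reward_model is False  # off unless passed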
40 changes: 40 additions & 0 deletions evals/templates/template-olmo.yml
@@ -0,0 +1,40 @@
version: v2
budget: ai2/oe-adapt
description: "Convert model to pytorch and launch a rewardbench eval job"
tasks:
  - name: template
    image:
      beaker: ljm/easylm-convert-olmo
    command: ["python", "convert_to_hf.py"]
    arguments:
      - --gcs_bucket
      - ljm-dev
      - --batch_size
      - 1
      - --tokenizer_path
      - allenai/OLMo-7B-0424-hf
      - --model_size
      - "17_7b"
    result:
      path: /output
    resources:
      gpuCount: 1
    context:
      priority: normal
      preemptible: true
    constraints:
      cluster:
        - ai2/ceres-cirrascale
        - ai2/saturn-cirrascale
        - ai2/jupiter-cirrascale-2
    envVars:
      - name: OPENAI_API_KEY
        secret: OPENAI_API_KEY
      - name: GOOGLE_SERVICE_ACCOUNT
        secret: GOOGLE_SERVICE_ACCOUNT
      - name: BEAKER_TOKEN
        secret: BEAKER_TOKEN
      - name: TOKENIZERS_PARALLELISM
        value: "false"
      - name: HF_TOKEN
        secret: HF_TOKEN
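For reference, this template is consumed by generate_eval_runs.py via ExperimentSpec.from_file and then specialized per run. A rough sketch of that flow with beaker-py, mirroring the task.arguments.extend pattern visible in the diff above (the run name, dataset path, and submission call are assumptions, not repo code):

# Sketch: load the template, specialize the task, and submit it to Beaker.
from beaker import Beaker, ExperimentSpec

spec = ExperimentSpec.from_file("evals/templates/template-olmo.yml")
task = spec.tasks[0]
task.name = "rb-olmo-17-7b-example"  # hypothetical run name
task.arguments.extend(["--gcs_dir_path", "human-preferences/rm_checkpoints/example"])
task.arguments.extend(["--prefix", "olmo17"])

beaker = Beaker.from_env(default_workspace="ai2/ljm-oe-adapt")
beaker.experiment.create("rb-olmo-17-7b-example", spec)  # submit the experiment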
22 changes: 11 additions & 11 deletions scripts/submit_tpu_train_job.py
@@ -12,18 +12,18 @@
 )

 DPO_JOB_TEMPLATE = (
-    "python3 -m EasyLM.models.llama.llama_train_dpo "
+    "python3 -m EasyLM.models.olmo.olmo_train_dpo "
     "--mesh_dim='1,-1,4' "
     "--dtype='bf16' "
     "--num_epochs=3 "
     "--log_freq=50 "
     "--save_model_freq=1000 "
     "--save_milestone_freq=0 "
-    "--load_llama_config='13b' "
-    "--update_llama_config='' "
+    "--load_olmo_config='17_7b' "
+    "--update_olmo_config='' "
     "--load_dataset_state='' "
     "--load_checkpoint='params::{ckpt_gcs_path}' "
-    "--tokenizer.vocab_file='{vocab_gcs_path}' "
+    "--tokenizer='allenai/OLMo-7B-0424-hf' "
     "--optimizer.type='adamw' "
     "--optimizer.adamw_optimizer.weight_decay=0.0 "
     "--optimizer.adamw_optimizer.lr=5e-7 "
@@ -39,24 +39,24 @@
"--logger.project='ljm-dev' "
"--logger.entity='rlhf-llm-dev' "
"--logger.prefix_to_id=True "
"--logger.prefix='tulu2_13b_dpo_{experiment_name}' "
"--logger.prefix=olmo1.7_7b_dpo_{experiment_name} "
"--logger.output_dir='{output_gcs_path}/checkpoints/{dataset_name}'"
)


RM_JOB_TEMPLATE = (
"python3 -m EasyLM.models.llama.llama_train_rm "
"python3 -m EasyLM.models.olmo.olmo_train "
"--mesh_dim=1,-1,8 "
"--dtype=bf16 "
"--num_epochs=1 "
"--log_freq=50 "
"--save_model_freq=1000 "
"--save_milestone_freq=0 "
"--load_llama_config=8b31 "
"--update_llama_config='' "
"--load_olmo_config=17_7b "
"--update_olmo_config='' "
"--load_dataset_state='' "
"--load_checkpoint='params::{ckpt_gcs_path}' "
"--tokenizer='meta-llama/Llama-3.1-8B' "
"--tokenizer='allenai/OLMo-7B-0424-hf' "
"--optimizer.type=adamw "
"--optimizer.adamw_optimizer.weight_decay=0.0 "
"--optimizer.adamw_optimizer.lr=1e-5 "
@@ -73,7 +73,7 @@
"--logger.project=ljm-dev "
"--logger.entity=rlhf-llm-dev "
"--logger.prefix_to_id=True "
"--logger.prefix=llama3.1_8b_rm_{experiment_name} "
"--logger.prefix=olmo1.7_7b_rm_{experiment_name} "
"--logger.output_dir='{output_gcs_path}/rm_checkpoints/{dataset_name}'"
)

@@ -92,7 +92,7 @@ def get_args():
parser.add_argument("--dataset_name", type=str, required=True, help="Dataset name for managing IO paths.")
parser.add_argument("--input_gcs_path", type=str, default="gs://ljm-dev/human-preferences/train_data", help="Path to the GCS bucket containing the datasets.")
parser.add_argument("--output_gcs_path", type=str, default="gs://ljm-dev/human-preferences", help="Path to the GCS bucket to save the models. Will create subdirectories for DPO or RM runs.")
parser.add_argument("--ckpt_gcs_path", type=str, default="gs://hamishi-east1/easylm/llama31/llama_3_1_8b", help="GCS filepath containing the parameter checkpoint for training.")
parser.add_argument("--ckpt_gcs_path", type=str, default="gs://hamishi-east1/easylm/olmo17/olmo_17_7b_hf", help="GCS filepath containing the parameter checkpoint for training.")
parser.add_argument("--vocab_gcs_path", type=str, default="gs://hamishi-east1/easylm/llama/tokenizer.model", help="GCS filepath containing the tokenizer.")
parser.add_argument("--train_dpo", action="store_true", default=False, help="If set, will train a DPO model instead of a Sequence Regression RM.")
parser.add_argument("--timeout", type=int, default=300, help="Set timeout (in seconds) in between training runs.")
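To make the template plumbing concrete, here is how the updated DPO command string would presumably be rendered with str.format. The placeholder names are taken from the template above and the checkpoint path is the new get_args() default; the experiment and dataset names are hypothetical:

# Sketch of rendering the updated DPO job template; values are illustrative.
dpo_command = DPO_JOB_TEMPLATE.format(
    ckpt_gcs_path="gs://hamishi-east1/easylm/olmo17/olmo_17_7b_hf",
    experiment_name="human_datamodel",       # hypothetical experiment name
    output_gcs_path="gs://ljm-dev/human-preferences",
    dataset_name="example_dataset",          # hypothetical dataset name
)
# The rendered string is the `python3 -m EasyLM.models.olmo.olmo_train_dpo ...`
# invocation that gets executed on the TPU VM.
print(dpo_command)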