From cc7dd8bfe480892cf774a3bff02d4103a3a6008d Mon Sep 17 00:00:00 2001
From: Eric Tang
Date: Tue, 4 Feb 2025 20:16:32 +0000
Subject: [PATCH] fix ProcessPoolExecutor + Ray response SIGSEGV bug

---
 .gitignore                                              | 2 +-
 skythought/skythought_evals/inference_and_check.py      | 3 +++
 skythought/skythought_evals/ray_configs/ray_config.yaml | 6 +++---
 skythought/skythought_evals/tasks/aime/aime.yaml        | 3 ++-
 skythought/skythought_evals/tasks/aime/aime_handler.py  | 4 ++++
 skythought/skythought_evals/util/model_utils.py         | 2 ++
 6 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3878c35..1001d78 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,7 +165,7 @@ cython_debug/
 
 # Vim
 *.swp
 
-.json
+*.json
 token_usage/
 run_all.sh
diff --git a/skythought/skythought_evals/inference_and_check.py b/skythought/skythought_evals/inference_and_check.py
index 2310cc0..911acd6 100644
--- a/skythought/skythought_evals/inference_and_check.py
+++ b/skythought/skythought_evals/inference_and_check.py
@@ -91,6 +91,9 @@ def inference(llm, conversations, max_tokens, temp, args):
         responses = [
             Response.from_ray_response(response) for response in responses.iter_rows()
         ]
+        import copy
+
+        responses = copy.deepcopy(responses)
         responses = sorted(responses, key=lambda x: x.index)
     elif args.model.startswith("openai"):
         fetch_partial = partial(
diff --git a/skythought/skythought_evals/ray_configs/ray_config.yaml b/skythought/skythought_evals/ray_configs/ray_config.yaml
index aee74bb..e56a183 100644
--- a/skythought/skythought_evals/ray_configs/ray_config.yaml
+++ b/skythought/skythought_evals/ray_configs/ray_config.yaml
@@ -1,7 +1,7 @@
 llm_engine: vllm # currently only vllm supported
-accelerator_type: A100-80G # accelerator name as specified here: https://docs.ray.io/en/master/ray-core/accelerator-types.html#accelerator-types
+accelerator_type: H100 # accelerator name as specified here: https://docs.ray.io/en/master/ray-core/accelerator-types.html#accelerator-types
 engine_kwargs: # vllm engine kwargs
-  tensor_parallel_size: 4
+  tensor_parallel_size: 1
   gpu_memory_utilization: 0.9
   # other optional vllm engine kwargs to tune performance!
   # pipeline_parallel_size: 1
@@ -19,5 +19,5 @@ runtime_env:
   env_vars:
     VLLM_ATTENTION_BACKEND: "FLASH_ATTN"
 env_config:
-  num_replicas: 2 # number of vllm replicas
+  num_replicas: 8 # number of vllm replicas
   batch_size: 128 # ray pipeline internal batch size (used for map_batches call internally). Should usually be set to a value in [64, 128, 256] for best performance.
diff --git a/skythought/skythought_evals/tasks/aime/aime.yaml b/skythought/skythought_evals/tasks/aime/aime.yaml
index 8df89f0..e512c89 100644
--- a/skythought/skythought_evals/tasks/aime/aime.yaml
+++ b/skythought/skythought_evals/tasks/aime/aime.yaml
@@ -5,4 +5,5 @@ question_key: problem
 answer_key: answer
 templating_parameters:
   regular_template: "Return your final response within \\boxed{{}}. {prompt}"
-  sky_template: "{prompt}\nReturn your final response within \\boxed{{}}"
\ No newline at end of file
+  sky_template: "{prompt}\nReturn your final response within \\boxed{{}}"
+  r1_template: "Please reason step by step, and put your final answer within \\boxed{{}}. {prompt}"
\ No newline at end of file
diff --git a/skythought/skythought_evals/tasks/aime/aime_handler.py b/skythought/skythought_evals/tasks/aime/aime_handler.py
index 9e0756c..63a93fb 100644
--- a/skythought/skythought_evals/tasks/aime/aime_handler.py
+++ b/skythought/skythought_evals/tasks/aime/aime_handler.py
@@ -11,6 +11,10 @@ def generate_prompt(self, problem: Dict, model):
             return self.task_config.templating_parameters["sky_template"].format(
                 prompt=problem["problem"]
             )
+        elif "DeepSeek-R1" in MODEL_TO_NAME[model]:
+            return self.task_config.templating_parameters["r1_template"].format(
+                prompt=problem["problem"]
+            )
         else:
             return self.task_config.templating_parameters["regular_template"].format(
                 prompt=problem["problem"]
diff --git a/skythought/skythought_evals/util/model_utils.py b/skythought/skythought_evals/util/model_utils.py
index 5478475..436fd9c 100644
--- a/skythought/skythought_evals/util/model_utils.py
+++ b/skythought/skythought_evals/util/model_utils.py
@@ -52,6 +52,7 @@
     "openai/o1-mini": "Question: {input}\nAnswer: ",
     "openai/o1-preview": "Question: {input}\nAnswer: ",
     "openai/gpt-4o-mini": "User: {input}\nPlease reason step by step, and put your final answer within \\boxed{{}}.\n\nAssistant:",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": "",
 }
 
 MODEL_TO_NAME = {
@@ -68,6 +69,7 @@
     "openai/o1-mini": "o1-mini",
     "openai/o1-preview": "o1-preview",
     "openai/gpt-4o-mini": "gpt-4o-mini",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": "DeepSeek-R1-Distill-Qwen-7B",
 }
 
 SUBPROBLEM_SPLIT_PROMPT = """
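
Note on the copy.deepcopy hunk in inference_and_check.py, which is the actual
SIGSEGV fix: rows coming out of Ray Data's iter_rows() can hold zero-copy
views into Ray's shared-memory object store, and a plausible reading of this
commit is that pickling such objects into ProcessPoolExecutor workers is what
triggered the segfault; deep-copying first materializes them as plain Python
objects. Below is a minimal, self-contained sketch of that pattern, assuming
hypothetical check_one and score_all helpers (not the repo's actual API):

    import copy
    from concurrent.futures import ProcessPoolExecutor

    def check_one(response):
        # Hypothetical stand-in for the per-response checking step.
        return len(response["content"])

    def score_all(rows):
        # Detach every row from Ray's object store by deep-copying it into
        # plain Python objects before it crosses a process boundary.
        rows = copy.deepcopy(list(rows))
        with ProcessPoolExecutor(max_workers=4) as pool:
            return list(pool.map(check_one, rows))

    if __name__ == "__main__":
        print(score_all([{"content": "abc"}, {"content": "de"}]))  # [3, 2]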