From cc7dd8bfe480892cf774a3bff02d4103a3a6008d Mon Sep 17 00:00:00 2001
From: Eric Tang
Date: Tue, 4 Feb 2025 20:16:32 +0000
Subject: [PATCH] fix ProcessPoolExecutor + Ray response SIGSEGV bug

---
 .gitignore                                              | 2 +-
 skythought/skythought_evals/inference_and_check.py      | 3 +++
 skythought/skythought_evals/ray_configs/ray_config.yaml | 6 +++---
 skythought/skythought_evals/tasks/aime/aime.yaml        | 3 ++-
 skythought/skythought_evals/tasks/aime/aime_handler.py  | 4 ++++
 skythought/skythought_evals/util/model_utils.py         | 2 ++
 6 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3878c35..1001d78 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,7 +165,7 @@ cython_debug/
 
 # Vim
 *.swp
 
-.json
+*.json
 token_usage/
 run_all.sh
diff --git a/skythought/skythought_evals/inference_and_check.py b/skythought/skythought_evals/inference_and_check.py
index 2310cc0..911acd6 100644
--- a/skythought/skythought_evals/inference_and_check.py
+++ b/skythought/skythought_evals/inference_and_check.py
@@ -91,6 +91,9 @@ def inference(llm, conversations, max_tokens, temp, args):
         responses = [
             Response.from_ray_response(response) for response in responses.iter_rows()
         ]
+        import copy
+
+        responses = copy.deepcopy(responses)
         responses = sorted(responses, key=lambda x: x.index)
     elif args.model.startswith("openai"):
         fetch_partial = partial(
diff --git a/skythought/skythought_evals/ray_configs/ray_config.yaml b/skythought/skythought_evals/ray_configs/ray_config.yaml
index aee74bb..e56a183 100644
--- a/skythought/skythought_evals/ray_configs/ray_config.yaml
+++ b/skythought/skythought_evals/ray_configs/ray_config.yaml
@@ -1,7 +1,7 @@
 llm_engine: vllm # currently only vllm supported
-accelerator_type: A100-80G # accelerator name as specified here: https://docs.ray.io/en/master/ray-core/accelerator-types.html#accelerator-types
+accelerator_type: H100 # accelerator name as specified here: https://docs.ray.io/en/master/ray-core/accelerator-types.html#accelerator-types
 engine_kwargs: # vllm engine kwargs
-  tensor_parallel_size: 4
+  tensor_parallel_size: 1
   gpu_memory_utilization: 0.9
   # other optional vllm engine kwargs to tune performance!
   # pipeline_parallel_size: 1
@@ -19,5 +19,5 @@ runtime_env:
   env_vars:
     VLLM_ATTENTION_BACKEND: "FLASH_ATTN"
 env_config:
-  num_replicas: 2 # number of vllm replicas
+  num_replicas: 8 # number of vllm replicas
   batch_size: 128 # ray pipeline internal batch size (used for map_batches call internally). Should usually be set to a value in [64, 128, 256] for best performance.
diff --git a/skythought/skythought_evals/tasks/aime/aime.yaml b/skythought/skythought_evals/tasks/aime/aime.yaml
index 8df89f0..e512c89 100644
--- a/skythought/skythought_evals/tasks/aime/aime.yaml
+++ b/skythought/skythought_evals/tasks/aime/aime.yaml
@@ -5,4 +5,5 @@ question_key: problem
 answer_key: answer
 templating_parameters:
   regular_template: "Return your final response within \\boxed{{}}. {prompt}"
-  sky_template: "{prompt}\nReturn your final response within \\boxed{{}}"
\ No newline at end of file
+  sky_template: "{prompt}\nReturn your final response within \\boxed{{}}"
+  r1_template: "Please reason step by step, and put your final answer within \\boxed{{}}. {prompt}"
\ No newline at end of file
diff --git a/skythought/skythought_evals/tasks/aime/aime_handler.py b/skythought/skythought_evals/tasks/aime/aime_handler.py
index 9e0756c..63a93fb 100644
--- a/skythought/skythought_evals/tasks/aime/aime_handler.py
+++ b/skythought/skythought_evals/tasks/aime/aime_handler.py
@@ -11,6 +11,10 @@ def generate_prompt(self, problem: Dict, model):
             return self.task_config.templating_parameters["sky_template"].format(
                 prompt=problem["problem"]
             )
+        elif "DeepSeek-R1" in MODEL_TO_NAME[model]:
+            return self.task_config.templating_parameters["r1_template"].format(
+                prompt=problem["problem"]
+            )
         else:
             return self.task_config.templating_parameters["regular_template"].format(
                 prompt=problem["problem"]
diff --git a/skythought/skythought_evals/util/model_utils.py b/skythought/skythought_evals/util/model_utils.py
index 5478475..436fd9c 100644
--- a/skythought/skythought_evals/util/model_utils.py
+++ b/skythought/skythought_evals/util/model_utils.py
@@ -52,6 +52,7 @@
     "openai/o1-mini": "Question: {input}\nAnswer: ",
     "openai/o1-preview": "Question: {input}\nAnswer: ",
     "openai/gpt-4o-mini": "User: {input}\nPlease reason step by step, and put your final answer within \\boxed{{}}.\n\nAssistant:",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": "",
 }
 
 MODEL_TO_NAME = {
@@ -68,6 +69,7 @@
     "openai/o1-mini": "o1-mini",
     "openai/o1-preview": "o1-preview",
     "openai/gpt-4o-mini": "gpt-4o-mini",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": "DeepSeek-R1-Distill-Qwen-7B",
 }
 
 SUBPROBLEM_SPLIT_PROMPT = """
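
Note on the copy.deepcopy hunk in inference_and_check.py, which is the actual
SIGSEGV fix: rows coming out of Ray Data's iter_rows() can hold zero-copy
views into Ray's shared-memory object store, and a plausible reading of this
commit is that pickling such objects into ProcessPoolExecutor workers is what
triggered the segfault; deep-copying first materializes them as plain Python
objects. Below is a minimal, self-contained sketch of that pattern, assuming
hypothetical check_one and score_all helpers (not the repo's actual API):

    import copy
    from concurrent.futures import ProcessPoolExecutor

    def check_one(response):
        # Hypothetical stand-in for the per-response checking step.
        return len(response["content"])

    def score_all(rows):
        # Detach every row from Ray's object store by deep-copying it into
        # plain Python objects before it crosses a process boundary.
        rows = copy.deepcopy(list(rows))
        with ProcessPoolExecutor(max_workers=4) as pool:
            return list(pool.map(check_one, rows))

    if __name__ == "__main__":
        print(score_all([{"content": "abc"}, {"content": "de"}]))  # [3, 2]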