Skip to content

Commit

Permalink
load all data
Browse files Browse the repository at this point in the history
  • Loading branch information
neginraoof committed Jan 23, 2025
1 parent e0eced1 commit 3ee9845
Showing 1 changed file with 30 additions and 9 deletions.
39 changes: 30 additions & 9 deletions eval/chat_benchmarks/LiveCodeBench/eval_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ def generate_responses(self, model: LM) -> Dict[str, Any]:
# Prepare instances for model
all_instances = []
for idx, example in enumerate(examples):
if examples["is_stdin"]:

if example["is_stdin"]:
prompt_text = "Generate an executable Python function generated from the given prompt. The function should take stdin as input and print the output. Simply call the function after the definition." + example["prompt"]
else:
prompt_text = "Generate an executable Python function generated from the given prompt. Return the function body without invoking it at the final solution." + example["prompt"]
Expand Down Expand Up @@ -172,11 +173,31 @@ def evaluate_responses(self, responses: Dict[str, Any]) -> Dict[str, float]:

def load_questions(self, chunk_size: int = 200) -> List[Dict[str, str]]:
    """Load every LiveCodeBench question from source.

    The ``test`` split of ``livecodebench/code_generation_lite``
    (``release_v2``) is loaded once and then post-processed in chunks of
    ``chunk_size`` rows. Each chunk has its ``private_test_cases`` column
    translated with ``translate_private_test_cases`` and is then converted
    with ``map_to_example``, dropping the original columns.

    Args:
        chunk_size: Number of rows to post-process per ``map`` pass.
            Defaults to 200, the value previously hard-coded here.

    Returns:
        A list with one mapped example per dataset row, in dataset order.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Load the split once and ask it for its real length instead of assuming
    # a fixed size (previously 511) and probing with slice strings.
    dataset = load_dataset(
        "livecodebench/code_generation_lite",
        version_tag="release_v2",
        split="test",
        trust_remote_code=True,
    )
    total = len(dataset)

    all_examples = []
    for start in range(0, total, chunk_size):
        # Clamp the last chunk explicitly so no out-of-range request is made;
        # this replaces the old `except ValueError: break`, which could drop
        # the final partial chunk and hide unrelated errors.
        stop = min(start + chunk_size, total)
        chunk = dataset.select(range(start, stop))

        # NOTE(review): chunking is kept from the original, presumably to
        # bound the size of each `map` pass — confirm it is still needed.
        chunk = chunk.map(
            lambda example: {
                "private_test_cases": translate_private_test_cases(example["private_test_cases"])
            }
        )
        chunk = chunk.map(map_to_example, remove_columns=chunk.column_names)

        # Iterating a Dataset yields one dict per row.
        all_examples.extend(chunk)

    return all_examples

0 comments on commit 3ee9845

Please sign in to comment.