From b9a3f1c753c0b3ddb92ac1cf1e97ed0a2789de81 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 21 Jan 2025 21:49:30 +0100 Subject: [PATCH 1/3] Fix eval on remote runtime (#6398) --- evaluation/benchmarks/swe_bench/eval_infer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluation/benchmarks/swe_bench/eval_infer.py b/evaluation/benchmarks/swe_bench/eval_infer.py index 52972a920e8e..cc9dca069440 100644 --- a/evaluation/benchmarks/swe_bench/eval_infer.py +++ b/evaluation/benchmarks/swe_bench/eval_infer.py @@ -71,7 +71,7 @@ def process_git_patch(patch): return patch -def get_config(instance: pd.Series) -> AppConfig: +def get_config(metadata: EvalMetadata, instance: pd.Series) -> AppConfig: # We use a different instance image for the each instance of swe-bench eval base_container_image = get_instance_docker_image(instance['instance_id']) logger.info( @@ -132,7 +132,7 @@ def process_instance( else: logger.info(f'Starting evaluation for instance {instance.instance_id}.') - config = get_config(instance) + config = get_config(metadata, instance) instance_id = instance.instance_id model_patch = instance['model_patch'] test_spec: TestSpec = instance['test_spec'] From b468150f2abf0f4c8bcf05072f808dd8a086e9c6 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Tue, 21 Jan 2025 16:54:57 -0500 Subject: [PATCH 2/3] fix(codeact): make sure agent sees the prefix/suffix as part of observation (#6400) --- openhands/agenthub/codeact_agent/codeact_agent.py | 9 ++------- openhands/events/observation/commands.py | 6 ++++-- tests/unit/test_codeact_agent.py | 13 ++++++++++--- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 37c52855148a..ecb756781abe 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -277,16 +277,11 @@ def get_observation_message( # if it doesn't have tool call metadata, it was triggered by a user action if obs.tool_call_metadata is None: text = truncate_content( - f'\nObserved result of command executed by user:\n{obs.content}', + f'\nObserved result of command executed by user:\n{obs.to_agent_observation()}', max_message_chars, ) else: - text = truncate_content( - obs.content - + f'\n[Python Interpreter: {obs.metadata.py_interpreter_path}]', - max_message_chars, - ) - text += f'\n[Command finished with exit code {obs.exit_code}]' + text = truncate_content(obs.to_agent_observation(), max_message_chars) message = Message(role='user', content=[TextContent(text=text)]) elif isinstance(obs, IPythonRunCellObservation): text = obs.content diff --git a/openhands/events/observation/commands.py b/openhands/events/observation/commands.py index 31b4472f7ee7..edddb250cda5 100644 --- a/openhands/events/observation/commands.py +++ b/openhands/events/observation/commands.py @@ -149,16 +149,18 @@ def __str__(self) -> str: f'**CmdOutputObservation (source={self.source}, exit code={self.exit_code}, ' f'metadata={json.dumps(self.metadata.model_dump(), indent=2)})**\n' '--BEGIN AGENT OBSERVATION--\n' - f'{self._to_agent_observation()}\n' + f'{self.to_agent_observation()}\n' '--END AGENT OBSERVATION--' ) - def _to_agent_observation(self) -> str: + def to_agent_observation(self) -> str: ret = f'{self.metadata.prefix}{self.content}{self.metadata.suffix}' if self.metadata.working_dir: ret += f'\n[Current working directory: {self.metadata.working_dir}]' if self.metadata.py_interpreter_path: ret += f'\n[Python interpreter: {self.metadata.py_interpreter_path}]' + if self.metadata.exit_code != -1: + ret += f'\n[Command finished with exit code {self.metadata.exit_code}]' return ret diff --git a/tests/unit/test_codeact_agent.py b/tests/unit/test_codeact_agent.py index 26fa4428826e..39badebff046 100644 --- a/tests/unit/test_codeact_agent.py +++ b/tests/unit/test_codeact_agent.py @@ -46,7 +46,7 @@ def agent() -> CodeActAgent: agent = CodeActAgent(llm=LLM(LLMConfig()), config=config) agent.llm = Mock() agent.llm.config = Mock() - agent.llm.config.max_message_chars = 100 + agent.llm.config.max_message_chars = 1000 return agent @@ -65,10 +65,15 @@ def test_cmd_output_observation_message(agent: CodeActAgent): content='Command output', metadata=CmdOutputMetadata( exit_code=0, + prefix='[THIS IS PREFIX]', + suffix='[THIS IS SUFFIX]', ), ) - results = agent.get_observation_message(obs, tool_call_id_to_message={}) + tool_call_id_to_message = {} + results = agent.get_observation_message( + obs, tool_call_id_to_message=tool_call_id_to_message + ) assert len(results) == 1 result = results[0] @@ -76,8 +81,10 @@ def test_cmd_output_observation_message(agent: CodeActAgent): assert result.role == 'user' assert len(result.content) == 1 assert isinstance(result.content[0], TextContent) - assert 'Command output' in result.content[0].text + assert 'Observed result of command executed by user:' in result.content[0].text assert '[Command finished with exit code 0]' in result.content[0].text + assert '[THIS IS PREFIX]' in result.content[0].text + assert '[THIS IS SUFFIX]' in result.content[0].text def test_ipython_run_cell_observation_message(agent: CodeActAgent): From 318c811817cab33751ef40e74d8f1b3c4b0fe831 Mon Sep 17 00:00:00 2001 From: tofarr Date: Tue, 21 Jan 2025 15:32:46 -0700 Subject: [PATCH 3/3] Added check to shutdown hook (#6402) --- openhands/runtime/impl/docker/docker_runtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands/runtime/impl/docker/docker_runtime.py b/openhands/runtime/impl/docker/docker_runtime.py index bf06e00e854f..56e3f01a64e5 100644 --- a/openhands/runtime/impl/docker/docker_runtime.py +++ b/openhands/runtime/impl/docker/docker_runtime.py @@ -66,7 +66,7 @@ def __init__( headless_mode: bool = True, ): global _atexit_registered - if not _atexit_registered: + if not _atexit_registered and not config.sandbox.keep_runtime_alive: _atexit_registered = True atexit.register(remove_all_runtime_containers)