Skip to content

Commit

Permalink
Merge pull request #135 from OpenFn/132-fix-args
Browse files Browse the repository at this point in the history
Refactor services entry point to use named arguments
  • Loading branch information
josephjclark authored Dec 16, 2024
2 parents 93b146c + b984585 commit fa3372c
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 58 deletions.
28 changes: 3 additions & 25 deletions platform/src/bridge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,38 +30,16 @@ export const run = async (
console.log("Initing output file at", outputPath);
await Bun.write(outputPath, "");

// const proc = Bun.spawn(
// [
// "poetry",
// "run",
// "python",
// "services/entry.py",
// scriptName,
// JSON.stringify(args),
// ],
// {
// onExit: async (proc, exitCode, signalCode, error) => {
// // exit handler
// const result = Bun.file("out.json");
// const text = await result.text();
// resolve(JSON.parse(text));
// },
// }
// );

// Use nodejs spawn
// I seem to have to use this because the bun stream doesn't work with readline

const proc = spawn(
"poetry",
[
"run",
"python",
"services/entry.py",
scriptName,
inputPath,
outputPath,
`${port}`,
...(inputPath ? ["--input", inputPath] : []),
...(outputPath ? ["--output", outputPath] : []),
...(port ? ["--port", `${port}`] : []),
],
{}
);
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ optional = true
[tool.poetry.group.ft.dependencies]
torch = "^2.3.0"

[tool.poetry.group.dev]
optional = false

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.4"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
Expand Down
2 changes: 1 addition & 1 deletion services/embeddings_demo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ This demo currently uses the Zilliz embedding storage provider for OpenAI embedd
This demo can be run from the services folder via the entry.py module:

```bash
python entry.py embeddings_demo embeddings_demo/demo_data/input_data.json tmp/output.json
python entry.py embeddings_demo --input embeddings_demo/demo_data/input_data.json --output tmp/output.json
```
78 changes: 46 additions & 32 deletions services/entry.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,53 @@
import sys
import json
import uuid
import argparse
from dotenv import load_dotenv
from util import set_apollo_port, ApolloError

load_dotenv()


def call(service: str, input_path: str, output_path: str) -> dict:
def call(
service: str, *, input_path: str | None = None, output_path: str | None = None, apollo_port: int | None = None
) -> dict:
"""
Dynamically imports a module and invokes its main function with input data.
:param service: The name of the service/module to invoke.
:param input_path: Path to the input JSON file.
:param output_path: Path to write the output JSON file.
:return: Result from the service as a dictionary.
:param service: The name of the service/module to invoke
:param input_path: Optional path to the input JSON file
:param output_path: Optional path to write the output JSON file
:param apollo_port: Optional port number for Apollo server
:return: Result from the service as a dictionary
"""
if apollo_port is not None:
set_apollo_port(apollo_port)

module_name = f"{service}.{service}"

with open(input_path, "r") as f:
data = json.load(f)
data = {}
if input_path:
try:
with open(input_path, "r") as f:
data = json.load(f)
except FileNotFoundError:
return ApolloError(code=500, message=f"Input file not found: {input_path}", type="INTERNAL_ERROR").to_dict()
except json.JSONDecodeError:
return ApolloError(code=500, message="Invalid JSON input", type="INTERNAL_ERROR").to_dict()

try:
m = __import__(module_name, fromlist=["main"])
result = m.main(data)
except ModuleNotFoundError as e:
return ApolloError(code=500, message=str(e), type="INTERNAL_ERROR").to_dict()
except ApolloError as e:
result = e.to_dict()
except Exception as e:
result = ApolloError(
code=500,
message=str(e),
type="INTERNAL_ERROR"
).to_dict()
result = ApolloError(code=500, message=str(e), type="INTERNAL_ERROR").to_dict()

with open(output_path, "w") as f:
json.dump(result, f)
if output_path:
with open(output_path, "w") as f:
json.dump(result, f)

return result

Expand All @@ -44,33 +57,34 @@ def main():
Entry point when the script is run directly.
Reads arguments from stdin and calls the appropriate service.
"""
mod_name = sys.argv[1]
input_path = sys.argv[2]
output_path = None

if len(sys.argv) == 5:
output_path = sys.argv[3]
else:
print("Auto-generating output path...")
parser = argparse.ArgumentParser(description="OpenFn Apollo Service Runner")
parser.add_argument("service", help="Name of the service to run")
parser.add_argument("--input", "-i", help="Path to input JSON file")
parser.add_argument("--output", "-o", help="Path to output JSON file (auto-generated if not provided)")
parser.add_argument("--port", "-p", type=int, help="Apollo server port number")

args = parser.parse_args()

if not args.output:
id = uuid.uuid4()
output_path = f"tmp/data/{id}.json"
print(f"Result will be output to {output_path}")
args.output = f"tmp/data/{id}.json"
print(f"Result will be output to {args.output}")

if len(sys.argv) >= 5:
apollo_port = sys.argv[4]
if apollo_port:
print(f"Setting Apollo port to {apollo_port}")
set_apollo_port(apollo_port)
if args.port:
print(f"Setting Apollo port to {args.port}")
set_apollo_port(args.port)

print(f"Calling services/{mod_name} ...")
print(f"Calling services/{args.service} ...")
print()

result = call(mod_name, input_path, output_path)
result = call(service=args.service, input_path=args.input, output_path=args.output, apollo_port=args.port)

print()
print("Done!")
print(result)

return result


if __name__ == "__main__":
main()
main()
100 changes: 100 additions & 0 deletions services/entry_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import pytest
import json
import os
from pathlib import Path
from entry import call, main
import argparse
from unittest.mock import patch
from util import apollo


def setup_module():
"""Setup test directories"""
Path("tmp/data").mkdir(parents=True, exist_ok=True)


def test_minimal_call():
"""Test calling with just the service name"""
result = call("echo")
assert result == {}


def test_call_with_input():
"""Test calling with service name and input"""
input_path = "tmp/test_input.json"
test_data = {"test": "data"}

with open(input_path, "w") as f:
json.dump(test_data, f)

result = call("echo", input_path=input_path)
assert result == test_data


def test_command_line_minimal():
"""Test the absolute minimum command line usage"""
test_args = argparse.Namespace(service="echo", input=None, output=None, port=None)

with patch("argparse.ArgumentParser.parse_args", return_value=test_args):
result = main()
assert result == {}


def test_auto_generated_output():
"""Test that output path is auto-generated when not provided"""
test_args = argparse.Namespace(service="echo", input=None, output=None, port=None)

with patch("argparse.ArgumentParser.parse_args", return_value=test_args):
main()
files = list(Path("tmp/data").glob("*.json"))
assert len(files) > 0


def test_port_setting():
"""Test that port is properly set when provided"""
test_args = argparse.Namespace(service="echo", input=None, output=None, port=5000)

with patch("argparse.ArgumentParser.parse_args", return_value=test_args):
main()

with patch("requests.post") as mock_post:
mock_post.return_value.json.return_value = {"test": "data"}
apollo("test", {})
mock_post.assert_called_with("http://127.0.0.1:5000/services/test", {})


def test_invalid_service():
"""Test handling of invalid service name"""
result = call("nonexistent_service")
assert result["type"] == "INTERNAL_ERROR"
assert result["code"] == 500
assert "No module named" in result["message"]


def test_invalid_input_file():
"""Test handling of nonexistent input file"""
try:
result = call("echo", input_path="nonexistent.json")
assert result["type"] == "INTERNAL_ERROR"
assert result["code"] == 500
except FileNotFoundError:
pytest.fail("FileNotFoundError should be caught and returned as error dict")


def test_output_file_writing():
"""Test that output is written to specified file"""
output_path = "tmp/test_output.json"
test_data = {"test": "data"}

result = call("echo", output_path=output_path)

assert os.path.exists(output_path)
with open(output_path, "r") as f:
written_data = json.load(f)
assert written_data == result


def teardown_module():
"""Clean up test files"""
for f in Path("tmp").glob("test_*.json"):
f.unlink(missing_ok=True)

0 comments on commit fa3372c

Please sign in to comment.