Open Inference Protocol Implementation. (#2609)
* Open Inference Protocol Implementation.

Signed-off-by: Andrews Arokiam <[email protected]>

* Added worker status check.

Signed-off-by: Andrews Arokiam <[email protected]>

* Server metadata now reports the TorchServe version (see the curl sketch below).

Signed-off-by: Andrews Arokiam <[email protected]>
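
A quick way to confirm this once a model server is up is the protocol's server-metadata endpoint; the path is the standard OIP one, while the host, port, and response fields shown here are assumptions:

```bash
# OIP server-metadata endpoint (path per the Open Inference Protocol;
# port assumed to be TorchServe's default inference HTTP port)
curl http://localhost:8080/v2
# Illustrative response shape: {"name": "...", "version": "<TorchServe version>", "extensions": [...]}
```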

* Added OIP e2e test in CI/CD:
    1. Allocated ISVC resources to avoid a pod-startup timeout.
    2. Configured the environment variable INFERENCE_PROTOCOL as 'oip' (the InferenceService sketch below shows one way to set this).
    3. Increased `max_wait_time` for the pod to reach the running state.
    4. Deleted the ISVC once the test passes.

Signed-off-by: Andrews Arokiam <[email protected]>
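
For the CI test described above, the InferenceService might look roughly like the sketch below; the name, storage URI, and resource values are placeholders rather than the ones used in test_mnist.sh, and a later step in this commit renames the variable to `TS_OPEN_INFERENCE_PROTOCOL`:

```yaml
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: torchserve-mnist                      # placeholder name
spec:
  predictor:
    model:
      modelFormat:
        name: pytorch
      storageUri: gs://example-bucket/mnist   # placeholder model location
      env:
        - name: INFERENCE_PROTOCOL            # switches the torchserve-kfs wrapper to OIP (per this step)
          value: "oip"
      resources:                              # explicit requests help avoid the pod-startup timeout noted above
        requests:
          cpu: "1"
          memory: 2Gi
        limits:
          cpu: "1"
          memory: 2Gi
```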

* Addressed review feedback:
    1. Renamed the environment variable to `TS_OPEN_INFERENCE_PROTOCOL`.
    2. Added logic to read `ts_open_inference_protocol=true` from the property file to determine whether OIP is enabled (see the config sketch below).
    3. Added an extra check for the OIP `ModelInferResponse` in gRPC responses.
    4. Used a local path for the proto file in test_mnist.sh.

Signed-off-by: Andrews Arokiam <[email protected]>
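
A minimal sketch of the two switches mentioned above; the property key and variable name come from the commit message, while the file names and CLI invocation are just one plausible setup:

```bash
# Option 1: environment variable (name per the commit message)
export TS_OPEN_INFERENCE_PROTOCOL=true

# Option 2: property-file flag (key per the commit message)
echo "ts_open_inference_protocol=true" >> config.properties

# Start TorchServe against that properties file (standard TorchServe CLI flags)
torchserve --start --ts-config config.properties --model-store model_store
```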

* Changes from review feedback:
    1. Modified the `server live`, `server health`, and `model ready` check methods.
    2. Added `server live`, `server health`, and `model ready` tests for gRPC.
    3. Added `server live` and `server health` tests for HTTP (see the endpoint sketch below).

Signed-off-by: Andrews Arokiam <[email protected]>
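
The HTTP checks correspond to the standard Open Inference Protocol paths; the host, port, and model name below are assumptions for illustration:

```bash
curl http://localhost:8080/v2/health/live          # server live
curl http://localhost:8080/v2/health/ready         # server health / ready
curl http://localhost:8080/v2/models/mnist/ready   # model ready (placeholder model name)
```

The gRPC counterparts (ServerLive, ServerReady, ModelReady) go through the OIP gRPC service added in this commit; see the grpcurl sketches after the diffs below.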

* Added a new worker command to handle KServe OIP inference requests.

Signed-off-by: Andrews Arokiam <[email protected]>

* Added the `OIPPREDICT` worker command to `Job` to allow OIP requests.

Signed-off-by: Andrews Arokiam <[email protected]>

* Format changes.

Signed-off-by: Andrews Arokiam <[email protected]>

---------

Signed-off-by: Andrews Arokiam <[email protected]>
Co-authored-by: lxning <[email protected]>
andyi2it and lxning authored Jan 24, 2024
1 parent 35b0b93 commit 9e6f1c2
Showing 17 changed files with 1,049 additions and 5 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/kserve_cpu_tests.yml
@@ -29,6 +29,10 @@ jobs:
        with:
          python-version: 3.8
          architecture: x64
      - name: Install grpcurl
        run: |
          sudo curl -sSL https://github.com/fullstorydev/grpcurl/releases/download/v1.8.0/grpcurl_1.8.0_linux_x86_64.tar.gz | sudo tar -xz -C /usr/local/bin grpcurl
          sudo chmod +x /usr/local/bin/grpcurl
      - name: Checkout TorchServe
        uses: actions/checkout@v3
      - name: Checkout kserve repo
@@ -37,5 +41,5 @@
          repository: kserve/kserve
          ref: v0.11.1
          path: kserve
      - name: Validate torchserve-kfs
      - name: Validate torchserve-kfs and Open Inference Protocol
        run: ./kubernetes/kserve/tests/scripts/test_mnist.sh
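
With grpcurl available in the job, a liveness smoke test along these lines becomes possible; the proto path and the target address are placeholders, not necessarily what test_mnist.sh runs:

```bash
# ServerLive over the OIP gRPC service; the service and method names come from the
# KServe Open Inference Protocol proto. GRPC_HOST/GRPC_PORT and the proto path are placeholders.
grpcurl -plaintext -proto open_inference_grpc.proto -d '{}' \
  "${GRPC_HOST}:${GRPC_PORT}" inference.GRPCInferenceService/ServerLive
```
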
11 changes: 11 additions & 0 deletions frontend/server/src/main/java/org/pytorch/serve/ModelServer.java
@@ -64,6 +64,7 @@ public class ModelServer {
    private ServerGroups serverGroups;
    private Server inferencegRPCServer;
    private Server managementgRPCServer;
    private Server OIPgRPCServer;
    private List<ChannelFuture> futures = new ArrayList<>(2);
    private AtomicBoolean stopped = new AtomicBoolean(false);
    private ConfigManager configManager;
@@ -453,6 +454,16 @@ private Server startGRPCServer(ConnectorType connectorType) throws IOException {
                                GRPCServiceFactory.getgRPCService(connectorType),
                                new GRPCInterceptor()));

        if (connectorType == ConnectorType.INFERENCE_CONNECTOR
                && ConfigManager.getInstance().isOpenInferenceProtocol()) {
            s.maxInboundMessageSize(configManager.getMaxRequestSize())
                    .addService(
                            ServerInterceptors.intercept(
                                    GRPCServiceFactory.getgRPCService(
                                            ConnectorType.OPEN_INFERENCE_CONNECTOR),
                                    new GRPCInterceptor()));
        }

        if (configManager.isGRPCSSLEnabled()) {
            s.useTransportSecurity(
                    new File(configManager.getCertificateFile()),
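
Since the OIP service is registered on the same gRPC server as the inference connector, it should be reachable on the inference gRPC port once a worker is up; the sketch below assumes TorchServe's default gRPC inference port (7070), a local proto file, and a placeholder model name:

```bash
# ModelReady over the OIP gRPC service registered above
grpcurl -plaintext -proto open_inference_grpc.proto \
  -d '{"name": "mnist"}' \
  localhost:7070 inference.GRPCInferenceService/ModelReady
```
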
ServerInitializer.java
@@ -13,12 +13,15 @@
import org.pytorch.serve.http.api.rest.ApiDescriptionRequestHandler;
import org.pytorch.serve.http.api.rest.InferenceRequestHandler;
import org.pytorch.serve.http.api.rest.ManagementRequestHandler;
import org.pytorch.serve.http.api.rest.OpenInferenceProtocolRequestHandler;
import org.pytorch.serve.http.api.rest.PrometheusMetricsRequestHandler;
import org.pytorch.serve.servingsdk.impl.PluginsManager;
import org.pytorch.serve.util.ConfigManager;
import org.pytorch.serve.util.ConnectorType;
import org.pytorch.serve.workflow.api.http.WorkflowInferenceRequestHandler;
import org.pytorch.serve.workflow.api.http.WorkflowMgmtRequestHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* A special {@link io.netty.channel.ChannelInboundHandler} which offers an easy way to initialize a
@@ -29,6 +32,7 @@ public class ServerInitializer extends ChannelInitializer<Channel> {

    private ConnectorType connectorType;
    private SslContext sslCtx;
    private static final Logger logger = LoggerFactory.getLogger(ServerInitializer.class);

    /**
     * Creates a new {@code HttpRequestHandler} instance.
@@ -65,6 +69,14 @@ public void initChannel(Channel ch) {
                                    PluginsManager.getInstance().getInferenceEndpoints()));
            httpRequestHandlerChain =
                    httpRequestHandlerChain.setNextHandler(new WorkflowInferenceRequestHandler());

            // Added OIP protocol with inference connector
            if (ConfigManager.getInstance().isOpenInferenceProtocol()) {
                logger.info("OIP added with handler chain");
                httpRequestHandlerChain =
                        httpRequestHandlerChain.setNextHandler(
                                new OpenInferenceProtocolRequestHandler());
            }
        }
        if (ConnectorType.ALL.equals(connectorType)
                || ConnectorType.MANAGEMENT_CONNECTOR.equals(connectorType)) {
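
With OpenInferenceProtocolRequestHandler in the chain, the inference HTTP port should also accept v2-style requests; the sketch below assumes TorchServe's default inference port (8080) and uses a placeholder model name and input tensor:

```bash
# OIP REST inference request (body shape follows the Open Inference Protocol)
curl -X POST http://localhost:8080/v2/models/mnist/infer \
  -H "Content-Type: application/json" \
  -d '{"inputs": [{"name": "input-0", "shape": [1, 4], "datatype": "FP32", "data": [0.1, 0.2, 0.3, 0.4]}]}'
```
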
GRPCServiceFactory.java
@@ -16,6 +16,9 @@ public static BindableService getgRPCService(ConnectorType connectorType) {
            case INFERENCE_CONNECTOR:
                torchServeService = new InferenceImpl();
                break;
            case OPEN_INFERENCE_CONNECTOR:
                torchServeService = new OpenInferenceProtocolImpl();
                break;
            default:
                break;
        }