diff --git a/Dockerfile b/Dockerfile
index dd8b88c1f..3a1f637f7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,8 @@
-FROM golang:1.12.6-alpine3.10 AS build
+FROM golang:1.12.7-alpine3.10 AS build
 
 WORKDIR /go/src/github.com/kubeflow/mpi-operator/
 COPY . /go/src/github.com/kubeflow/mpi-operator/
-RUN go build -o /bin/mpi-operator github.com/kubeflow/mpi-operator/cmd/mpi-operator.v1alpha1
+RUN go build -o /bin/mpi-operator github.com/kubeflow/mpi-operator/cmd/mpi-operator.v1alpha2
 
 FROM alpine:3.10
 COPY --from=build /bin/mpi-operator /bin/mpi-operator
diff --git a/README.md b/README.md
index ac1165912..b2327ac60 100644
--- a/README.md
+++ b/README.md
@@ -39,20 +39,21 @@ ks apply ${ENVIRONMENT} -c mpi-operator
 Alternatively, you can deploy the operator with default settings without using ksonnet by running the following from the repo:
 
 ```shell
+kubectl create -f deploy/crd/crd-v1alpha2.yaml
 kubectl create -f deploy/
 ```
 
 ## Creating an MPI Job
 
-You can create an MPI job by defining an `MPIJob` config file. See [Tensorflow benchmark example](https://github.com/kubeflow/mpi-operator/blob/master/examples/tensorflow-benchmarks.yaml) config file for launching a multi-node TensorFlow benchmark training job. You may change the config file based on your requirements.
+You can create an MPI job by defining an `MPIJob` config file. See the [TensorFlow benchmark example](https://github.com/kubeflow/mpi-operator/blob/master/examples/v1alpha2/tensorflow-benchmarks.yaml) config file for launching a multi-node TensorFlow benchmark training job. You may change the config file based on your requirements.
 
 ```
-cat examples/tensorflow-benchmarks.yaml
+cat examples/v1alpha2/tensorflow-benchmarks.yaml
 ```
 Deploy the `MPIJob` resource to start training:
 
 ```
-kubectl create -f examples/tensorflow-benchmarks.yaml
+kubectl create -f examples/v1alpha2/tensorflow-benchmarks.yaml
 ```
 
 ## Monitoring an MPI Job
@@ -60,45 +61,105 @@ kubectl create -f examples/tensorflow-benchmarks.yaml
 Once the `MPIJob` resource is created, you should now be able to see the created pods matching the specified number of GPUs. You can also monitor the job status from the status section. Here is sample output when the job is successfully completed.
 
 ```
-kubectl get -o yaml mpijobs tensorflow-benchmarks-16
+kubectl get -o yaml mpijobs tensorflow-benchmarks
 ```
 
 ```
-apiVersion: kubeflow.org/v1alpha1
+apiVersion: kubeflow.org/v1alpha2
 kind: MPIJob
 metadata:
-  clusterName: ""
-  creationTimestamp: 2019-01-07T20:32:12Z
+  creationTimestamp: "2019-07-09T22:15:51Z"
   generation: 1
-  name: tensorflow-benchmarks-16
+  name: tensorflow-benchmarks
   namespace: default
-  resourceVersion: "185051397"
-  selfLink: /apis/kubeflow.org/v1alpha1/namespaces/default/mpijobs/tensorflow-benchmarks-16
-  uid: 8dc8c044-127d-11e9-a419-02420bbe29f3
+  resourceVersion: "5645868"
+  selfLink: /apis/kubeflow.org/v1alpha2/namespaces/default/mpijobs/tensorflow-benchmarks
+  uid: 1c5b470f-a297-11e9-964d-88d7f67c6e6d
 spec:
-  gpus: 16
-  template:
-    metadata:
-      creationTimestamp: null
-    spec:
-      containers:
-      - image: mpioperator/tensorflow-benchmarks:latest
-        name: tensorflow-benchmarks
-        resources: {}
+  cleanPodPolicy: Running
+  mpiReplicaSpecs:
+    Launcher:
+      replicas: 1
+      template:
+        spec:
+          containers:
+          - command:
+            - mpirun
+            - --allow-run-as-root
+            - -np
+            - "2"
+            - -bind-to
+            - none
+            - -map-by
+            - slot
+            - -x
+            - NCCL_DEBUG=INFO
+            - -x
+            - LD_LIBRARY_PATH
+            - -x
+            - PATH
+            - -mca
+            - pml
+            - ob1
+            - -mca
+            - btl
+            - ^openib
+            - python
+            - scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py
+            - --model=resnet101
+            - --batch_size=64
+            - --variable_update=horovod
+            image: mpioperator/tensorflow-benchmarks:latest
+            name: tensorflow-benchmarks
+    Worker:
+      replicas: 1
+      template:
+        spec:
+          containers:
+          - image: mpioperator/tensorflow-benchmarks:latest
+            name: tensorflow-benchmarks
+            resources:
+              limits:
+                nvidia.com/gpu: 2
+  slotsPerWorker: 2
 status:
-  launcherStatus: Succeeded
+  completionTime: "2019-07-09T22:17:06Z"
+  conditions:
+  - lastTransitionTime: "2019-07-09T22:15:51Z"
+    lastUpdateTime: "2019-07-09T22:15:51Z"
+    message: MPIJob default/tensorflow-benchmarks is created.
+    reason: MPIJobCreated
+    status: "True"
+    type: Created
+  - lastTransitionTime: "2019-07-09T22:15:54Z"
+    lastUpdateTime: "2019-07-09T22:15:54Z"
+    message: MPIJob default/tensorflow-benchmarks is running.
+    reason: MPIJobRunning
+    status: "False"
+    type: Running
+  - lastTransitionTime: "2019-07-09T22:17:06Z"
+    lastUpdateTime: "2019-07-09T22:17:06Z"
+    message: MPIJob default/tensorflow-benchmarks successfully completed.
+    reason: MPIJobSucceeded
+    status: "True"
+    type: Succeeded
+  replicaStatuses:
+    Launcher:
+      succeeded: 1
+    Worker: {}
+  startTime: "2019-07-09T22:15:51Z"
 ```
 
 
 Training should run for 100 steps and takes a few minutes on a GPU cluster. You can inspect the logs to see the training progress. When the job starts, access the logs from the `launcher` pod:
 
 ```
-PODNAME=$(kubectl get pods -l mpi_job_name=tensorflow-benchmarks-16,mpi_role_type=launcher -o name)
+PODNAME=$(kubectl get pods -l mpi_job_name=tensorflow-benchmarks,mpi_role_type=launcher -o name)
 kubectl logs -f ${PODNAME}
 ```
 
 ```
-TensorFlow:  1.10
+TensorFlow:  1.14
 Model:       resnet101
 Dataset:     imagenet (synthetic)
 Mode:        training
@@ -108,32 +169,29 @@ Batch size:  128 global
 Num batches: 100
 Num epochs:  0.01
 Devices:     ['horovod/gpu:0', 'horovod/gpu:1']
+NUMA bind:   False
 Data format: NCHW
 Optimizer:   sgd
 Variables:   horovod
 
 ...
 
-40	images/sec: 132.1 +/- 0.0 (jitter = 0.2)	9.146
-40	images/sec: 132.1 +/- 0.0 (jitter = 0.1)	9.182
-50	images/sec: 132.1 +/- 0.0 (jitter = 0.2)	9.071
-50	images/sec: 132.1 +/- 0.0 (jitter = 0.2)	9.210
-60	images/sec: 132.2 +/- 0.0 (jitter = 0.2)	9.180
-60	images/sec: 132.2 +/- 0.0 (jitter = 0.2)	9.055
-70	images/sec: 132.1 +/- 0.0 (jitter = 0.2)	9.005
-70	images/sec: 132.1 +/- 0.0 (jitter = 0.2)	9.096
-80	images/sec: 132.1 +/- 0.0 (jitter = 0.2)	9.231
-80	images/sec: 132.1 +/- 0.0 (jitter = 0.2)	9.197
-90	images/sec: 132.1 +/- 0.0 (jitter = 0.2)	9.201
-90	images/sec: 132.1 +/- 0.0 (jitter = 0.2)	9.089
-100	images/sec: 132.1 +/- 0.0 (jitter = 0.2)	9.183
-----------------------------------------------------------------
-total images/sec: 264.26
-----------------------------------------------------------------
-100	images/sec: 132.1 +/- 0.0 (jitter = 0.2)	9.044
-----------------------------------------------------------------
-total images/sec: 264.26
+40	images/sec: 154.4 +/- 0.7 (jitter = 4.0)	8.280
+40	images/sec: 154.4 +/- 0.7 (jitter = 4.1)	8.482
+50	images/sec: 154.8 +/- 0.6 (jitter = 4.0)	8.397
+50	images/sec: 154.8 +/- 0.6 (jitter = 4.2)	8.450
+60	images/sec: 154.5 +/- 0.5 (jitter = 4.1)	8.321
+60	images/sec: 154.5 +/- 0.5 (jitter = 4.4)	8.349
+70	images/sec: 154.5 +/- 0.5 (jitter = 4.0)	8.433
+70	images/sec: 154.5 +/- 0.5 (jitter = 4.4)	8.430
+80	images/sec: 154.8 +/- 0.4 (jitter = 3.6)	8.199
+80	images/sec: 154.8 +/- 0.4 (jitter = 3.8)	8.404
+90	images/sec: 154.6 +/- 0.4 (jitter = 3.7)	8.418
+90	images/sec: 154.6 +/- 0.4 (jitter = 3.6)	8.459
+100	images/sec: 154.2 +/- 0.4 (jitter = 4.0)	8.372
+100	images/sec: 154.2 +/- 0.4 (jitter = 4.0)	8.542
 ----------------------------------------------------------------
+total images/sec: 308.27
 ```
 
 # Docker Images
diff --git a/cmd/kubectl-delivery/Dockerfile b/cmd/kubectl-delivery/Dockerfile
index 1aca029c6..a7fd5e4d4 100644
--- a/cmd/kubectl-delivery/Dockerfile
+++ b/cmd/kubectl-delivery/Dockerfile
@@ -1,13 +1,13 @@
-FROM alpine:3.8 AS build
+FROM alpine:3.10 AS build
 
 # Install kubectl.
-ENV K8S_VERSION v1.13.2
+ENV K8S_VERSION v1.15.0
 RUN apk add --no-cache wget
 RUN wget -q https://storage.googleapis.com/kubernetes-release/release/${K8S_VERSION}/bin/linux/amd64/kubectl
 RUN chmod +x ./kubectl
 RUN mv ./kubectl /bin/kubectl
 
-FROM alpine:3.8
+FROM alpine:3.10
 COPY --from=build /bin/kubectl /bin/kubectl
 COPY deliver_kubectl.sh .
 ENTRYPOINT ["./deliver_kubectl.sh"]
diff --git a/cmd/mpi-operator.v1alpha2/app/options/options.go b/cmd/mpi-operator.v1alpha2/app/options/options.go
index d86851ec7..b402c86e8 100644
--- a/cmd/mpi-operator.v1alpha2/app/options/options.go
+++ b/cmd/mpi-operator.v1alpha2/app/options/options.go
@@ -50,8 +50,8 @@ func (s *ServerOption) AddFlags(fs *flag.FlagSet) {
 		"The container image used to deliver the kubectl binary.")
 
 	fs.StringVar(&s.Namespace, "namespace", v1.NamespaceAll,
-		`The namespace to monitor tfjobs. If unset, it monitors all namespaces cluster-wide. 
-                If set, it only monitors tfjobs in the given namespace.`)
+		`The namespace to monitor mpijobs. If unset, it monitors all namespaces cluster-wide. 
+                If set, it only monitors mpijobs in the given namespace.`)
 
 	fs.IntVar(&s.Threadiness, "threadiness", 2,
 		`How many threads to process the main logic`)
diff --git a/deploy/2-rbac.yaml b/deploy/2-rbac.yaml
index 9fe9fd7b7..a8a470530 100644
--- a/deploy/2-rbac.yaml
+++ b/deploy/2-rbac.yaml
@@ -26,6 +26,14 @@ rules:
   - pods/exec
   verbs:
   - create
+- apiGroups:
+  - ""
+  resources:
+  - endpoints
+  verbs:
+  - create
+  - get
+  - update
 - apiGroups:
   - ""
   resources:
@@ -80,6 +88,7 @@ rules:
   - kubeflow.org
   resources:
   - mpijobs
+  - mpijobs/status
   verbs:
   - "*"
 ---
diff --git a/deploy/3-mpi-operator.yaml b/deploy/3-mpi-operator.yaml
index 26d78e6ab..741be0c9b 100644
--- a/deploy/3-mpi-operator.yaml
+++ b/deploy/3-mpi-operator.yaml
@@ -21,7 +21,6 @@ spec:
         image: mpioperator/mpi-operator:latest
         args: [
           "-alsologtostderr",
-          "--gpus-per-node", "8",
           "--kubectl-delivery-image",
           "mpioperator/kubectl-delivery:latest"
         ]
diff --git a/examples/tensorflow-benchmarks/Dockerfile b/examples/tensorflow-benchmarks/Dockerfile
index 568b3e3ad..6f99d99f3 100644
--- a/examples/tensorflow-benchmarks/Dockerfile
+++ b/examples/tensorflow-benchmarks/Dockerfile
@@ -1,16 +1,8 @@
-FROM uber/horovod:0.15.2-tf1.12.0-torch1.0.0-py2.7
-
-# Temporary fix until Horovod pushes out a new release.
-# See https://github.com/uber/horovod/pull/700
-RUN sed -i '/^NCCL_SOCKET_IFNAME.*/d' /etc/nccl.conf
+FROM horovod/horovod:0.16.4-tf1.14.0-torch1.1.0-mxnet1.4.1-py3.6
 
 RUN mkdir /tensorflow
 WORKDIR "/tensorflow"
-RUN git clone -b cnn_tf_v1.12_compatible https://github.com/tensorflow/benchmarks
+RUN git clone https://github.com/tensorflow/benchmarks
 WORKDIR "/tensorflow/benchmarks"
 
-CMD mpirun \
-  python scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py \
-    --model resnet101 \
-    --batch_size 64 \
-    --variable_update horovod
+CMD ["/bin/bash"]
diff --git a/examples/tensorflow-benchmarks-custom.yaml b/examples/v1alpha1/tensorflow-benchmarks-custom.yaml
similarity index 100%
rename from examples/tensorflow-benchmarks-custom.yaml
rename to examples/v1alpha1/tensorflow-benchmarks-custom.yaml
diff --git a/examples/tensorflow-benchmarks-imagenet.yaml b/examples/v1alpha1/tensorflow-benchmarks-imagenet.yaml
similarity index 100%
rename from examples/tensorflow-benchmarks-imagenet.yaml
rename to examples/v1alpha1/tensorflow-benchmarks-imagenet.yaml
diff --git a/examples/tensorflow-benchmarks.yaml b/examples/v1alpha1/tensorflow-benchmarks.yaml
similarity index 100%
rename from examples/tensorflow-benchmarks.yaml
rename to examples/v1alpha1/tensorflow-benchmarks.yaml
diff --git a/examples/v1alpha2/tensorflow-benchmarks-custom.yaml b/examples/v1alpha2/tensorflow-benchmarks-custom.yaml
deleted file mode 100644
index 15895ba52..000000000
--- a/examples/v1alpha2/tensorflow-benchmarks-custom.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-# This file shows how to run multi-node training benchmarks using an MPIJob,
-# specifying GPUs explicitly per worker.
-apiVersion: kubeflow.org/v1alpha2
-kind: MPIJob
-metadata:
-  name: tensorflow-benchmarks-16-custom
-spec:
-  slotsPerWorker: 4
-  mpiReplicaSpecs:
-    Launcher:
-      replicas: 1
-      template:
-        spec:
-          containers:
-          - image: mpioperator/tensorflow-benchmarks:latest
-            name: tensorflow-benchmarks
-    Worker:
-      replicas: 4
-      template:
-        spec:
-          containers:
-          - image: mpioperator/tensorflow-benchmarks:latest
-            name: tensorflow-benchmarks
-          resources:
-            limits:
-              nvidia.com/gpu: 4
\ No newline at end of file
diff --git a/examples/v1alpha2/tensorflow-benchmarks-imagenet.yaml b/examples/v1alpha2/tensorflow-benchmarks-imagenet.yaml
index cac1637f3..412b6478c 100644
--- a/examples/v1alpha2/tensorflow-benchmarks-imagenet.yaml
+++ b/examples/v1alpha2/tensorflow-benchmarks-imagenet.yaml
@@ -8,6 +8,8 @@ kind: MPIJob
 metadata:
   name: tensorflow-benchmarks-imagenet
 spec:
+  slotsPerWorker: 8
+  cleanPodPolicy: Running
   mpiReplicaSpecs:
     Launcher:
       replicas: 1
@@ -18,6 +20,25 @@ spec:
             name: tensorflow-benchmarks
             command:
               - mpirun
+              - --allow-run-as-root
+              - -np
+              - "16"
+              - -bind-to
+              - none
+              - -map-by
+              - slot
+              - -x
+              - NCCL_DEBUG=INFO
+              - -x
+              - LD_LIBRARY_PATH
+              - -x
+              - PATH
+              - -mca
+              - pml
+              - ob1
+              - -mca
+              - btl
+              - ^openib
               - python
               - scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py
               - --data_format=NCHW
@@ -39,6 +60,9 @@ spec:
           containers:
           - image: mpioperator/tensorflow-benchmarks:latest
             name: tensorflow-benchmarks
+            resources:
+              limits:
+                nvidia.com/gpu: 8
             volumeMounts:
             - mountPath: /efs
               name: efs
diff --git a/examples/v1alpha2/tensorflow-benchmarks.yaml b/examples/v1alpha2/tensorflow-benchmarks.yaml
index 7eb1bc3e4..a7f002a57 100644
--- a/examples/v1alpha2/tensorflow-benchmarks.yaml
+++ b/examples/v1alpha2/tensorflow-benchmarks.yaml
@@ -1,9 +1,10 @@
 apiVersion: kubeflow.org/v1alpha2
 kind: MPIJob
 metadata:
-  name: tensorflow-benchmarks-16
+  name: tensorflow-benchmarks
 spec:
   slotsPerWorker: 1
+  cleanPodPolicy: Running
   mpiReplicaSpecs:
     Launcher:
       replicas: 1
@@ -12,6 +13,32 @@ spec:
            containers:
            - image: mpioperator/tensorflow-benchmarks:latest
              name: tensorflow-benchmarks
+             command:
+             - mpirun
+             - --allow-run-as-root
+             - -np
+             - "2"
+             - -bind-to
+             - none
+             - -map-by
+             - slot
+             - -x
+             - NCCL_DEBUG=INFO
+             - -x
+             - LD_LIBRARY_PATH
+             - -x
+             - PATH
+             - -mca
+             - pml
+             - ob1
+             - -mca
+             - btl
+             - ^openib
+             - python
+             - scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py
+             - --model=resnet101
+             - --batch_size=64
+             - --variable_update=horovod
     Worker:
       replicas: 2
       template:
@@ -19,4 +46,6 @@ spec:
           containers:
           - image: mpioperator/tensorflow-benchmarks:latest
             name: tensorflow-benchmarks
-
+            resources:
+              limits:
+                nvidia.com/gpu: 1