-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added distribute-training-bert-example and upgraded to vk 1.11
- Loading branch information
Showing
20 changed files
with
2,014 additions
and
514 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
## Notes | ||
|
||
1. minikube start --memory='4000' --cpus='4' --disk-size='50000mb' --driver=kvm2 --nodes 3
2. deploy nfs
3. install minio
   1. kubectl create ns minio
   2. kubectl apply -f storage.yaml
   3. ./install.sh
4. kubectl apply --wait=true -k "github.com/kubeflow/training-operator/manifests/overlays/standalone?ref=v1.7.0" # install the Kubeflow training operator
5. install jhub
   1. kubectl create ns kubeflow
   2. kubectl apply -f storage.yaml
   3. helm upgrade --cleanup-on-fail --install my-jupyter jupyterhub/jupyterhub --namespace kubeflow --create-namespace --values values.yaml
6. replace the MinIO access key and secret key in the notebook
7. run the notebook
|
||
|
||
## Scratch | ||
|
||
tChAGp5qbzy7SP80HruR | ||
CYGcLfX9tgD6r0NQoI78VuHgr39sEehyiby0jy8w |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#!/bin/bash
#
# Deploys the pieces needed by the BERT distributed-training example:
#   1. MinIO                          (namespace "minio")
#   2. Kubeflow training operator     (v1.7.0, standalone overlay)
#   3. JupyterHub                     (Helm release "my-jupyter", namespace "kubeflow")
#   4. the "kubeflow-examples" bucket (created with the MinIO client "mc")
#
# Run it from the directory that contains the minio/ and jhub/ sub-directories;
# all paths below are relative to the current working directory.
# Requires kubectl, helm, wget and base64 on PATH.

# Abort on the first failing step instead of continuing with a half-installed
# stack.
set -euo pipefail

#######################################
# Create a namespace if it does not exist yet.
# "kubectl create ns" fails when the namespace is already present, which
# would break re-runs of this script.
# Arguments: $1 - namespace name
#######################################
ensure_namespace() {
  kubectl create ns "$1" --dry-run=client -o yaml | kubectl apply -f -
}

# install minio
pushd minio
ensure_namespace minio
kubectl apply -f hostpath-storage.yaml
./install.sh
popd

# install kubeflow operator
kubectl apply --wait=true -k "github.com/kubeflow/training-operator/manifests/overlays/standalone?ref=v1.7.0"

# install jupyterhub
pushd jhub
ensure_namespace kubeflow
kubectl apply -f hostpath-storage.yaml
# The chart is referenced as jupyterhub/jupyterhub, so the repo must be known
# to helm before the release can be installed.
helm repo add jupyterhub https://hub.jupyter.org/helm-chart/
helm repo update
helm upgrade --cleanup-on-fail --install my-jupyter jupyterhub/jupyterhub \
  --namespace kubeflow --create-namespace --values values.yaml
popd

# Read the MinIO root credentials from the "myminio" secret. They are printed
# because they have to be entered by hand elsewhere; avoid running this where
# stdout is logged or shared.
MC_ACCESS_KEY=$(kubectl get secret myminio -n minio -o jsonpath="{.data.rootUser}" | base64 --decode)
MC_SECRET_KEY=$(kubectl get secret myminio -n minio -o jsonpath="{.data.rootPassword}" | base64 --decode)
echo "MinIO credentials: $MC_ACCESS_KEY $MC_SECRET_KEY"

# Fetch the MinIO client once; an existing executable ./mc is reused.
if [[ -x "./mc" ]]; then
  echo "Minio client exists."
else
  echo "Minio client doesnt exist, downloading...."
  wget -O mc https://dl.min.io/client/mc/release/linux-amd64/mc
  chmod +x mc
fi

readonly MINIO_SERVICE_NAME="myminio"

# Address of the first endpoint behind the MinIO service (this is the backing
# endpoint address, not the service's ClusterIP). A failing lookup is turned
# into an empty value so the explicit check below reports it.
ENDPOINT=$(kubectl get endpoints "$MINIO_SERVICE_NAME" -n minio \
  -o jsonpath='{.subsets[0].addresses[0].ip}') || ENDPOINT=""

# Check if the endpoint was retrieved successfully
if [[ -z "$ENDPOINT" ]]; then
  echo "Error: Could not get the first endpoint for MinIO service" >&2
  exit 1
fi

# Register the deployment under the alias "local" and create the bucket.
# --ignore-existing keeps re-runs from failing when the bucket is already there.
./mc alias set local "http://${ENDPOINT}:9000" "$MC_ACCESS_KEY" "$MC_SECRET_KEY"
./mc mb --ignore-existing local/kubeflow-examples
24 changes: 24 additions & 0 deletions
24
examples/apps/bert-distr-training/jhub/hostpath-storage.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Statically provisioned volume plus matching claim in the "kubeflow" namespace.
# The volume is backed by a directory on the node (hostPath); adjust the path
# to a directory that exists on the node that will host it.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: nfs-share-pv
spec:
  capacity:
    storage: 40Gi
  accessModes:
    - ReadWriteMany
  hostPath:
    path: /home/malvag/kubeflow_testbed/jhub_data
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: nfs-share-pvc
  namespace: kubeflow
spec:
  # Empty class: bind to a pre-created volume, never provision dynamically.
  storageClassName: ""
  # Pin the claim to its own volume so it cannot bind to another class-less
  # ReadWriteMany volume that happens to be available.
  volumeName: nfs-share-pv
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 40Gi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Helm values passed to the jupyterhub/jupyterhub chart (--values values.yaml).
# NOTE(review): nesting reconstructed from the chart's value names — confirm
# against the chart's values reference before use.
hub:
  config:
    JupyterHub:
      hub_connect_url: "http://hub:8081"
    ConfigurableHTTPProxy:
      api_url: "http://proxy-api:8001"
  db:
    type: "sqlite-memory"
  networkPolicy:
    enabled: false
prePuller:
  hook:
    enabled: false
proxy:
  service:
    type: ClusterIP
  https:
    enabled: true
  chp:
    networkPolicy:
      enabled: false
singleuser:
  # User pods run under this service account.
  # NOTE(review): presumably the account installed with the training operator,
  # so notebooks can create training jobs — confirm.
  serviceAccountName: training-operator
  storage:
    dynamic:
      # NOTE(review): this storage class must exist in the cluster.
      storageClass: openebs-hostpath
    capacity: 30Gi
    # Alternative: use a pre-created claim instead of dynamic provisioning.
    # type: "static"
    # capacity: "10Gi"
    # static:
    #   pvcName: "my-storage-pvc"
    #   subPath: ""
  networkPolicy:
    enabled: false
24 changes: 24 additions & 0 deletions
24
examples/apps/bert-distr-training/minio/hostpath-storage.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Statically provisioned volume plus matching claim in the "minio" namespace.
# The volume is backed by a directory on the node (hostPath); adjust the path
# to a directory that exists on the node that will host it.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: nfs-share-pv-minio
spec:
  capacity:
    storage: 40Gi
  accessModes:
    - ReadWriteMany
  hostPath:
    path: /home/malvag/kubeflow_testbed/minio_data
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: nfs-share-pvc-minio
  namespace: minio
spec:
  # Empty class: bind to a pre-created volume, never provision dynamically.
  storageClassName: ""
  # Pin the claim to its own volume so it cannot bind to another class-less
  # ReadWriteMany volume that happens to be available.
  volumeName: nfs-share-pv-minio
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      # The claim asks for less than the volume's 40Gi capacity; it still binds.
      storage: 20Gi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/bash
#
# Installs a single-replica, standalone MinIO into the "minio" namespace via
# the official Helm chart and prints the generated root credentials.
# Requires helm, kubectl and base64 on PATH; the namespace must already exist.

# Abort on the first failing step.
set -euo pipefail

helm repo add minio https://charts.min.io/
helm repo update

readonly TEST_NAMESPACE=minio

# Install Minio
# "upgrade --install" instead of "install" so a second run updates the
# existing release rather than failing because the name is taken.
# persistence.enabled=false: data is not kept on a persistent volume.
# fullnameOverride=myminio: the credentials secret is read under that name
# below.
helm upgrade --install --debug --wait \
  my-minio minio/minio \
  --namespace "${TEST_NAMESPACE}" \
  --set resources.requests.memory=512Mi \
  --set replicas=1 \
  --set persistence.enabled=false \
  --set mode=standalone \
  --set fullnameOverride=myminio

# Extract Minio Credentials
# Printed so they can be copied by hand; avoid running this where stdout is
# logged or shared.
ACCESS_KEY=$(kubectl get secret myminio -n "${TEST_NAMESPACE}" -o jsonpath="{.data.rootUser}" | base64 --decode)
SECRET_KEY=$(kubectl get secret myminio -n "${TEST_NAMESPACE}" -o jsonpath="{.data.rootPassword}" | base64 --decode)
echo "MinIO credentials: $ACCESS_KEY $SECRET_KEY"
Oops, something went wrong.