# Patch summary (free text ahead of the first "diff --git" header; patch tools skip it).
#
# Purpose: split the single multi-container e2e test Pod into one Deployment per
# component (redis, rabbitmq "fits exchange", celery workers, large celery workers,
# listener), add Services for redis and rabbitmq, and inject AUTH_TOKEN from a
# Kubernetes Secret through a kustomize patch.
#
# Review fixes folded into this revision of the patch:
#   * The workflow waited on pods selected by `group=banzai-e2e-test`, but that label
#     exists only on the Deployment/Service metadata, not on the pod templates. The
#     wait now targets the Deployments (condition=Available).
#   * The pod-name lookups selected `app=<name>-deployment`; the pod template labels
#     are the bare names (`banzai-listener`, `banzai-celery-workers`,
#     `banzai-large-celery-workers`). Selectors corrected.
#   * The archive token is read from the step environment and quoted instead of being
#     templated straight into the shell script text.
#   * Hunk headers of the workflow diff were recomputed for the changed line counts.
#     The `index` blob ids are carried over from the original patch and no longer
#     describe the post-image of the workflow file.
#
# NOTE(review): the deleted Pod mounted one emptyDir (`banzai-data`) at
# /archive/engineering in the listener and both worker containers. The new
# Deployments declare no volumes while DB_ADDRESS still points at
# sqlite:////archive/engineering/test.db, so each pod would appear to get its own
# database file and output directory. Confirm this is intended or add shared storage.
diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index 1e5c7bb7..263904f9 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -34,27 +34,40 @@ jobs:
           kind load docker-image banzai:test-latest
 
       - name: Start banzai
         env:
           AUTH_TOKEN: ${{ secrets.ArchiveAuthToken }}
         run: |
-          cat banzai/tests/e2e-k8s.yaml
+          # Read the token from the step environment rather than templating it into the script text
+          kubectl create secret generic auth-token-secret --from-literal=auth-token="${AUTH_TOKEN}"
 
           # Deploy banzai stack
-          kubectl apply -f banzai/tests/e2e-k8s.yaml
+          kubectl apply -k banzai/tests/k8s
 
           # Wait for banzai to be ready
-          kubectl wait --for=condition=Ready --timeout=60m pod/banzai-e2e-test
+          # The group label is set on the Deployments (not on their pod templates), so wait on those
+          kubectl wait --for=condition=Available --timeout=60m deployment -l group=banzai-e2e-test
+
+          # Pod template labels are the bare app names (no "-deployment" suffix)
+          LISTENER_POD=$(kubectl get pods -l app=banzai-listener -o jsonpath='{.items[0].metadata.name}')
+          echo "LISTENER_POD=${LISTENER_POD}" >> $GITHUB_ENV
 
+          WORKERS_POD=$(kubectl get pods -l app=banzai-celery-workers -o jsonpath='{.items[0].metadata.name}')
+          echo "WORKERS_POD=${WORKERS_POD}" >> $GITHUB_ENV
+
+          LARGE_WORKERS_POD=$(kubectl get pods -l app=banzai-large-celery-workers -o jsonpath='{.items[0].metadata.name}')
+          echo "LARGE_WORKERS_POD=${LARGE_WORKERS_POD}" >> $GITHUB_ENV
+
       - name: Test Super Bias Creation
         run: |
           set +e
           export START=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
-
-          kubectl exec banzai-e2e-test -c banzai-listener -- pytest -s --pyargs banzai --durations=0 --junitxml=/archive/engineering/pytest-master-bias.xml -m master_bias
+          kubectl exec ${LISTENER_POD} -c banzai-listener -- pytest -s --pyargs banzai --durations=0 --junitxml=/archive/engineering/pytest-master-bias.xml -m master_bias
           EXIT_CODE=$?
 
           # Always print logs even if (especially if?) the reduction fails
-          kubectl logs banzai-e2e-test --since-time=$START --all-containers --prefix=true
+          kubectl logs ${LISTENER_POD} --since-time=$START --all-containers --prefix=true
+          kubectl logs ${WORKERS_POD} --since-time=$START --all-containers --prefix=true
+          kubectl logs ${LARGE_WORKERS_POD} --since-time=$START --all-containers --prefix=true
 
           # Exit with the captured status so the job properly fails or succeeds
           exit $EXIT_CODE
@@ -64,11 +77,13 @@ jobs:
           set +e
           export START=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
 
-          kubectl exec banzai-e2e-test -c banzai-listener -- pytest -s --pyargs banzai --durations=0 --junitxml=/archive/engineering/pytest-master-dark.xml -m master_dark
+          kubectl exec ${LISTENER_POD} -c banzai-listener -- pytest -s --pyargs banzai --durations=0 --junitxml=/archive/engineering/pytest-master-dark.xml -m master_dark
           EXIT_CODE=$?
 
           # Always print logs even if (especially if?) the reduction fails
-          kubectl logs banzai-e2e-test --since-time=$START --all-containers --prefix=true
+          kubectl logs ${LISTENER_POD} --since-time=$START --all-containers --prefix=true
+          kubectl logs ${WORKERS_POD} --since-time=$START --all-containers --prefix=true
+          kubectl logs ${LARGE_WORKERS_POD} --since-time=$START --all-containers --prefix=true
 
           # Exit with the captured status so the job properly fails or succeeds
           exit $EXIT_CODE
@@ -78,11 +93,13 @@ jobs:
           set +e
           export START=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
 
-          kubectl exec banzai-e2e-test -c banzai-listener -- pytest -s --pyargs banzai --durations=0 --junitxml=/archive/engineering/pytest-master-flat.xml -m master_flat
+          kubectl exec ${LISTENER_POD} -c banzai-listener -- pytest -s --pyargs banzai --durations=0 --junitxml=/archive/engineering/pytest-master-flat.xml -m master_flat
           EXIT_CODE=$?
 
           # Always print logs even if (especially if?) the reduction fails
-          kubectl logs banzai-e2e-test --since-time=$START --all-containers --prefix=true
+          kubectl logs ${LISTENER_POD} --since-time=$START --all-containers --prefix=true
+          kubectl logs ${WORKERS_POD} --since-time=$START --all-containers --prefix=true
+          kubectl logs ${LARGE_WORKERS_POD} --since-time=$START --all-containers --prefix=true
 
           # Exit with the captured status so the job properly fails or succeeds
           exit $EXIT_CODE
@@ -92,15 +109,18 @@ jobs:
           set +e
           export START=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
 
-          kubectl exec banzai-e2e-test -c banzai-listener -- pytest -s --pyargs banzai --durations=0 --junitxml=/archive/engineering/pytest-science-files.xml -m science_files
+          kubectl exec ${LISTENER_POD} -c banzai-listener -- pytest -s --pyargs banzai --durations=0 --junitxml=/archive/engineering/pytest-science-files.xml -m science_files
           EXIT_CODE=$?
 
           # Always print logs even if (especially if?) the reduction fails
-          kubectl logs banzai-e2e-test --since-time=$START --all-containers --prefix=true
+          kubectl logs ${LISTENER_POD} --since-time=$START --all-containers --prefix=true
+          kubectl logs ${WORKERS_POD} --since-time=$START --all-containers --prefix=true
+          kubectl logs ${LARGE_WORKERS_POD} --since-time=$START --all-containers --prefix=true
 
           # Exit with the captured status so the job properly fails or succeeds
           exit $EXIT_CODE
 
       - name: Cleanup
         run: |
-          kubectl delete pod banzai-e2e-test
+          kubectl delete service --selector=group=banzai-e2e-test
+          kubectl delete deployment --selector=group=banzai-e2e-test
diff --git a/banzai/tests/e2e-k8s.yaml b/banzai/tests/e2e-k8s.yaml
deleted file mode 100644
index fae59ef9..00000000
--- a/banzai/tests/e2e-k8s.yaml
+++ /dev/null
@@ -1,248 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
-  name: banzai-e2e-test
-  labels:
-    app.kubernetes.io/name: banzai
-spec:
-  # Create some empty directories to be mounted within the Pod
-  volumes:
-    - name: banzai-data
-      emptyDir:
-        sizeLimit: 10Gi
-  securityContext:
-    fsGroup: 10000
-
-  containers:
-    - name: banzai-redis
-      image: redis:5.0.3
-      imagePullPolicy: IfNotPresent
-      resources:
-        requests:
-          cpu: 0.1
-          memory: 512Mi
-        limits:
-          cpu: 1
-          memory: 512Mi
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'redis-cli ping | grep -q "PONG"'
-        initialDelaySeconds: 5
-        periodSeconds: 1
-    - name: banzai-fits-exchange
-      image: rabbitmq:3.12.3
-      imagePullPolicy: IfNotPresent
-      resources:
-        requests:
-          cpu: 1
-          memory: 512Mi
-        limits:
-          cpu: 4
-          memory: 512Mi
-      readinessProbe:
-        exec:
-          command:
-            - rabbitmq-diagnostics
-            - "-q"
-            - ping
-        initialDelaySeconds: 5
-        periodSeconds: 1
-        timeoutSeconds: 10
-    - name: banzai-celery-workers
-      image: banzai:test-latest
-      imagePullPolicy: IfNotPresent
-      volumeMounts:
-        - name: banzai-data
-          mountPath: /archive/engineering
-          subPath: engineering
-          readOnly: false
-      env:
-        - name: DB_ADDRESS
-          value: "sqlite:////archive/engineering/test.db?timeout=30"
-        - name: RETRY_DELAY
-          value: "0"
-        - name: TASK_HOST
-          value: "redis://localhost:6379/0"
-        - name: BANZAI_WORKER_LOGLEVEL
-          value: debug
-        - name: CALIBRATE_PROPOSAL_ID
-          value: "calibrate"
-        - name: OBSERVATION_PORTAL_URL
-          value: "http://internal-observation-portal.lco.gtn/api/observations/"
-        - name: API_ROOT
-          value: "https://archive-api.lco.global/"
-        - name: OMP_NUM_THREADS
-          value: "1"
-        - name: FITS_EXCHANGE
-          value: "fits_files"
-        - name: OPENTSDB_PYTHON_METRICS_TEST_MODE
-          value: "1"
-        - name: CELERY_TASK_QUEUE_NAME
-          value: "e2e_task_queue"
-        - name: REFERENCE_CATALOG_URL
-          value: "http://phot-catalog.lco.gtn/"
-        - name: AUTH_TOKEN
-          valueFrom:
-            fieldRef:
-              fieldPath: hostEnv['AUTH_TOKEN']
-
-      command:
-        - celery
-        - -A
-        - banzai
-        - worker
-        - --hostname
-        - "banzai-celery-worker"
-        - --concurrency
-        - "4"
-        - -l
-        - "info"
-        - "-Q"
-        - "$(CELERY_TASK_QUEUE_NAME)"
-        - "-n"
-        - "celery-worker"
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - "celery -A banzai status | grep -q '@celery-worker:.*OK'"
-        initialDelaySeconds: 5
-        periodSeconds: 1
-        timeoutSeconds: 10
-      resources:
-        requests:
-          cpu: 4
-          memory: 6Gi
-        limits:
-          cpu: 8
-          memory: 6Gi
-    - name: banzai-large-celery-workers
-      image: banzai:test-latest
-      imagePullPolicy: IfNotPresent
-      volumeMounts:
-        - name: banzai-data
-          mountPath: /archive/engineering
-          subPath: engineering
-          readOnly: false
-      env:
-        - name: DB_ADDRESS
-          value: "sqlite:////archive/engineering/test.db?timeout=30"
-        - name: RETRY_DELAY
-          value: "0"
-        - name: TASK_HOST
-          value: "redis://localhost:6379/0"
-        - name: BANZAI_WORKER_LOGLEVEL
-          value: debug
-        - name: CALIBRATE_PROPOSAL_ID
-          value: "calibrate"
-        - name: OBSERVATION_PORTAL_URL
-          value: "http://internal-observation-portal.lco.gtn/api/observations/"
-        - name: API_ROOT
-          value: "https://archive-api.lco.global/"
-        - name: OMP_NUM_THREADS
-          value: "2"
-        - name: FITS_EXCHANGE
-          value: "fits_files"
-        - name: OPENTSDB_PYTHON_METRICS_TEST_MODE
-          value: "1"
-        - name: CELERY_TASK_QUEUE_NAME
-          value: "e2e_large_task_queue"
-        - name: REFERENCE_CATALOG_URL
-          value: "http://phot-catalog.lco.gtn/"
-        - name: AUTH_TOKEN
-          valueFrom:
-            fieldRef:
-              fieldPath: hostEnv['AUTH_TOKEN']
-      command:
-        - celery
-        - -A
-        - banzai
-        - worker
-        - --hostname
-        - "banzai-celery-worker"
-        - --concurrency
-        - "1"
-        - -l
-        - "info"
-        - "-Q"
-        - "$(CELERY_TASK_QUEUE_NAME)"
-        - "-n"
-        - "large-celery-worker"
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'celery -A banzai status | grep -q "large-celery-worker:.*OK"'
-        initialDelaySeconds: 5
-        periodSeconds: 1
-        timeoutSeconds: 10
-      resources:
-        requests:
-          cpu: 2
-          memory: 8Gi
-        limits:
-          cpu: 3
-          memory: 8Gi
-    - name: banzai-listener
-      image: banzai:test-latest
-      imagePullPolicy: IfNotPresent
-      volumeMounts:
-        - name: banzai-data
-          mountPath: /archive/engineering
-          subPath: engineering
-          readOnly: false
-      env:
-        - name: DB_ADDRESS
-          value: "sqlite:////archive/engineering/test.db?timeout=30"
-        - name: FITS_BROKER
-          value: "localhost"
-        - name: TASK_HOST
-          value: "redis://localhost:6379/0"
-        - name: CALIBRATE_PROPOSAL_ID
-          value: "calibrate"
-        - name: OBSERVATION_PORTAL_URL
-          value: "http://internal-observation-portal.lco.gtn/api/observations/"
-        - name: API_ROOT
-          value: "https://archive-api.lco.global/"
-        - name: FITS_EXCHANGE
-          value: "fits_files"
-        - name: OPENTSDB_PYTHON_METRICS_TEST_MODE
-          value: "1"
-        - name: CELERY_TASK_QUEUE_NAME
-          value: "e2e_task_queue"
-        - name: REFERENCE_CATALOG_URL
-          value: "http://phot-catalog.lco.gtn/"
-        - name: "CELERY_LARGE_TASK_QUEUE_NAME"
-          value: "e2e_large_task_queue"
-        - name: AUTH_TOKEN
-          valueFrom:
-            fieldRef:
-              fieldPath: hostEnv['AUTH_TOKEN']
-      command:
-        - banzai_run_realtime_pipeline
-        - --db-address=$(DB_ADDRESS)
-        - --fpack
-        - --broker-url=localhost
-      resources:
-        requests:
-          cpu: 0.1
-          memory: 1Gi
-        limits:
-          cpu: 1
-          memory: 1Gi
-      readinessProbe:
-        exec:
-          command:
-            - /bin/sh
-            - -c
-            - 'ps -u archive | grep -q "banzai_run_real"'
-        initialDelaySeconds: 5
-        periodSeconds: 1
-        timeoutSeconds: 10
-  dnsPolicy: ClusterFirst
-  restartPolicy: Never
diff --git a/banzai/tests/k8s/e2e-k8s.yaml b/banzai/tests/k8s/e2e-k8s.yaml
new file mode 100644
index 00000000..a253a0c0
--- /dev/null
+++ b/banzai/tests/k8s/e2e-k8s.yaml
@@ -0,0 +1,341 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: banzai-redis-deployment
+  labels:
+    app: banzai-redis
+    group: banzai-e2e-test
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: banzai-redis
+  template:
+    metadata:
+      labels:
+        app: banzai-redis
+    spec:
+      securityContext:
+        fsGroup: 10000
+      containers:
+        - name: banzai-redis
+          image: redis:5.0.3
+          imagePullPolicy: IfNotPresent
+          resources:
+            requests:
+              cpu: 0.1
+              memory: 512Mi
+            limits:
+              cpu: 1
+              memory: 512Mi
+          readinessProbe:
+            exec:
+              command:
+                - /bin/sh
+                - -c
+                - 'redis-cli ping | grep -q "PONG"'
+            initialDelaySeconds: 5
+            periodSeconds: 1
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: banzai-redis
+  labels:
+    app: banzai-redis
+    group: banzai-e2e-test
+spec:
+  selector:
+    app: banzai-redis
+  ports:
+    - port: 6379
+      targetPort: 6379
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: banzai-fits-exchange-deployment
+  labels:
+    app: banzai-fits-exchange
+    group: banzai-e2e-test
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: banzai-fits-exchange
+  template:
+    metadata:
+      labels:
+        app: banzai-fits-exchange
+    spec:
+      securityContext:
+        fsGroup: 10000
+      containers:
+        - name: banzai-fits-exchange
+          image: rabbitmq:3.12.3
+          imagePullPolicy: IfNotPresent
+          resources:
+            requests:
+              cpu: 1
+              memory: 512Mi
+            limits:
+              cpu: 4
+              memory: 512Mi
+          readinessProbe:
+            exec:
+              command:
+                - rabbitmq-diagnostics
+                - "-q"
+                - ping
+            initialDelaySeconds: 5
+            periodSeconds: 1
+            timeoutSeconds: 10
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: banzai-fits-exchange
+  labels:
+    app: banzai-fits-exchange
+    group: banzai-e2e-test
+spec:
+  selector:
+    app: banzai-fits-exchange
+  ports:
+    - port: 5672
+      targetPort: 5672
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: banzai-celery-workers-deployment
+  labels:
+    app: banzai-celery-workers
+    group: banzai-e2e-test
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: banzai-celery-workers
+  template:
+    metadata:
+      labels:
+        app: banzai-celery-workers
+    spec:
+      restartPolicy: Always
+      securityContext:
+        fsGroup: 10000
+      containers:
+        - name: banzai-celery-workers
+          image: banzai:test-latest
+          imagePullPolicy: IfNotPresent
+          env:
+            - name: DB_ADDRESS
+              value: "sqlite:////archive/engineering/test.db?timeout=30"
+            - name: RETRY_DELAY
+              value: "0"
+            - name: TASK_HOST
+              value: "redis://banzai-redis:6379/0"  # Adjust if needed
+            - name: BANZAI_WORKER_LOGLEVEL
+              value: debug
+            - name: CALIBRATE_PROPOSAL_ID
+              value: "calibrate"
+            - name: OBSERVATION_PORTAL_URL
+              value: "http://internal-observation-portal.lco.gtn/api/observations/"
+            - name: API_ROOT
+              value: "https://archive-api.lco.global/"
+            - name: OMP_NUM_THREADS
+              value: "1"
+            - name: FITS_EXCHANGE
+              value: "fits_files"
+            - name: OPENTSDB_PYTHON_METRICS_TEST_MODE
+              value: "1"
+            - name: CELERY_TASK_QUEUE_NAME
+              value: "e2e_task_queue"
+            - name: REFERENCE_CATALOG_URL
+              value: "http://phot-catalog.lco.gtn/"
+          command:
+            - celery
+            - -A
+            - banzai
+            - worker
+            - --hostname
+            - "banzai-celery-worker"
+            - --concurrency
+            - "4"
+            - -l
+            - "info"
+            - "-Q"
+            - "$(CELERY_TASK_QUEUE_NAME)"
+            - "-n"
+            - "celery-worker"
+          readinessProbe:
+            exec:
+              command:
+                - /bin/sh
+                - -c
+                - "celery -A banzai status | grep -q '@celery-worker:.*OK'"
+            initialDelaySeconds: 5
+            periodSeconds: 1
+            timeoutSeconds: 10
+          resources:
+            requests:
+              cpu: 4
+              memory: 6Gi
+            limits:
+              cpu: 8
+              memory: 6Gi
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: banzai-large-celery-workers-deployment
+  labels:
+    app: banzai-large-celery-workers
+    group: banzai-e2e-test
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: banzai-large-celery-workers
+  template:
+    metadata:
+      labels:
+        app: banzai-large-celery-workers
+    spec:
+      restartPolicy: Always
+      securityContext:
+        fsGroup: 10000
+      containers:
+        - name: banzai-large-celery-workers
+          image: banzai:test-latest
+          imagePullPolicy: IfNotPresent
+          env:
+            - name: DB_ADDRESS
+              value: "sqlite:////archive/engineering/test.db?timeout=30"
+            - name: RETRY_DELAY
+              value: "0"
+            - name: TASK_HOST
+              value: "redis://banzai-redis:6379/0"  # Adjust if needed
+            - name: BANZAI_WORKER_LOGLEVEL
+              value: debug
+            - name: CALIBRATE_PROPOSAL_ID
+              value: "calibrate"
+            - name: OBSERVATION_PORTAL_URL
+              value: "http://internal-observation-portal.lco.gtn/api/observations/"
+            - name: API_ROOT
+              value: "https://archive-api.lco.global/"
+            - name: OMP_NUM_THREADS
+              value: "2"
+            - name: FITS_EXCHANGE
+              value: "fits_files"
+            - name: OPENTSDB_PYTHON_METRICS_TEST_MODE
+              value: "1"
+            - name: CELERY_TASK_QUEUE_NAME
+              value: "e2e_large_task_queue"
+            - name: REFERENCE_CATALOG_URL
+              value: "http://phot-catalog.lco.gtn/"
+          command:
+            - celery
+            - -A
+            - banzai
+            - worker
+            - --hostname
+            - "banzai-celery-worker"
+            - --concurrency
+            - "1"
+            - -l
+            - "info"
+            - "-Q"
+            - "$(CELERY_TASK_QUEUE_NAME)"
+            - "-n"
+            - "large-celery-worker"
+          readinessProbe:
+            exec:
+              command:
+                - /bin/sh
+                - -c
+                - 'celery -A banzai status | grep -q "large-celery-worker:.*OK"'
+            initialDelaySeconds: 5
+            periodSeconds: 1
+            timeoutSeconds: 10
+          resources:
+            requests:
+              cpu: 2
+              memory: 8Gi
+            limits:
+              cpu: 3
+              memory: 8Gi
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: banzai-listener-deployment
+  labels:
+    app: banzai-listener
+    group: banzai-e2e-test
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: banzai-listener
+  template:
+    metadata:
+      labels:
+        app: banzai-listener
+    spec:
+      restartPolicy: Always
+      securityContext:
+        fsGroup: 10000
+      containers:
+        - name: banzai-listener
+          image: banzai:test-latest
+          imagePullPolicy: IfNotPresent
+          env:
+            - name: DB_ADDRESS
+              value: "sqlite:////archive/engineering/test.db?timeout=30"
+            - name: FITS_BROKER
+              value: "banzai-fits-exchange"
+            - name: TASK_HOST
+              value: "redis://banzai-redis:6379/0"
+            - name: CALIBRATE_PROPOSAL_ID
+              value: "calibrate"
+            - name: OBSERVATION_PORTAL_URL
+              value: "http://internal-observation-portal.lco.gtn/api/observations/"
+            - name: API_ROOT
+              value: "https://archive-api.lco.global/"
+            - name: FITS_EXCHANGE
+              value: "fits_files"
+            - name: OPENTSDB_PYTHON_METRICS_TEST_MODE
+              value: "1"
+            - name: CELERY_TASK_QUEUE_NAME
+              value: "e2e_task_queue"
+            - name: REFERENCE_CATALOG_URL
+              value: "http://phot-catalog.lco.gtn/"
+            - name: CELERY_LARGE_TASK_QUEUE_NAME
+              value: "e2e_large_task_queue"
+          command:
+            - banzai_run_realtime_pipeline
+            - --db-address=$(DB_ADDRESS)
+            - --fpack
+            - --broker-url=banzai-fits-exchange
+          resources:
+            requests:
+              cpu: 0.1
+              memory: 1Gi
+            limits:
+              cpu: 1
+              memory: 1Gi
+          readinessProbe:
+            exec:
+              command:
+                - /bin/sh
+                - -c
+                - 'ps -u archive | grep -q "banzai_run_real"'
+            initialDelaySeconds: 5
+            periodSeconds: 1
+            timeoutSeconds: 10
diff --git a/banzai/tests/k8s/kustomization.yaml b/banzai/tests/k8s/kustomization.yaml
new file mode 100644
index 00000000..2cb4aa4e
--- /dev/null
+++ b/banzai/tests/k8s/kustomization.yaml
@@ -0,0 +1,55 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - e2e-k8s.yaml
+
+patches:
+  - patch: |-
+      apiVersion: apps/v1
+      kind: Deployment
+      metadata:
+        name: banzai-listener-deployment
+      spec:
+        template:
+          spec:
+            containers:
+              - name: banzai-listener
+                env:
+                  - name: AUTH_TOKEN
+                    valueFrom:
+                      secretKeyRef:
+                        name: auth-token-secret
+                        key: auth-token
+  - patch: |-
+      apiVersion: apps/v1
+      kind: Deployment
+      metadata:
+        name: banzai-celery-workers-deployment
+      spec:
+        template:
+          spec:
+            containers:
+              - name: banzai-celery-workers
+                env:
+                  - name: AUTH_TOKEN
+                    valueFrom:
+                      secretKeyRef:
+                        name: auth-token-secret
+                        key: auth-token
+  - patch: |-
+      apiVersion: apps/v1
+      kind: Deployment
+      metadata:
+        name: banzai-large-celery-workers-deployment
+      spec:
+        template:
+          spec:
+            containers:
+              - name: banzai-large-celery-workers
+                env:
+                  - name: AUTH_TOKEN
+                    valueFrom:
+                      secretKeyRef:
+                        name: auth-token-secret
+                        key: auth-token