-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspark-task.yaml
124 lines (124 loc) · 4.2 KB
/
spark-task.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
---
# Pod identity for the myelin-uat drift-detection job; the pod's container
# (defined under `spec`) runs spark-submit.
apiVersion: v1
kind: Pod
metadata:
  name: myelin-uat-drift-detection
  namespace: myelin-app
  labels:
    app: myelin
    chart: myelin-v0.4.0
    heritage: Tiller
    release: myelin-uat
  annotations:
    # Annotation values must be strings, so this stays quoted; unquoted it
    # would be parsed as a YAML boolean.
    sidecar.istio.io/inject: 'false'
# Pod spec: a single container that runs spark-submit in cluster mode against
# the in-cluster Kubernetes API, plus the secret and PVC volumes it mounts.
spec:
  # NOTE(review): with `Always`, the kubelet restarts this container every
  # time spark-submit exits, which re-runs the submission. Presumably intended
  # to keep the job alive - confirm.
  restartPolicy: Always
  # The deprecated `serviceAccount` alias that duplicated this value has been
  # dropped; `serviceAccountName` is the supported field.
  serviceAccountName: myelin-uat-myelin
  imagePullSecrets:
    - name: regcred
  containers:
    - name: main
      image: myelinio/myelin-drift-detection:0.2.1
      imagePullPolicy: Always
      command:
        - /opt/spark/bin/spark-submit
      # spark-submit arguments. Each `--conf` flag and its `key=value` are
      # separate argv entries, so the order of this list is significant.
      args:
        # Submission target and application name.
        - --master
        - k8s://https://kubernetes.default.svc.cluster.local:443
        - --deploy-mode
        - cluster
        - --name
        - myelin-uat-drift-detection
        # Namespace, service account and image used for the Spark pods.
        - --conf
        - spark.kubernetes.namespace=myelin-app
        - --conf
        - spark.kubernetes.authenticate.driver.serviceAccountName=myelin-uat-myelin
        - --conf
        - spark.kubernetes.container.image=myelinio/myelin-drift-detection:0.2.1
        - --conf
        - spark.kubernetes.container.image.pullPolicy=Always
        # Environment variables set on the driver (spark.kubernetes.driverEnv.*).
        - --conf
        - spark.kubernetes.driverEnv.GCS_PROJECT_ID=myelin-development
        - --conf
        - spark.kubernetes.driverEnv.GOOGLE_APPLICATION_CREDENTIALS=/src/drift-detection/secrets/spark-sa.json
        - --conf
        - spark.kubernetes.driverEnv.PUBSUB_SUBSCRIPTION=projects/myelin-development/subscriptions/test1-sha456-logs-subscription
        - --conf
        - spark.kubernetes.driverEnv.CHECKPOINT_DIRECTORY=/var/spark/checkpoints_pubsub
        - --conf
        - spark.kubernetes.driverEnv.DRIFT_DETECTOR_TYPE=MOA_KL
        - --conf
        - spark.kubernetes.driverEnv.BATCH_DURATION=30
        - --conf
        - spark.kubernetes.driverEnv.WINDOW_DURATION=30
        - --conf
        - spark.kubernetes.driverEnv.BATCH_SIZE=100
        - --conf
        - spark.kubernetes.driverEnv.STATE_TABLE=myelin-development.test1_sha456_drift_detection.state
        - --conf
        - spark.kubernetes.driverEnv.PUSHGATEWAY_URL=myelin-uat-prometheus-pushgateway
        - --conf
        - spark.kubernetes.driverEnv.PUSHGATEWAY_PORT=9091
        - --conf
        - spark.kubernetes.driverEnv.MYELIN_NAMESPACE=myelin-app
        - --conf
        - spark.kubernetes.driverEnv.INPUT_DRIFT_PROBABILITY_METRIC=input_drift_probability
        # Driver pod: Istio annotation, `spark-sa` secret mount, PVC mount and
        # CPU/memory sizing.
        - --conf
        - spark.kubernetes.driver.annotation.sidecar.istio.io/inject=false
        - --conf
        - spark.kubernetes.driver.secrets.spark-sa=/src/drift-detection/secrets/
        - --conf
        - spark.kubernetes.driver.volumes.persistentVolumeClaim.data-myelin-uat-nfs-server-0.options.claimName=data-myelin-uat-nfs-server-0
        - --conf
        - spark.kubernetes.driver.volumes.persistentVolumeClaim.data-myelin-uat-nfs-server-0.mount.path=/var/spark/
        - --conf
        - spark.driver.cores=2
        - --conf
        - spark.driver.memory=2048m
        # Executor pods: same annotation, secret and PVC mounts as the driver,
        # with a single executor instance.
        - --conf
        - spark.kubernetes.executor.annotation.sidecar.istio.io/inject=false
        - --conf
        - spark.kubernetes.executor.lostCheck.maxAttempts=1
        - --conf
        - spark.executor.instances=1
        - --conf
        - spark.executor.cores=2
        - --conf
        - spark.executor.memory=2048m
        - --conf
        - spark.kubernetes.executor.secrets.spark-sa=/src/drift-detection/secrets/
        - --conf
        - spark.kubernetes.executor.volumes.persistentVolumeClaim.data-myelin-uat-nfs-server-0.options.claimName=data-myelin-uat-nfs-server-0
        - --conf
        - spark.kubernetes.executor.volumes.persistentVolumeClaim.data-myelin-uat-nfs-server-0.mount.path=/var/spark/
        - --conf
        - spark.kubernetes.pyspark.pythonVersion=3
        # Application passed to spark-submit; must remain the last argument.
        - /work/drift_detection_job.py
      # Environment of this submitter container. The same values are also
      # handed to the driver through the driverEnv settings in `args`.
      # NOTE(review): the project id is named PROJECT_ID here but
      # GCS_PROJECT_ID for the driver - confirm which name the job reads.
      env:
        - name: PROJECT_ID
          value: myelin-development
        - name: GOOGLE_APPLICATION_CREDENTIALS
          value: /src/drift-detection/secrets/spark-sa.json
        - name: PUBSUB_SUBSCRIPTION
          value: projects/myelin-development/subscriptions/test1-sha456-logs-subscription
        - name: CHECKPOINT_DIRECTORY
          value: /var/spark/checkpoints_pubsub
        - name: DRIFT_DETECTOR_TYPE
          value: MOA_KL
      volumeMounts:
        # Directory that holds spark-sa.json, the file referenced by
        # GOOGLE_APPLICATION_CREDENTIALS.
        - name: google-cloud-key
          mountPath: /src/drift-detection/secrets
          readOnly: true
        # Parent directory of CHECKPOINT_DIRECTORY.
        - name: task-pv-storage
          mountPath: /var/spark/
  volumes:
    - name: google-cloud-key
      secret:
        secretName: spark-sa
        # 420 decimal is 0644 octal (rw-r--r--); kept as a decimal integer so
        # no parser can misread a leading zero.
        defaultMode: 420
        items:
          - key: spark-sa.json
            path: spark-sa.json
    - name: task-pv-storage
      persistentVolumeClaim:
        claimName: data-myelin-uat-nfs-server-0