-
Notifications
You must be signed in to change notification settings - Fork 163
/
Copy pathDockerfile.spark
34 lines (24 loc) · 967 Bytes
/
Dockerfile.spark
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Base image; the image name indicates Data Prep Kit on Spark 3.5.2.
# NOTE(review): the :latest tag is mutable, so rebuilds may pick up a different
# base image -- consider pinning a release tag or digest.
FROM quay.io/dataprep1/data-prep-kit/data-prep-kit-spark-3.5.2:latest
# Run the install steps below as root (the build switches to the spark user later).
USER root
# Install pytest; --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): pytest is unpinned, so the installed version can vary between builds.
RUN pip install --no-cache-dir pytest
# Relative paths used by later instructions resolve against this directory.
# SPARK_HOME is not defined in this file -- presumably provided by the base image.
WORKDIR ${SPARK_HOME}/work-dir
# Build arguments:
#   DPK_WHEEL_FILE_NAME  file name of the wheel inside data-processing-dist/
#   TRANSFORM_NAME       suffix of the dpk_<name>/ source directory copied below
ARG DPK_WHEEL_FILE_NAME
ARG TRANSFORM_NAME
# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:users data-processing-dist data-processing-dist
# Install the wheel with its "spark" extra.
# --no-cache-dir keeps pip's download cache out of the image layer; the quotes
# stop the shell from treating "[spark]" as a glob pattern.
RUN pip install --no-cache-dir "data-processing-dist/${DPK_WHEEL_FILE_NAME}[spark]"
# Install project dependencies, then the project source.
# requirements.txt is copied and installed before the transform sources so that
# editing the sources alone does not invalidate the cached dependency layer.
# NOTE(review): this assumes requirements.txt does not refer to the
# dpk_${TRANSFORM_NAME}/ sources by relative path -- confirm.
COPY --chown=spark:users requirements.txt requirements.txt
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
## Copy the python version of the transform
COPY --chown=spark:users dpk_${TRANSFORM_NAME}/ dpk_${TRANSFORM_NAME}/
# Drop root: the remaining instructions and the resulting container use the spark user.
USER spark
# Prepend the work directory to Python's module search path, keeping any
# PYTHONPATH value already defined at this point.
ENV PYTHONPATH="${SPARK_HOME}/work-dir/:${PYTHONPATH}"
# Build metadata is declared last: these values presumably differ on every
# build, and a changed ARG value invalidates the cache for the instructions
# that follow it.
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date="${BUILD_DATE}" \
      git-commit="${GIT_COMMIT}"