-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile.pyspark-ml-alpine
45 lines (32 loc) · 1.55 KB
/
Dockerfile.pyspark-ml-alpine
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
FROM myelinio/pyspark:spark-2.4.1-alpine

# Versions of the numeric stack built below. SCIPY_VERSION names the release
# that is actually installed, so the variable exposed in the running container
# matches the image contents.
ENV NUMPY_VERSION=1.18.2 \
    SCIPY_VERSION=1.3.0

WORKDIR /work

# reference: https://github.com/abn/scipy-docker-alpine/blob/master/Dockerfile
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Install numpy and scipy at the pinned versions.
#  - "scipy-build" groups the compiler toolchain and development headers.
#    NOTE(review): it is intentionally left installed here; the later pip
#    installs in this file presumably still need the compiler - confirm before
#    adding an `apk del scipy-build`.
#  - The xlocale.h symlink exposes locale.h under a second name; presumably the
#    numpy/scipy build looks for xlocale.h, which this image does not ship.
#  - "scipy-runtime" groups the shared libraries kept for use at runtime.
#  - The apk index cache is removed in the same layer that downloaded it.
RUN apk update \
    && apk add --virtual scipy-build \
        build-base python3-dev openblas-dev freetype-dev pkgconfig gfortran \
    && ln -s /usr/include/locale.h /usr/include/xlocale.h \
    && pip install --no-cache-dir "numpy==$NUMPY_VERSION" \
    && pip install --no-cache-dir "scipy==$SCIPY_VERSION" \
    && apk add --virtual scipy-runtime \
        freetype libgfortran libgcc libpng libstdc++ musl openblas tcl tk \
    && rm -rf /var/cache/apk/*
# Package selection adapted from
# https://github.com/AfsmNGhr/alpine-tensorflow/blob/master/Dockerfile
#
# Step 1 installs the libjpeg-turbo and hdf5 packages as regular packages.
# Step 2 installs the tools and -dev packages as the virtual package
#        "build-deps".
# Step 3 installs h5py with pip.
# Both apk calls also consult the Alpine edge/testing repository.
RUN apk add --no-cache \
        --repository http://dl-cdn.alpinelinux.org/alpine/edge/testing \
        hdf5 \
        libjpeg-turbo \
    && apk add --no-cache \
        --repository http://dl-cdn.alpinelinux.org/alpine/edge/testing \
        --virtual build-deps \
        bash \
        cmake \
        file \
        freetype-dev \
        hdf5-dev \
        libjpeg-turbo-dev \
        libpng-dev \
        linux-headers \
        openblas-dev \
        patch \
        wget \
        zip \
    && pip install --no-cache-dir h5py
# Install Cython, scikit-learn and pandas.
# The requirement specifiers must be quoted: unquoted, the shell treats
# ">=0.20" as an output redirection, so pip never sees the version bound and a
# stray file named "=0.20" is written to the working directory.
RUN pip install --no-cache-dir Cython \
    && pip install --no-cache-dir "scikit-learn>=0.20" \
    && pip install --no-cache-dir "pandas>=0.21.0"
# Append /work to the Python module search path inherited from the base image.
ENV PYTHONPATH=${PYTHONPATH}:/work

# Plain local files are copied with COPY; ADD's extra behaviours (archive
# extraction, URL fetch) are not needed here. The trailing slash marks the
# destination as a directory.
COPY lib/spark_pubsub-1.2-SNAPSHOT.jar ${SPARK_HOME}/jars/

# Replace the kubernetes-client 4.1.2 jar in Spark's jars directory with 4.4.2.
RUN rm "${SPARK_HOME}/jars/kubernetes-client-4.1.2.jar"
COPY lib/kubernetes-client-4.4.2.jar ${SPARK_HOME}/jars/

COPY ./entrypoint.sh /opt/entrypoint.sh

WORKDIR /opt/spark/work-dir