forked from nyu-mll/jiant
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
98 lines (75 loc) · 3.06 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Dockerfile for jiant repo. Currently intended to run in our GCP environment.
#
# Usage:
#   docker build -t jiant-sandbox:v1 .
#   export JIANT_PATH="/nfs/jsalt/path/to/jiant"
#   docker run --runtime=nvidia --rm -v "/nfs/jsalt:/nfs/jsalt" \
#     -e "NFS_PROJECT_PREFIX=/nfs/jsalt/exp/docker" \
#     -e "JIANT_PROJECT_PREFIX=/nfs/jsalt/exp/docker" \
#     jiant-sandbox:v1 \
#     python $JIANT_PATH/main.py --config_file $JIANT_PATH/demo.conf \
#     [ ... additional args to main.py ... ]
#
# Note: all `docker run` options (including -e) must come before the image
# name; anything after the image name is passed to the container as its command.
#
# To run on Kubernetes, see gcp/kubernetes/run_batch.sh
#
# Note that --remote_log currently doesn't work with the above command,
# since the host name seen by main.py is the name of the container, not the
# name of the host GCE instance.
# Start from the CUDA 9.0 / cuDNN 7 development image (Ubuntu 16.04).
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04

# Install Tini and make it the entrypoint, so that every command given to
# `docker run` is launched as `/tini -- <command>` and Tini handles init duties.
# NOTE(review): the binary is downloaded without checksum verification.
ENV TINI_VERSION=v0.18.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
RUN chmod +x /tini
ENTRYPOINT ["/tini", "--"]
# Set the default locale to C.UTF-8 so Python 3 uses UTF-8 as its default
# text file encoding (avoids unicode errors).
ENV LANG=C.UTF-8
# Update Ubuntu packages and add the command-line utilities used by later
# steps (wget downloads the Anaconda installer, git clones CoVe; bzip2 is
# presumably needed to unpack the Anaconda installer payload).
#
# update / upgrade / install run in ONE layer so a cached, stale package index
# is never reused for the install, and the index files are deleted in the same
# layer so they do not add to the image size. DEBIAN_FRONTEND is set inline
# only, so it does not leak into the runtime environment.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get upgrade -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        bzip2 \
        git \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Install Anaconda 5.2.0 under /usr/share/anaconda3. Download, install and
# removal of the installer script all happen in one layer, so the installer
# itself is not kept in the image.
# TODO: replace with miniconda to reduce image size.
RUN installer=Anaconda3-5.2.0-Linux-x86_64.sh \
    && wget "https://repo.anaconda.com/archive/${installer}" \
    && bash "${installer}" -b -p /usr/share/anaconda3 \
    && rm "${installer}"

# Put Anaconda's bin directory first on PATH so its `python`, `pip` and
# `conda` are the ones used by the steps below and at runtime.
ENV PATH=/usr/share/anaconda3/bin:$PATH
# Fix some package issues: upgrade pip itself, then install msgpack.
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir msgpack

# Install latest TensorFlow (GPU build) and TensorFlow Hub.
# TODO: pin this to a specific version!
# NOTE(review): because this is unpinned, the installed release depends on the
# build date and may not match the CUDA 9.0 base image — confirm and pin.
RUN pip install --no-cache-dir --upgrade tensorflow-gpu tensorflow-hub
# Install PyTorch 0.4 and torchvision from the `pytorch` conda channel, with
# the cuda90 package to match the CUDA 9.0 base image.
# --yes answers conda's confirmation prompt explicitly instead of relying on
# how the prompt behaves with a non-interactive stdin; `conda clean` drops the
# downloaded package cache in the same layer to keep the image smaller.
RUN conda install --yes pytorch=0.4.0 torchvision=0.2.1 cuda90 -c pytorch \
    && conda clean --all --yes
# Install other requirements.
# conda: pinned numpy and nltk. --yes makes the install non-interactive and
# `conda clean` removes the package cache within the same layer.
RUN conda install --yes numpy=1.14.5 nltk=3.2.5 \
    && conda clean --all --yes
# pip: debugging / logging helpers. --no-cache-dir keeps pip's download cache
# out of the layer.
RUN pip install --no-cache-dir ipdb tensorboard tensorboardX==1.2
# Install AllenNLP (pinned).
# --no-cache-dir on every pip call keeps pip's download cache out of the image.
RUN pip install --no-cache-dir allennlp==0.5.1

# Install misc util packages.
RUN pip install --no-cache-dir --upgrade google-cloud-logging sendgrid
RUN pip install --no-cache-dir python-Levenshtein ftfy==5.4.1 spacy==2.0.11

# Download spaCy's English model (`en`) into the image at build time.
RUN python -m spacy download en
# Install local data files: fetch the NLTK resources below into
# /usr/share/nltk_data at build time.
RUN python -m nltk.downloader \
        -d /usr/share/nltk_data \
        perluniprops \
        nonbreaking_prefixes \
        punkt
# Install MXNet (CUDA 9.0 + MKL build) and GluonNLP.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -U mxnet-cu90mkl gluonnlp

# Install Salesforce CoVe from source in development mode; the checkout stays
# in the image at ./cove relative to the build's working directory.
# `cd` is used inside the RUN (rather than WORKDIR) so the container's runtime
# working directory is left unchanged.
# NOTE(review): the clone is not pinned to a commit or tag, so rebuilds may
# pick up a different revision.
RUN git clone https://github.com/salesforce/cove.git \
    && cd cove \
    && pip install --no-cache-dir -r requirements.txt \
    && python setup.py develop
# Create local dir for NFS mount (the usage example bind-mounts /nfs/jsalt here).
RUN mkdir -p /nfs/jsalt

# Set environment vars based on gcp/config/jsalt_paths.1.2.sh
# Each variable has its own ENV instruction so that later values can reference
# variables defined by earlier instructions (e.g. $JSALT_SHARE_DIR).
ENV JSALT_SHARE_DIR="/nfs/jsalt/share"
ENV JIANT_DATA_DIR="$JSALT_SHARE_DIR/glue_data"
ENV GLOVE_EMBS_FILE="$JSALT_SHARE_DIR/glove/glove.840B.300d.txt"
ENV FASTTEXT_EMBS_FILE="$JSALT_SHARE_DIR/fasttext/crawl-300d-2M.vec"
ENV WORD_EMBS_FILE="$FASTTEXT_EMBS_FILE"
ENV FASTTEXT_MODEL_FILE="."
ENV PATH_TO_COVE="$JSALT_SHARE_DIR/cove"
ENV ELMO_SRC_DIR="$JSALT_SHARE_DIR/elmo"
# Defaults for the experiment output prefixes. Override these manually with
# `docker run -e ...` or via the Kubernetes config YAML.
ENV NFS_PROJECT_PREFIX="/nfs/jsalt/exp/docker"
ENV JIANT_PROJECT_PREFIX="$NFS_PROJECT_PREFIX"