From 7465ae5ba8e9e5903146af572481e6126c13ec31 Mon Sep 17 00:00:00 2001 From: polishchuks Date: Fri, 17 Jan 2025 20:04:36 +0200 Subject: [PATCH] feat: add punkt_tab to NLTK data downloads Add punkt_tab to the list of NLTK downloads in the Python 3.10, 3.11, and 3.12 Dockerfiles so that Punkt tokenizer data is available to newer NLTK releases, which load it from the punkt_tab package instead of the pickled punkt models. --- epicbox-python/310/Dockerfile | 2 +- epicbox-python/311/Dockerfile | 2 +- epicbox-python/312/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/epicbox-python/310/Dockerfile b/epicbox-python/310/Dockerfile index 66dcd16..a72ec55 100644 --- a/epicbox-python/310/Dockerfile +++ b/epicbox-python/310/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ COPY requirements.txt /tmp RUN pip install --no-cache-dir -r /tmp/requirements.txt \ - && python -m nltk.downloader -d ${NLTK_DIR} averaged_perceptron_tagger brown gutenberg movie_reviews omw-1.4 punkt treebank word2vec_sample wordnet \ + && python -m nltk.downloader -d ${NLTK_DIR} averaged_perceptron_tagger brown gutenberg movie_reviews omw-1.4 punkt punkt_tab treebank word2vec_sample wordnet \ && wget -qO- https://download.cdn.yandex.net/mystem/mystem-3.1-linux-64bit.tar.gz | tar xvz -C ${MYSTEM_DIR} \ && rm /tmp/requirements.txt diff --git a/epicbox-python/311/Dockerfile b/epicbox-python/311/Dockerfile index 1d0bbdc..e8f1b68 100644 --- a/epicbox-python/311/Dockerfile +++ b/epicbox-python/311/Dockerfile @@ -36,7 +36,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ COPY requirements.txt /tmp RUN pip install --no-cache-dir -r /tmp/requirements.txt \ - && python -m nltk.downloader -d ${NLTK_DIR} averaged_perceptron_tagger brown gutenberg movie_reviews omw-1.4 punkt treebank word2vec_sample wordnet \ + && python -m nltk.downloader -d ${NLTK_DIR} averaged_perceptron_tagger brown gutenberg movie_reviews omw-1.4 punkt punkt_tab treebank word2vec_sample wordnet \ && wget -qO- 
https://download.cdn.yandex.net/mystem/mystem-3.1-linux-64bit.tar.gz | tar xvz -C ${MYSTEM_DIR} \ && rm /tmp/requirements.txt diff --git a/epicbox-python/312/Dockerfile b/epicbox-python/312/Dockerfile index 0c4abde..8ec3036 100644 --- a/epicbox-python/312/Dockerfile +++ b/epicbox-python/312/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ COPY requirements.txt /tmp RUN pip install --no-cache-dir -r /tmp/requirements.txt \ - && python -m nltk.downloader -d ${NLTK_DIR} averaged_perceptron_tagger brown gutenberg movie_reviews omw-1.4 punkt treebank word2vec_sample wordnet \ + && python -m nltk.downloader -d ${NLTK_DIR} averaged_perceptron_tagger brown gutenberg movie_reviews omw-1.4 punkt punkt_tab treebank word2vec_sample wordnet \ && wget -qO- https://download.cdn.yandex.net/mystem/mystem-3.1-linux-64bit.tar.gz | tar xvz -C ${MYSTEM_DIR} \ && rm /tmp/requirements.txt