diff --git a/.make.defaults b/.make.defaults index f9f58500fd..e1bd5275ae 100644 --- a/.make.defaults +++ b/.make.defaults @@ -235,6 +235,10 @@ __check_defined = \ cp -p -R ${LIB_PATH}/src ${LIB_NAME} cp -p -R ${LIB_PATH}/pyproject.toml ${LIB_NAME} cp -p -R ${LIB_PATH}/README.md ${LIB_NAME} + if [ -e ${LIB_PATH}/requirements.txt ]; then \ + cp -p ${LIB_PATH}/requirements.txt ${LIB_NAME}; \ + fi + # Build and image using the local Dockerfile and make the data-processing-lib/python # available in the current directory for use by the Dockerfile (i.e. to install the library). @@ -591,8 +595,9 @@ MINIO_ADMIN_PWD= localminiosecretkey # Updates the versions references to our repo source as defined in .make.versions .PHONY: .defaults.__update-toml-lib-dep-versions .defaults.__update-toml-lib-dep-versions: +ifeq ($(USE_REPO_LIB_SRC), 1) @# Help: Update pyproject.toml to depend on lib versions defined in .make.versions - @if [ -e pyproject.toml ]; then \ + if [ -e pyproject.toml ]; then \ cat pyproject.toml | sed \ -e 's/"data-prep-toolkit-ray\([=><~][=]\).*"/"data-prep-toolkit-ray\1$(DPK_LIB_VERSION)"/' \ -e 's/"data-prep-toolkit-spark\([=><~][=]\).*"/"data-prep-toolkit-spark\1$(DPK_LIB_VERSION)"/' \ @@ -603,7 +608,7 @@ MINIO_ADMIN_PWD= localminiosecretkey > tt.toml; \ mv tt.toml pyproject.toml; \ fi - @if [ -e requirements.txt ]; then \ + if [ -e requirements.txt ]; then \ cat requirements.txt | sed \ -e 's/data-prep-toolkit-ray\([=><~][=]\).*/data-prep-toolkit-ray\1$(DPK_LIB_VERSION)/' \ -e 's/data-prep-toolkit-transforms\([=><~][=]\).*/data-prep-toolkit-transforms\1$(DPK_TRANSFORMS_VERSION)/' \ @@ -615,6 +620,7 @@ MINIO_ADMIN_PWD= localminiosecretkey > tt.txt; \ mv tt.txt requirements.txt; \ fi +endif # Build the distribution, usually in preparation for publishing using ith the .defaults.publish-dist target .PHONY: .defaults.build-dist diff --git a/.make.versions b/.make.versions index dd599aa04b..4346291ccd 100644 --- a/.make.versions +++ b/.make.versions @@ -25,9 +25,9 
@@ DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_ # publish docker images with latest tag ifeq ($(DPK_VERSION_SUFFIX), ) - DOCKER_IMAGE_VERSION=$(DPK_VERSION) + DOCKER_IMAGE_VERSION?=$(DPK_VERSION) else - DOCKER_IMAGE_VERSION=latest + DOCKER_IMAGE_VERSION?=latest endif # Data prep lab wheel version @@ -39,82 +39,6 @@ DPK_LIB_KFP_SHARED=$(DPK_VERSION) KFP_DOCKER_VERSION=$(DOCKER_IMAGE_VERSION) KFP_DOCKER_VERSION_v2=$(DOCKER_IMAGE_VERSION) -# Begin transform versions/tags -BLOCKLIST_VERSION=$(DPK_VERSION) - -DOC_ID_PYTHON_VERSION=$(DPK_VERSION) -DOC_ID_RAY_VERSION=$(DPK_VERSION) -DOC_ID_SPARK_VERSION=$(DPK_VERSION) - -EDEDUP_PYTHON_VERSION=$(DPK_VERSION) -EDEDUP_RAY_VERSION=$(DPK_VERSION) - -FDEDUP_RAY_VERSION=$(DPK_VERSION) - -FILTER_PYTHON_VERSION=$(DPK_VERSION) -FILTER_RAY_VERSION=$(DPK_VERSION) -FILTER_SPARK_VERSION=$(DPK_VERSION) - -NOOP_PYTHON_VERSION=$(DPK_VERSION) -NOOP_RAY_VERSION=$(DPK_VERSION) -NOOP_SPARK_VERSION=$(DPK_VERSION) - -PROFILER_PYTHON_VERSION=$(DPK_VERSION) -PROFILER_RAY_VERSION=$(DPK_VERSION) -PROFILER_SPARK_VERSION=$(DPK_VERSION) - -RESIZE_PYTHON_VERSION=$(DPK_VERSION) -RESIZE_RAY_VERSION=$(DPK_VERSION) -RESIZE_SPARK_VERSION=$(DPK_VERSION) - -LANG_ID_PYTHON_VERSION=$(DPK_VERSION) -LANG_ID_RAY_VERSION=$(DPK_VERSION) - -TOKENIZATION_RAY_VERSION=$(DPK_VERSION) -TOKENIZATION_PYTHON_VERSION=$(DPK_VERSION) - -MALWARE_RAY_VERSION=$(DPK_VERSION) -MALWARE_PYTHON_VERSION=$(DPK_VERSION) - -PROGLANG_SELECT_PYTHON_VERSION=$(DPK_VERSION) -PROGLANG_SELECT_RAY_VERSION=$(DPK_VERSION) - -DOC_QUALITY_PYTHON_VERSION=$(DPK_VERSION) -DOC_QUALITY_RAY_VERSION=$(DPK_VERSION) - -CODE_QUALITY_RAY_VERSION=$(DPK_VERSION) -CODE_QUALITY_PYTHON_VERSION=$(DPK_VERSION) - -CODE2PARQUET_PYTHON_VERSION=$(DPK_VERSION) -CODE2PARQUET_RAY_VERSION=$(DPK_VERSION) -INGEST_TO_PARQUET_VERSION=$(DPK_VERSION) -REPO_LVL_ORDER_RAY_VERSION=$(DPK_VERSION) - -PDF2PARQUET_PYTHON_VERSION=$(DPK_VERSION) -PDF2PARQUET_RAY_VERSION=$(DPK_VERSION) - 
-DOC_CHUNK_PYTHON_VERSION=$(DPK_VERSION) -DOC_CHUNK_RAY_VERSION=$(DPK_VERSION) - -TEXT_ENCODER_PYTHON_VERSION=$(DPK_VERSION) -TEXT_ENCODER_RAY_VERSION=$(DPK_VERSION) - -HEADER_CLEANSER_PYTHON_VERSION=$(DPK_VERSION) -HEADER_CLEANSER_RAY_VERSION=$(DPK_VERSION) - -LICENSE_SELECT_PYTHON_VERSION=$(DPK_VERSION) -LICENSE_SELECT_RAY_VERSION=$(DPK_VERSION) - -PII_REDACTOR_PYTHON_VERSION=$(DPK_VERSION) - -HTML2PARQUET_PYTHON_VERSION=$(DPK_VERSION) - -DPK_TRANSFORMS_VERSION=$(DPK_VERSION) - -SYNTACTIC_CONCEPT_EXTRACTOR_PYTHON_VERSION=$(DPK_VERSION) -SYNTACTIC_CONCEPT_EXTRACTOR_RAY_VERSION=$(DPK_VERSION) - - ################## ################## ################## ################## ################## ################## # Begin versions that the repo depends on. diff --git a/README.md b/README.md index aeec4ef704..b4d3723564 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,8 @@ The goal is to offer high-level APIs for developers to quickly get started in wo - [Scaling transforms from laptop to cluster](#laptop_cluster) - [Repository Use and Navigation](doc/repo.md) - [How to Contribute](CONTRIBUTING.md) -- [Papers and Talks](#talks_papers) +- [Talks and Papers](#talks_papers) +- [Citations](#citations) ## 📖 About @@ -131,7 +132,7 @@ The matrix below shows the the combination of modules and supported runtimes. 
Al | **Data Ingestion** | | | | | | [Code (from zip) to Parquet](transforms/code/code2parquet/python/README.md) | :white_check_mark: | :white_check_mark: | | :white_check_mark: | | [PDF to Parquet](transforms/language/pdf2parquet/python/README.md) | :white_check_mark: | :white_check_mark: | | :white_check_mark: | -| [HTML to Parquet](transforms/universal/html2parquet/python/README.md) | :white_check_mark: | | | | +| [HTML to Parquet](transforms/language/html2parquet/python/README.md) | :white_check_mark: | :white_check_mark: | | | | **Universal (Code & Language)** | | | | | | [Exact dedup filter](transforms/universal/ededup/ray/README.md) | :white_check_mark: | :white_check_mark: | | :white_check_mark: | | [Fuzzy dedup filter](transforms/universal/fdedup/ray/README.md) | | :white_check_mark: | | :white_check_mark: | @@ -220,3 +221,23 @@ You can run transforms via docker image or using virtual environments. This [doc 5. Talk on "Hands on session for fine tuning LLMs" [Video](https://www.youtube.com/watch?v=VEHIA3E64DM) 6. 
Talk on "Build your own data preparation module using data-prep-kit" [Video](https://www.youtube.com/watch?v=0WUMG6HIgMg) +## Citations + +If you use Data Prep Kit in your research, please cite our paper: + +```bibtex +@misc{wood2024dataprepkitgettingdataready, + title={Data-Prep-Kit: getting your data ready for LLM application development}, + author={David Wood and Boris Lublinsky and Alexy Roytman and Shivdeep Singh + and Abdulhamid Adebayo and Revital Eres and Mohammad Nassar and Hima Patel + and Yousaf Shah and Constantin Adam and Petros Zerfos and Nirmit Desai + and Daiki Tsuzuku and Takuya Goto and Michele Dolfi and Saptha Surendran + and Paramesvaran Selvam and Sungeun An and Yuan Chi Chang and Dhiraj Joshi + and Hajar Emami-Gohari and Xuan-Hong Dang and Yan Koyfman and Shahrokh Daijavad}, + year={2024}, + eprint={2409.18164}, + archivePrefix={arXiv}, + primaryClass={cs.AI}, + url={https://arxiv.org/abs/2409.18164}, +} +``` \ No newline at end of file diff --git a/transforms/code/code2parquet/kfp_ray/Makefile b/transforms/code/code2parquet/kfp_ray/Makefile index 6b9e640d17..847a743b8a 100644 --- a/transforms/code/code2parquet/kfp_ray/Makefile +++ b/transforms/code/code2parquet/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/code/code2parquet/python/Makefile b/transforms/code/code2parquet/python/Makefile index d0403e6012..e27e402c7c 100644 --- a/transforms/code/code2parquet/python/Makefile +++ b/transforms/code/code2parquet/python/Makefile @@ -1,16 +1,22 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. 
+ +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - -# $(REPOROOT)/.make.versions file contains the versions - -TRANSFORM_NAME=code2parquet - include $(REPOROOT)/transforms/.make.transforms +# Include the common configuration for this transform +include ../transform.config + venv:: .transforms.python-venv test:: .transforms.python-test diff --git a/transforms/code/code2parquet/ray/Makefile b/transforms/code/code2parquet/ray/Makefile index bc15809873..42383457f9 100644 --- a/transforms/code/code2parquet/ray/Makefile +++ b/transforms/code/code2parquet/ray/Makefile @@ -1,12 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. 
include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=code2parquet +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv diff --git a/transforms/code/code2parquet/transform.config b/transforms/code/code2parquet/transform.config new file mode 100644 index 0000000000..2049a2261d --- /dev/null +++ b/transforms/code/code2parquet/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=code2parquet + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+CODE2PARQUET_PYTHON_VERSION=$(DPK_VERSION) +CODE2PARQUET_RAY_VERSION=$(CODE2PARQUET_PYTHON_VERSION) +CODE2PARQUET_SPARK_VERSION=$(CODE2PARQUET_PYTHON_VERSION) + diff --git a/transforms/code/code_quality/kfp_ray/Makefile b/transforms/code/code_quality/kfp_ray/Makefile index a22efcf8ed..1cab0d8789 100644 --- a/transforms/code/code_quality/kfp_ray/Makefile +++ b/transforms/code/code_quality/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') @@ -48,4 +51,4 @@ workflow-test: workflow-build workflow-upload: workflow-build @for file in $(YAML_WF); do \ $(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \ - done \ No newline at end of file + done diff --git a/transforms/code/code_quality/python/Makefile b/transforms/code/code_quality/python/Makefile index 1b50d41b88..cd9811f797 100644 --- a/transforms/code/code_quality/python/Makefile +++ b/transforms/code/code_quality/python/Makefile @@ -1,10 +1,21 @@ - -# Define the root of the local git clone for the common rules to be able +# Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + +# Include a library of common .transform.* targets which most +# transforms should be able to reuse. However, feel free +# to override/redefine the rules below. 
include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=code_quality +# Include the common configuration for this transform +include ../transform.config # Use default rule inherited from makefile.common clean:: .transforms.clean diff --git a/transforms/code/code_quality/ray/Makefile b/transforms/code/code_quality/ray/Makefile index 720cf9c00d..5a744e861b 100644 --- a/transforms/code/code_quality/ray/Makefile +++ b/transforms/code/code_quality/ray/Makefile @@ -1,10 +1,21 @@ - -# Define the root of the local git clone for the common rules to be able +# Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + +# Include a library of common .transform.* targets which most +# transforms should be able to reuse. However, feel free +# to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=code_quality +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} diff --git a/transforms/code/code_quality/transform.config b/transforms/code/code_quality/transform.config new file mode 100644 index 0000000000..4ebec625a5 --- /dev/null +++ b/transforms/code/code_quality/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. 
+TRANSFORM_NAME=code_quality + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +CODE_QUALITY_PYTHON_VERSION=$(DPK_VERSION) +CODE_QUALITY_RAY_VERSION=$(CODE_QUALITY_PYTHON_VERSION) +CODE_QUALITY_SPARK_VERSION=$(CODE_QUALITY_PYTHON_VERSION) + diff --git a/transforms/code/header_cleanser/kfp_ray/Makefile b/transforms/code/header_cleanser/kfp_ray/Makefile.disable-cicd similarity index 91% rename from transforms/code/header_cleanser/kfp_ray/Makefile rename to transforms/code/header_cleanser/kfp_ray/Makefile.disable-cicd index 05a3433847..411cc97f1e 100644 --- a/transforms/code/header_cleanser/kfp_ray/Makefile +++ b/transforms/code/header_cleanser/kfp_ray/Makefile.disable-cicd @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') @@ -48,4 +51,4 @@ workflow-test: workflow-build workflow-upload: workflow-build @for file in $(YAML_WF); do \ $(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \ - done \ No newline at end of file + done diff --git a/transforms/code/header_cleanser/python/Makefile b/transforms/code/header_cleanser/python/Makefile index 1e3fa68fd8..0a91a14d6c 100644 --- a/transforms/code/header_cleanser/python/Makefile +++ b/transforms/code/header_cleanser/python/Makefile @@ -1,10 +1,21 @@ - -# Define the root of the local git clone for the common rules to be able +# Define the root of the local git clone for the common rules to be 
able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + +# Include a library of common .transform.* targets which most +# transforms should be able to reuse. However, feel free +# to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=header_cleanser +# Include the common configuration for this transform +include ../transform.config # Use default rule inherited from makefile.common clean:: .transforms.clean diff --git a/transforms/code/header_cleanser/ray/Dockerfile b/transforms/code/header_cleanser/ray/Dockerfile index 6521c8662f..16f8cf69c1 100644 --- a/transforms/code/header_cleanser/ray/Dockerfile +++ b/transforms/code/header_cleanser/ray/Dockerfile @@ -17,9 +17,11 @@ COPY --chown=ray:users pyproject.toml pyproject.toml RUN pip install --no-cache-dir -e . # Install system dependencies, including libgomp1 +USER root RUN sudo apt-get update && sudo apt-get install -y \ libgomp1 \ && sudo rm -rf /var/lib/apt/lists/* +USER ray # copy source data COPY ./src/header_cleanser_transform_ray.py . 
@@ -36,4 +38,4 @@ ENV PYTHONPATH /home/ray ARG BUILD_DATE ARG GIT_COMMIT LABEL build-date=$BUILD_DATE -LABEL git-commit=$GIT_COMMIT \ No newline at end of file +LABEL git-commit=$GIT_COMMIT diff --git a/transforms/code/header_cleanser/ray/Makefile b/transforms/code/header_cleanser/ray/Makefile index d223bc1cbf..9d83c71d0e 100644 --- a/transforms/code/header_cleanser/ray/Makefile +++ b/transforms/code/header_cleanser/ray/Makefile @@ -1,10 +1,21 @@ - -# Define the root of the local git clone for the common rules to be able +# Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + +# Include a library of common .transform.* targets which most +# transforms should be able to reuse. However, feel free +# to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=header_cleanser +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} diff --git a/transforms/code/header_cleanser/transform.config b/transforms/code/header_cleanser/transform.config new file mode 100644 index 0000000000..e1da13d0c6 --- /dev/null +++ b/transforms/code/header_cleanser/transform.config @@ -0,0 +1,18 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. 
+TRANSFORM_NAME=header_cleanser + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +HEADER_CLEANSER_PYTHON_VERSION=$(DPK_VERSION) +HEADER_CLEANSER_RAY_VERSION=$(HEADER_CLEANSER_PYTHON_VERSION) diff --git a/transforms/code/license_select/kfp_ray/Makefile.disable-cicd b/transforms/code/license_select/kfp_ray/Makefile.disable-cicd index 9f21f3d580..28e244faa9 100644 --- a/transforms/code/license_select/kfp_ray/Makefile.disable-cicd +++ b/transforms/code/license_select/kfp_ray/Makefile.disable-cicd @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/code/license_select/python/Makefile b/transforms/code/license_select/python/Makefile index 7077c801fa..2f3825fdae 100644 --- a/transforms/code/license_select/python/Makefile +++ b/transforms/code/license_select/python/Makefile @@ -1,10 +1,22 @@ -# Define the root of the local git clone for the common rules to be able +# Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. 
data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + +# Include a library of common .transform.* targets which most +# transforms should be able to reuse. However, feel free +# to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=license_select -# $(REPOROOT)/.make.versions file contains the versions +# Include the common configuration for this transform +include ../transform.config + DOCKER_IMAGE_VERSION=${LICENSE_SELECT_PYTHON_VERSION} # Use default rule inherited from makefile.common diff --git a/transforms/code/license_select/ray/Makefile b/transforms/code/license_select/ray/Makefile index 25fe6ab8a8..d69cf00ca4 100644 --- a/transforms/code/license_select/ray/Makefile +++ b/transforms/code/license_select/ray/Makefile @@ -1,15 +1,24 @@ - -# Define the root of the local git clone for the common rules to be able +# Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. 
include $(REPOROOT)/transforms/.make.transforms +# Include the common configuration for this transform +include ../transform.config + BASE_IMAGE=$(RAY_BASE_IMAGE) -TRANSFORM_NAME=license_select -# $(REPOROOT)/.make.versions file contains the versions + DOCKER_IMAGE_VERSION=${LICENSE_SELECT_RAY_VERSION} # Use default rule inherited from makefile.common diff --git a/transforms/code/license_select/transform.config b/transforms/code/license_select/transform.config new file mode 100644 index 0000000000..bba10d3e5f --- /dev/null +++ b/transforms/code/license_select/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=license_select + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+LICENSE_SELECT_PYTHON_VERSION=$(DPK_VERSION) +LICENSE_SELECT_RAY_VERSION=$(LICENSE_SELECT_PYTHON_VERSION) +LICENSE_SELECT_SPARK_VERSION=$(LICENSE_SELECT_PYTHON_VERSION) + diff --git a/transforms/code/malware/kfp_ray/Makefile b/transforms/code/malware/kfp_ray/Makefile index 7b423d8bdc..0446e2d291 100644 --- a/transforms/code/malware/kfp_ray/Makefile +++ b/transforms/code/malware/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') @@ -48,4 +51,4 @@ workflow-test: workflow-build workflow-upload: workflow-build @for file in $(YAML_WF); do \ $(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \ - done \ No newline at end of file + done diff --git a/transforms/code/malware/python/Makefile b/transforms/code/malware/python/Makefile index 99174e9a1d..bd523b6291 100644 --- a/transforms/code/malware/python/Makefile +++ b/transforms/code/malware/python/Makefile @@ -1,12 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. 
include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=malware +# Include the common configuration for this transform +include ../transform.config OS := $(shell uname -s) ifeq ($(OS),Darwin) diff --git a/transforms/code/malware/ray/Makefile b/transforms/code/malware/ray/Makefile index 99515c0363..a92cbd529d 100644 --- a/transforms/code/malware/ray/Makefile +++ b/transforms/code/malware/ray/Makefile @@ -1,12 +1,22 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=malware +# Include the common configuration for this transform +include ../transform.config + BASE_IMAGE=${RAY_BASE_IMAGE} OS := $(shell uname -s) ifeq ($(OS),Darwin) diff --git a/transforms/code/malware/transform.config b/transforms/code/malware/transform.config new file mode 100644 index 0000000000..be0b6651d6 --- /dev/null +++ b/transforms/code/malware/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. 
+TRANSFORM_NAME=malware + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +MALWARE_PYTHON_VERSION=$(DPK_VERSION) +MALWARE_RAY_VERSION=$(MALWARE_PYTHON_VERSION) +MALWARE_SPARK_VERSION=$(MALWARE_PYTHON_VERSION) + diff --git a/transforms/code/proglang_select/kfp_ray/Makefile b/transforms/code/proglang_select/kfp_ray/Makefile index abbf75c8cd..b8a21bca83 100644 --- a/transforms/code/proglang_select/kfp_ray/Makefile +++ b/transforms/code/proglang_select/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/code/proglang_select/python/Makefile b/transforms/code/proglang_select/python/Makefile index 2cec4f6db4..7d64e0a904 100644 --- a/transforms/code/proglang_select/python/Makefile +++ b/transforms/code/proglang_select/python/Makefile @@ -1,12 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. 
data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=proglang_select +# Include the common configuration for this transform +include ../transform.config venv:: .transforms.python-venv diff --git a/transforms/code/proglang_select/ray/Makefile b/transforms/code/proglang_select/ray/Makefile index 82db54db71..20315a2347 100644 --- a/transforms/code/proglang_select/ray/Makefile +++ b/transforms/code/proglang_select/ray/Makefile @@ -1,12 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=proglang_select +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv diff --git a/transforms/code/proglang_select/transform.config b/transforms/code/proglang_select/transform.config new file mode 100644 index 0000000000..c32cb9775b --- /dev/null +++ b/transforms/code/proglang_select/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. 
+# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=proglang_select + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +PROGLANG_SELECT_PYTHON_VERSION=$(DPK_VERSION) +PROGLANG_SELECT_RAY_VERSION=$(PROGLANG_SELECT_PYTHON_VERSION) +PROGLANG_SELECT_SPARK_VERSION=$(PROGLANG_SELECT_PYTHON_VERSION) + diff --git a/transforms/code/repo_level_ordering/kfp_ray/Makefile.disable-cicd b/transforms/code/repo_level_ordering/kfp_ray/Makefile.disable-cicd index ef3765e31b..5b2425357c 100644 --- a/transforms/code/repo_level_ordering/kfp_ray/Makefile.disable-cicd +++ b/transforms/code/repo_level_ordering/kfp_ray/Makefile.disable-cicd @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/code/repo_level_ordering/ray/Makefile b/transforms/code/repo_level_ordering/ray/Makefile index 83f8692de3..8d2f784fb2 100644 --- a/transforms/code/repo_level_ordering/ray/Makefile +++ b/transforms/code/repo_level_ordering/ray/Makefile @@ -1,15 +1,23 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. 
+ +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - include $(REPOROOT)/transforms/.make.transforms -BASE_IMAGE=$(RAY_BASE_IMAGE) +# Include the common configuration for this transform +include ../transform.config -TRANSFORM_NAME=repo_level_order +BASE_IMAGE=$(RAY_BASE_IMAGE) venv:: .transforms.ray-venv diff --git a/transforms/code/repo_level_ordering/transform.config b/transforms/code/repo_level_ordering/transform.config new file mode 100644 index 0000000000..0d82c6377e --- /dev/null +++ b/transforms/code/repo_level_ordering/transform.config @@ -0,0 +1,19 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=repo_level_order + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+REPO_LVL_ORDER_RAY_VERSION=$(DPK_VERSION) + + diff --git a/transforms/code/syntactic_concept_extractor/python/Makefile b/transforms/code/syntactic_concept_extractor/python/Makefile index c0cc966379..87d5b46bb7 100644 --- a/transforms/code/syntactic_concept_extractor/python/Makefile +++ b/transforms/code/syntactic_concept_extractor/python/Makefile @@ -3,14 +3,12 @@ REPOROOT=../../../.. # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free -# to override/redefine the rules below. - -# $(REPOROOT)/.make.versions file contains the versions - -TRANSFORM_NAME=syntactic_concept_extractor - +# to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms +# Include the common configuration for this transform +include ../transform.config + # values possible mach-arm64, x86_64 export RUNTIME_HOST_ARCH=x86_64 diff --git a/transforms/code/syntactic_concept_extractor/ray/Makefile b/transforms/code/syntactic_concept_extractor/ray/Makefile index ecc3a34dcc..bca844f7a9 100644 --- a/transforms/code/syntactic_concept_extractor/ray/Makefile +++ b/transforms/code/syntactic_concept_extractor/ray/Makefile @@ -7,7 +7,8 @@ REPOROOT=../../../.. 
include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=syntactic_concept_extractor +# Include the common configuration for this transform +include ../transform.config # values possible mach-arm64, x86_64 export RUNTIME_HOST_ARCH=x86_64 @@ -15,8 +16,6 @@ export RUNTIME_HOST_ARCH=x86_64 BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv -venv:: .transforms.python-venv - test:: .transforms.ray-test clean:: .transforms.clean diff --git a/transforms/code/syntactic_concept_extractor/transform.config b/transforms/code/syntactic_concept_extractor/transform.config new file mode 100644 index 0000000000..520069931f --- /dev/null +++ b/transforms/code/syntactic_concept_extractor/transform.config @@ -0,0 +1,19 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=syntactic_concept_extractor + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+SYNTACTIC_CONCEPT_EXTRACTOR_PYTHON_VERSION=$(DPK_VERSION) +SYNTACTIC_CONCEPT_EXTRACTOR_RAY_VERSION=$(SYNTACTIC_CONCEPT_EXTRACTOR_PYTHON_VERSION) + diff --git a/transforms/language/doc_chunk/kfp_ray/Makefile b/transforms/language/doc_chunk/kfp_ray/Makefile index 189b36ea5d..30e912e338 100644 --- a/transforms/language/doc_chunk/kfp_ray/Makefile +++ b/transforms/language/doc_chunk/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/language/doc_chunk/python/Makefile b/transforms/language/doc_chunk/python/Makefile index a6fbe35dc8..2f2a7e7895 100644 --- a/transforms/language/doc_chunk/python/Makefile +++ b/transforms/language/doc_chunk/python/Makefile @@ -1,14 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below.
- -# $(REPOROOT)/.make.versions file contains the versions include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=doc_chunk +# Include the common configuration for this transform +include ../transform.config LINUX_WITH_CPU_TORCH?=true OS := $(shell uname -s) diff --git a/transforms/language/doc_chunk/ray/Makefile b/transforms/language/doc_chunk/ray/Makefile index 6b9b4ae6a2..b4f394f847 100644 --- a/transforms/language/doc_chunk/ray/Makefile +++ b/transforms/language/doc_chunk/ray/Makefile @@ -1,13 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=doc_chunk +# Include the common configuration for this transform +include ../transform.config LINUX_WITH_CPU_TORCH?=true OS := $(shell uname -s) diff --git a/transforms/language/doc_chunk/transform.config b/transforms/language/doc_chunk/transform.config new file mode 100644 index 0000000000..f433f360be --- /dev/null +++ b/transforms/language/doc_chunk/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. 
+TRANSFORM_NAME=doc_chunk + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +DOC_CHUNK_PYTHON_VERSION=$(DPK_VERSION) +DOC_CHUNK_RAY_VERSION=$(DOC_CHUNK_PYTHON_VERSION) +DOC_CHUNK_SPARK_VERSION=$(DOC_CHUNK_PYTHON_VERSION) + diff --git a/transforms/language/doc_quality/kfp_ray/Makefile b/transforms/language/doc_quality/kfp_ray/Makefile index 004f176162..9f5e936159 100644 --- a/transforms/language/doc_quality/kfp_ray/Makefile +++ b/transforms/language/doc_quality/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/language/doc_quality/python/Makefile b/transforms/language/doc_quality/python/Makefile index 684ce47aee..f0f3094009 100644 --- a/transforms/language/doc_quality/python/Makefile +++ b/transforms/language/doc_quality/python/Makefile @@ -1,16 +1,22 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. 
data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - -# $(REPOROOT)/.make.versions file contains the versions - -TRANSFORM_NAME=doc_quality - include $(REPOROOT)/transforms/.make.transforms +# Include the common configuration for this transform +include ../transform.config + venv:: .transforms.python-venv test:: .transforms.test-src test-image diff --git a/transforms/language/doc_quality/ray/Makefile b/transforms/language/doc_quality/ray/Makefile index d462543a18..dd278af885 100644 --- a/transforms/language/doc_quality/ray/Makefile +++ b/transforms/language/doc_quality/ray/Makefile @@ -1,13 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. 
include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=doc_quality -# $(REPOROOT)/.make.versions file contains the versions +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv diff --git a/transforms/language/doc_quality/transform.config b/transforms/language/doc_quality/transform.config new file mode 100644 index 0000000000..2ece0e0715 --- /dev/null +++ b/transforms/language/doc_quality/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=doc_quality + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +DOC_QUALITY_PYTHON_VERSION=$(DPK_VERSION) +DOC_QUALITY_RAY_VERSION=$(DOC_QUALITY_PYTHON_VERSION) +DOC_QUALITY_SPARK_VERSION=$(DOC_QUALITY_PYTHON_VERSION) + diff --git a/transforms/language/html2parquet/python/Makefile b/transforms/language/html2parquet/python/Makefile index 0e552d5bed..284bb8e8a6 100644 --- a/transforms/language/html2parquet/python/Makefile +++ b/transforms/language/html2parquet/python/Makefile @@ -1,16 +1,22 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. 
+ +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - -# $(REPOROOT)/.make.versions file contains the versions - -TRANSFORM_NAME= html2parquet - include $(REPOROOT)/transforms/.make.transforms +# Include the common configuration for this transform +include ../transform.config + venv:: .transforms.python-venv test:: .transforms.python-test diff --git a/transforms/language/html2parquet/ray/Makefile b/transforms/language/html2parquet/ray/Makefile index 30c9082598..1667be8b98 100644 --- a/transforms/language/html2parquet/ray/Makefile +++ b/transforms/language/html2parquet/ray/Makefile @@ -1,15 +1,22 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. 
+include $(REPOROOT)/transforms/.make.transforms -# $(REPOROOT)/.make.versions file contains the versions - -TRANSFORM_NAME= html2parquet +# Include the common configuration for this transform +include ../transform.config -include $(REPOROOT)/transforms/.make.transforms BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv @@ -59,3 +66,6 @@ kind-load-image:: .transforms.kind-load-image docker-load-image: .defaults.docker-load-image docker-save-image: .defaults.docker-save-image + + + diff --git a/transforms/language/html2parquet/ray/requirements.txt b/transforms/language/html2parquet/ray/requirements.txt index dc2111e9e1..dc796d6028 100644 --- a/transforms/language/html2parquet/ray/requirements.txt +++ b/transforms/language/html2parquet/ray/requirements.txt @@ -1,3 +1,2 @@ dpk-html2parquet-transform-python==0.2.2.dev0 data-prep-toolkit-ray==0.2.2.dev0 -trafilatura==1.12.0 \ No newline at end of file diff --git a/transforms/language/html2parquet/transform.config b/transforms/language/html2parquet/transform.config new file mode 100644 index 0000000000..10847c6aff --- /dev/null +++ b/transforms/language/html2parquet/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=html2parquet + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+HTML2PARQUET_PYTHON_VERSION=$(DPK_VERSION) +HTML2PARQUET_RAY_VERSION=$(HTML2PARQUET_PYTHON_VERSION) +HTML2PARQUET_SPARK_VERSION=$(HTML2PARQUET_PYTHON_VERSION) + diff --git a/transforms/language/lang_id/kfp_ray/Makefile b/transforms/language/lang_id/kfp_ray/Makefile index b8f11ffc8f..fd2c42d8e4 100644 --- a/transforms/language/lang_id/kfp_ray/Makefile +++ b/transforms/language/lang_id/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/language/lang_id/python/Makefile b/transforms/language/lang_id/python/Makefile index 441f6093d1..972ccb729d 100644 --- a/transforms/language/lang_id/python/Makefile +++ b/transforms/language/lang_id/python/Makefile @@ -1,13 +1,20 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below.
+include $(REPOROOT)/transforms/.make.transforms -# $(REPOROOT)/.make.versions file contains the versions - -TRANSFORM_NAME=lang_id +# Include the common configuration for this transform +include ../transform.config -include $(REPOROOT)/transforms/.make.transforms diff --git a/transforms/language/lang_id/ray/Makefile b/transforms/language/lang_id/ray/Makefile index 6b0e307d7e..1339af9649 100644 --- a/transforms/language/lang_id/ray/Makefile +++ b/transforms/language/lang_id/ray/Makefile @@ -1,12 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=lang_id +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv diff --git a/transforms/language/lang_id/transform.config b/transforms/language/lang_id/transform.config new file mode 100644 index 0000000000..3a969f41df --- /dev/null +++ b/transforms/language/lang_id/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name.
+TRANSFORM_NAME=lang_id + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +LANG_ID_PYTHON_VERSION=$(DPK_VERSION) +LANG_ID_RAY_VERSION=$(LANG_ID_PYTHON_VERSION) +LANG_ID_SPARK_VERSION=$(LANG_ID_PYTHON_VERSION) + diff --git a/transforms/language/pdf2parquet/kfp_ray/Makefile b/transforms/language/pdf2parquet/kfp_ray/Makefile index 24154bffa1..66edd91fca 100644 --- a/transforms/language/pdf2parquet/kfp_ray/Makefile +++ b/transforms/language/pdf2parquet/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/language/pdf2parquet/python/Makefile b/transforms/language/pdf2parquet/python/Makefile index 0e06a59003..b18b068ac7 100644 --- a/transforms/language/pdf2parquet/python/Makefile +++ b/transforms/language/pdf2parquet/python/Makefile @@ -1,14 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. 
data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - -# $(REPOROOT)/.make.versions file contains the versions include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=pdf2parquet +# Include the common configuration for this transform +include ../transform.config RUN_ARGS=" --data_local_config \"{ 'input_folder' : '../test-data/input', 'output_folder' : '../output'}\" \ --data_files_to_use \"['.pdf','.zip']\" " diff --git a/transforms/language/pdf2parquet/ray/Makefile b/transforms/language/pdf2parquet/ray/Makefile index fba43ea15f..ced1f45f17 100644 --- a/transforms/language/pdf2parquet/ray/Makefile +++ b/transforms/language/pdf2parquet/ray/Makefile @@ -1,13 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. 
- include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=pdf2parquet +# Include the common configuration for this transform +include ../transform.config LINUX_WITH_CPU_TORCH?=true OS := $(shell uname -s) diff --git a/transforms/language/pdf2parquet/transform.config b/transforms/language/pdf2parquet/transform.config new file mode 100644 index 0000000000..1bda1908ec --- /dev/null +++ b/transforms/language/pdf2parquet/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=pdf2parquet + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+PDF2PARQUET_PYTHON_VERSION=$(DPK_VERSION) +PDF2PARQUET_RAY_VERSION=$(PDF2PARQUET_PYTHON_VERSION) +PDF2PARQUET_SPARK_VERSION=$(PDF2PARQUET_PYTHON_VERSION) + diff --git a/transforms/language/pii_redactor/kfp_ray/Makefile b/transforms/language/pii_redactor/kfp_ray/Makefile index 77844a79ee..370f85cb02 100644 --- a/transforms/language/pii_redactor/kfp_ray/Makefile +++ b/transforms/language/pii_redactor/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/language/pii_redactor/python/Makefile b/transforms/language/pii_redactor/python/Makefile index 28fd33fff9..50161da6ee 100644 --- a/transforms/language/pii_redactor/python/Makefile +++ b/transforms/language/pii_redactor/python/Makefile @@ -1,16 +1,22 @@ -# Define the root of the local git clone for the common rules to be able +# Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. -# Include a library of common .transform.* targets which most -# transforms should be able to reuse. However, feel free -# to override/redefine the rules below. - -# $(REPOROOT)/.make.versions file contains the versions -TRANSFORM_NAME=pii_redactor +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 +# Include a library of common .transform.* targets which most +# transforms should be able to reuse. However, feel free +# to override/redefine the rules below. 
include $(REPOROOT)/transforms/.make.transforms +# Include the common configuration for this transform +include ../transform.config + venv:: .transforms.python-venv test:: .transforms.python-test diff --git a/transforms/language/pii_redactor/ray/Makefile b/transforms/language/pii_redactor/ray/Makefile index 3a67b90b8a..e524945340 100644 --- a/transforms/language/pii_redactor/ray/Makefile +++ b/transforms/language/pii_redactor/ray/Makefile @@ -1,13 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=pii_redactor +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv diff --git a/transforms/language/pii_redactor/transform.config b/transforms/language/pii_redactor/transform.config new file mode 100644 index 0000000000..c06adf82c6 --- /dev/null +++ b/transforms/language/pii_redactor/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. 
+TRANSFORM_NAME=pii_redactor + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +PII_REDACTOR_PYTHON_VERSION=$(DPK_VERSION) +PII_REDACTOR_RAY_VERSION=$(PII_REDACTOR_PYTHON_VERSION) +PII_REDACTOR_SPARK_VERSION=$(PII_REDACTOR_PYTHON_VERSION) + diff --git a/transforms/language/text_encoder/kfp_ray/Makefile.disable-cicd b/transforms/language/text_encoder/kfp_ray/Makefile.disable-cicd index 70613cc010..36bd475607 100644 --- a/transforms/language/text_encoder/kfp_ray/Makefile.disable-cicd +++ b/transforms/language/text_encoder/kfp_ray/Makefile.disable-cicd @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/language/text_encoder/python/Makefile b/transforms/language/text_encoder/python/Makefile index c9e8b8c1bb..564bb405b0 100644 --- a/transforms/language/text_encoder/python/Makefile +++ b/transforms/language/text_encoder/python/Makefile @@ -1,14 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. 
data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - -# $(REPOROOT)/.make.versions file contains the versions include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=text_encoder +# Include the common configuration for this transform +include ../transform.config LINUX_WITH_CPU_TORCH?=true OS := $(shell uname -s) diff --git a/transforms/language/text_encoder/ray/Makefile b/transforms/language/text_encoder/ray/Makefile index b95b299c48..85cf45cac3 100644 --- a/transforms/language/text_encoder/ray/Makefile +++ b/transforms/language/text_encoder/ray/Makefile @@ -1,13 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=text_encoder +# Include the common configuration for this transform +include ../transform.config LINUX_WITH_CPU_TORCH?=true OS := $(shell uname -s) diff --git a/transforms/language/text_encoder/transform.config b/transforms/language/text_encoder/transform.config new file mode 100644 index 0000000000..df5754fb8a --- /dev/null +++ b/transforms/language/text_encoder/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. 
+# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=text_encoder + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +TEXT_ENCODER_PYTHON_VERSION=$(DPK_VERSION) +TEXT_ENCODER_RAY_VERSION=$(TEXT_ENCODER_PYTHON_VERSION) +TEXT_ENCODER_SPARK_VERSION=$(TEXT_ENCODER_PYTHON_VERSION) + diff --git a/transforms/packaging/.make.packaging b/transforms/packaging/.make.packaging index 5268889d05..29506aaf16 100644 --- a/transforms/packaging/.make.packaging +++ b/transforms/packaging/.make.packaging @@ -2,6 +2,11 @@ ifndef T_SET T_SET=all endif +# Defines the version of the wheel for the package transforms +# If you change this value, you will need to run "make set-versions" to +# apply the new version number to the toml files. 
+DPK_TRANSFORMS_VERSION=$(DPK_VERSION) + venv: $(MAKE) .defaults.create-venv diff --git a/transforms/universal/doc_id/kfp_ray/Makefile b/transforms/universal/doc_id/kfp_ray/Makefile index 94fc751458..f170326e28 100644 --- a/transforms/universal/doc_id/kfp_ray/Makefile +++ b/transforms/universal/doc_id/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') @@ -48,4 +51,4 @@ workflow-test: workflow-build workflow-upload: workflow-build @for file in $(YAML_WF); do \ $(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \ - done \ No newline at end of file + done diff --git a/transforms/universal/doc_id/python/Makefile b/transforms/universal/doc_id/python/Makefile index 1f7d0d353f..26da1fc8fa 100644 --- a/transforms/universal/doc_id/python/Makefile +++ b/transforms/universal/doc_id/python/Makefile @@ -1,16 +1,22 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. 
- -# $(REPOROOT)/.make.versions file contains the versions - -TRANSFORM_NAME=doc_id - include $(REPOROOT)/transforms/.make.transforms +# Include the common configuration for this transform +include ../transform.config + venv:: .transforms.python-venv test:: .transforms.python-test diff --git a/transforms/universal/doc_id/ray/Makefile b/transforms/universal/doc_id/ray/Makefile index d7844f2f9e..79787406b0 100644 --- a/transforms/universal/doc_id/ray/Makefile +++ b/transforms/universal/doc_id/ray/Makefile @@ -1,12 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=doc_id +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv diff --git a/transforms/universal/doc_id/spark/Makefile b/transforms/universal/doc_id/spark/Makefile index 954786dac5..9303d021f7 100644 --- a/transforms/universal/doc_id/spark/Makefile +++ b/transforms/universal/doc_id/spark/Makefile @@ -1,12 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. 
+# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=doc_id +# Include the common configuration for this transform +include ../transform.config venv:: .transforms.spark-venv diff --git a/transforms/universal/doc_id/transform.config b/transforms/universal/doc_id/transform.config new file mode 100644 index 0000000000..d3715f3b20 --- /dev/null +++ b/transforms/universal/doc_id/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=doc_id + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+DOC_ID_PYTHON_VERSION=$(DPK_VERSION) +DOC_ID_RAY_VERSION=$(DOC_ID_PYTHON_VERSION) +DOC_ID_SPARK_VERSION=$(DOC_ID_PYTHON_VERSION) + diff --git a/transforms/universal/ededup/kfp_ray/Makefile b/transforms/universal/ededup/kfp_ray/Makefile index 456cf76d14..f0c5cc217a 100644 --- a/transforms/universal/ededup/kfp_ray/Makefile +++ b/transforms/universal/ededup/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') @@ -47,4 +50,4 @@ workflow-test: workflow-build workflow-upload: workflow-build @for file in $(YAML_WF); do \ $(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \ - done \ No newline at end of file + done diff --git a/transforms/universal/ededup/python/Makefile b/transforms/universal/ededup/python/Makefile index 92f3fac27d..348edc74db 100644 --- a/transforms/universal/ededup/python/Makefile +++ b/transforms/universal/ededup/python/Makefile @@ -1,16 +1,22 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. 
- -# $(REPOROOT)/.make.versions file contains the versions - -TRANSFORM_NAME=ededup - include $(REPOROOT)/transforms/.make.transforms +# Include the common configuration for this transform +include ../transform.config + venv:: .transforms.python-venv test:: .transforms.python-test diff --git a/transforms/universal/ededup/ray/Makefile b/transforms/universal/ededup/ray/Makefile index f828e107e8..1ff055e295 100644 --- a/transforms/universal/ededup/ray/Makefile +++ b/transforms/universal/ededup/ray/Makefile @@ -1,12 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=ededup +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv diff --git a/transforms/universal/ededup/transform.config b/transforms/universal/ededup/transform.config new file mode 100644 index 0000000000..12f5357f15 --- /dev/null +++ b/transforms/universal/ededup/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. 
+TRANSFORM_NAME=ededup + +################################################################################ +# This defines the transform's version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the version numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +EDEDUP_PYTHON_VERSION=$(DPK_VERSION) +EDEDUP_RAY_VERSION=$(EDEDUP_PYTHON_VERSION) +EDEDUP_SPARK_VERSION=$(EDEDUP_PYTHON_VERSION) + diff --git a/transforms/universal/fdedup/kfp_ray/Makefile b/transforms/universal/fdedup/kfp_ray/Makefile index f6b2159841..55f7851f6c 100644 --- a/transforms/universal/fdedup/kfp_ray/Makefile +++ b/transforms/universal/fdedup/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') @@ -48,4 +51,4 @@ workflow-test: workflow-build workflow-upload: workflow-build @for file in $(YAML_WF); do \ $(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \ - done \ No newline at end of file + done diff --git a/transforms/universal/fdedup/ray/Makefile b/transforms/universal/fdedup/ray/Makefile index 15173ba009..f5f06c3c32 100644 --- a/transforms/universal/fdedup/ray/Makefile +++ b/transforms/universal/fdedup/ray/Makefile @@ -1,14 +1,24 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting.
+# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=fdedup +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} + venv:: .transforms.ray-venv test:: .transforms.ray-test diff --git a/transforms/universal/fdedup/transform.config b/transforms/universal/fdedup/transform.config new file mode 100644 index 0000000000..774716e154 --- /dev/null +++ b/transforms/universal/fdedup/transform.config @@ -0,0 +1,18 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=fdedup + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+FDEDUP_RAY_VERSION=$(DPK_VERSION) + diff --git a/transforms/universal/filter/kfp_ray/Makefile b/transforms/universal/filter/kfp_ray/Makefile index bd26792be8..c48298d22a 100644 --- a/transforms/universal/filter/kfp_ray/Makefile +++ b/transforms/universal/filter/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/universal/filter/python/Makefile b/transforms/universal/filter/python/Makefile index 1ea1151ce5..9a01deea1d 100644 --- a/transforms/universal/filter/python/Makefile +++ b/transforms/universal/filter/python/Makefile @@ -1,16 +1,22 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit==0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below.
- -# $(REPOROOT)/.make.versions file contains the versions - -TRANSFORM_NAME=filter - include $(REPOROOT)/transforms/.make.transforms +# Include the common configuration for this transform +include ../transform.config + venv:: .transforms.python-venv test:: .transforms.python-test diff --git a/transforms/universal/filter/ray/Makefile b/transforms/universal/filter/ray/Makefile index 5960a96707..0c0af00047 100644 --- a/transforms/universal/filter/ray/Makefile +++ b/transforms/universal/filter/ray/Makefile @@ -1,13 +1,21 @@ - # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=filter +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv diff --git a/transforms/universal/filter/spark/Makefile b/transforms/universal/filter/spark/Makefile index 329da35a25..72bc78a15b 100644 --- a/transforms/universal/filter/spark/Makefile +++ b/transforms/universal/filter/spark/Makefile @@ -1,13 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. 
+# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -# This is included in the image name, if defined -TRANSFORM_NAME=filter +# Include the common configuration for this transform +include ../transform.config venv:: .transforms.spark-venv diff --git a/transforms/universal/filter/transform.config b/transforms/universal/filter/transform.config new file mode 100644 index 0000000000..70f2ada5b6 --- /dev/null +++ b/transforms/universal/filter/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=filter + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+FILTER_PYTHON_VERSION=$(DPK_VERSION) +FILTER_RAY_VERSION=$(FILTER_PYTHON_VERSION) +FILTER_SPARK_VERSION=$(FILTER_PYTHON_VERSION) + diff --git a/transforms/universal/hap/python/Makefile b/transforms/universal/hap/python/Makefile index c7c15dba75..2363e51c2d 100644 --- a/transforms/universal/hap/python/Makefile +++ b/transforms/universal/hap/python/Makefile @@ -1,15 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -#TRANSFORM_RUNTIME_SRC_FILE=hap_transform_python.py -TRANSFORM_NAME=hap - -HAP_PYTHON_VERSION= $(DPK_VERSION) +# Include the common configuration for this transform +include ../transform.config venv:: .transforms.python-venv diff --git a/transforms/universal/hap/transform.config b/transforms/universal/hap/transform.config new file mode 100644 index 0000000000..6aa7018b30 --- /dev/null +++ b/transforms/universal/hap/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. 
+TRANSFORM_NAME=hap + +################################################################################ +# This defines the transform's version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the version numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +HAP_PYTHON_VERSION=$(DPK_VERSION) +HAP_RAY_VERSION=$(HAP_PYTHON_VERSION) +HAP_SPARK_VERSION=$(HAP_PYTHON_VERSION) + diff --git a/transforms/universal/noop/kfp_ray/Makefile b/transforms/universal/noop/kfp_ray/Makefile index d1198e5a2a..fc541f3677 100644 --- a/transforms/universal/noop/kfp_ray/Makefile +++ b/transforms/universal/noop/kfp_ray/Makefile @@ -1,7 +1,11 @@ REPOROOT=${CURDIR}/../../../../ + WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/universal/noop/python/Makefile b/transforms/universal/noop/python/Makefile index 80797bcc91..5e6121b04d 100644 --- a/transforms/universal/noop/python/Makefile +++ b/transforms/universal/noop/python/Makefile @@ -1,15 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit==0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below.
- -# $(REPOROOT)/.make.versions file contains the versions include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=noop - +# Include the common configuration for this transform +include ../transform.config venv:: .transforms.python-venv diff --git a/transforms/universal/noop/ray/Makefile b/transforms/universal/noop/ray/Makefile index 0b70f66629..ad7ff33201 100644 --- a/transforms/universal/noop/ray/Makefile +++ b/transforms/universal/noop/ray/Makefile @@ -1,15 +1,24 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=noop +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} + venv:: .transforms.ray-venv test:: .transforms.ray-test diff --git a/transforms/universal/noop/spark/Makefile b/transforms/universal/noop/spark/Makefile index 726fd9e6a5..ebc72992e0 100644 --- a/transforms/universal/noop/spark/Makefile +++ b/transforms/universal/noop/spark/Makefile @@ -1,13 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. 
+# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=noop +# Include the common configuration for this transform +include ../transform.config venv:: .transforms.spark-venv diff --git a/transforms/universal/noop/transform.config b/transforms/universal/noop/transform.config new file mode 100644 index 0000000000..49c9b2cbf5 --- /dev/null +++ b/transforms/universal/noop/transform.config @@ -0,0 +1,21 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=noop + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+NOOP_PYTHON_VERSION=$(DPK_VERSION) +NOOP_RAY_VERSION=$(NOOP_PYTHON_VERSION) +NOOP_SPARK_VERSION=$(NOOP_PYTHON_VERSION) + diff --git a/transforms/universal/profiler/kfp_ray/Makefile b/transforms/universal/profiler/kfp_ray/Makefile index 2fbd17653b..e4f6b860b2 100644 --- a/transforms/universal/profiler/kfp_ray/Makefile +++ b/transforms/universal/profiler/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') diff --git a/transforms/universal/profiler/python/Makefile b/transforms/universal/profiler/python/Makefile index 61c807a232..9832501843 100644 --- a/transforms/universal/profiler/python/Makefile +++ b/transforms/universal/profiler/python/Makefile @@ -1,16 +1,22 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. 
- -# $(REPOROOT)/.make.versions file contains the versions - -TRANSFORM_NAME=profiler - include $(REPOROOT)/transforms/.make.transforms +# Include the common configuration for this transform +include ../transform.config + venv:: .transforms.python-venv test:: .transforms.python-test diff --git a/transforms/universal/profiler/ray/Makefile b/transforms/universal/profiler/ray/Makefile index 8cec289685..12d75c4c3e 100644 --- a/transforms/universal/profiler/ray/Makefile +++ b/transforms/universal/profiler/ray/Makefile @@ -1,12 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=profiler +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv diff --git a/transforms/universal/profiler/spark/Makefile b/transforms/universal/profiler/spark/Makefile index cb90b4020f..39b16cac6e 100644 --- a/transforms/universal/profiler/spark/Makefile +++ b/transforms/universal/profiler/spark/Makefile @@ -1,13 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. 
+# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit=0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=profiler +# Include the common configuration for this transform +include ../transform.config venv:: .transforms.spark-venv diff --git a/transforms/universal/profiler/transform.config b/transforms/universal/profiler/transform.config new file mode 100644 index 0000000000..c86cd64156 --- /dev/null +++ b/transforms/universal/profiler/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=profiler + +################################################################################ +# This defines the transforms' version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the versions numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+PROFILER_PYTHON_VERSION=$(DPK_VERSION) +PROFILER_RAY_VERSION=$(PROFILER_PYTHON_VERSION) +PROFILER_SPARK_VERSION=$(PROFILER_PYTHON_VERSION) + diff --git a/transforms/universal/resize/kfp_ray/Makefile b/transforms/universal/resize/kfp_ray/Makefile index a0e2faf378..8c7e592afa 100644 --- a/transforms/universal/resize/kfp_ray/Makefile +++ b/transforms/universal/resize/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') @@ -48,4 +51,4 @@ workflow-test: workflow-build workflow-upload: workflow-build @for file in $(YAML_WF); do \ $(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \ - done \ No newline at end of file + done diff --git a/transforms/universal/resize/python/Makefile b/transforms/universal/resize/python/Makefile index 7de0032e3d..66453c8464 100644 --- a/transforms/universal/resize/python/Makefile +++ b/transforms/universal/resize/python/Makefile @@ -1,16 +1,22 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit==0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. 
- -# $(REPOROOT)/.make.versions file contains the versions - -TRANSFORM_NAME=resize - include $(REPOROOT)/transforms/.make.transforms +# Include the common configuration for this transform +include ../transform.config + venv:: .transforms.python-venv test:: .transforms.python-test diff --git a/transforms/universal/resize/ray/Makefile b/transforms/universal/resize/ray/Makefile index 1a2f2496f7..dd229b3f48 100644 --- a/transforms/universal/resize/ray/Makefile +++ b/transforms/universal/resize/ray/Makefile @@ -1,13 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit==0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free -# to override/redefine the rules below. - +# to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=resize +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv diff --git a/transforms/universal/resize/spark/Makefile b/transforms/universal/resize/spark/Makefile index f02e9db3f2..18d72d31d9 100644 --- a/transforms/universal/resize/spark/Makefile +++ b/transforms/universal/resize/spark/Makefile @@ -1,13 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). 
This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit==0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. - include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=resize +# Include the common configuration for this transform +include ../transform.config venv:: .transforms.spark-venv diff --git a/transforms/universal/resize/transform.config b/transforms/universal/resize/transform.config new file mode 100644 index 0000000000..4b7171a4e2 --- /dev/null +++ b/transforms/universal/resize/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. +TRANSFORM_NAME=resize + +################################################################################ +# This defines the transform's version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the version numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). 
+RESIZE_PYTHON_VERSION=$(DPK_VERSION) +RESIZE_RAY_VERSION=$(RESIZE_PYTHON_VERSION) +RESIZE_SPARK_VERSION=$(RESIZE_PYTHON_VERSION) + diff --git a/transforms/universal/tokenization/kfp_ray/Makefile b/transforms/universal/tokenization/kfp_ray/Makefile index 09656297a8..c43105ff17 100644 --- a/transforms/universal/tokenization/kfp_ray/Makefile +++ b/transforms/universal/tokenization/kfp_ray/Makefile @@ -2,6 +2,9 @@ REPOROOT=${CURDIR}/../../../../ WORKFLOW_VENV_ACTIVATE=${REPOROOT}/transforms/venv/bin/activate include $(REPOROOT)/transforms/.make.workflows +# Include the common configuration for this transform +include ../transform.config + SRC_DIR=${CURDIR}/../ray/ PYTHON_WF := $(shell find ./ -name '*_wf.py') @@ -48,4 +51,4 @@ workflow-test: workflow-build workflow-upload: workflow-build @for file in $(YAML_WF); do \ $(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \ - done \ No newline at end of file + done diff --git a/transforms/universal/tokenization/python/Makefile b/transforms/universal/tokenization/python/Makefile index d236619838..8f4f7fbf5a 100644 --- a/transforms/universal/tokenization/python/Makefile +++ b/transforms/universal/tokenization/python/Makefile @@ -1,12 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit==0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. 
include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=tokenization +# Include the common configuration for this transform +include ../transform.config venv:: .transforms.python-venv diff --git a/transforms/universal/tokenization/ray/Makefile b/transforms/universal/tokenization/ray/Makefile index 3d5a46d09c..0a4e3a370c 100644 --- a/transforms/universal/tokenization/ray/Makefile +++ b/transforms/universal/tokenization/ray/Makefile @@ -1,12 +1,21 @@ # Define the root of the local git clone for the common rules to be able # know where they are running from. REPOROOT=../../../.. + +# Set this, before including .make.defaults, to +# 1 if requirements reference the latest code in the data processing library +# in this repo (that is not yet published to pypi). This is the default setting. +# 0 if the transforms DPK dependencies are on wheels published to +# pypi (e.g. data-prep-toolkit==0.2.1) +#USE_REPO_LIB_SRC=1 + # Include a library of common .transform.* targets which most # transforms should be able to reuse. However, feel free # to override/redefine the rules below. include $(REPOROOT)/transforms/.make.transforms -TRANSFORM_NAME=tokenization +# Include the common configuration for this transform +include ../transform.config BASE_IMAGE=${RAY_BASE_IMAGE} venv:: .transforms.ray-venv diff --git a/transforms/universal/tokenization/transform.config b/transforms/universal/tokenization/transform.config new file mode 100644 index 0000000000..04f517d426 --- /dev/null +++ b/transforms/universal/tokenization/transform.config @@ -0,0 +1,20 @@ +# +# This is intended to be included across the Makefiles provided within +# a given transform's directory tree, so must use compatible syntax. +# +################################################################################ +# This defines the name of the transform and is used to match against +# expected files and is used to define the transform's image name. 
+TRANSFORM_NAME=tokenization + +################################################################################ +# This defines the transform's version number as would be used +# when publishing the wheel. In general, only the micro version +# number should be advanced relative to the DPK_VERSION. +# +# If you change the version numbers, be sure to run "make set-versions" to +# update version numbers across the transform (e.g., pyproject.toml). +TOKENIZATION_PYTHON_VERSION=$(DPK_VERSION) +TOKENIZATION_RAY_VERSION=$(TOKENIZATION_PYTHON_VERSION) +TOKENIZATION_SPARK_VERSION=$(TOKENIZATION_PYTHON_VERSION) +