From 5dceb177356187b1a84807aa52595f3da081759a Mon Sep 17 00:00:00 2001 From: Julien Cuquemelle Date: Wed, 24 May 2023 18:31:40 +0200 Subject: [PATCH] Drop python3.6 support (#108) * Drop python3.6 support --- .github/workflows/main.yml | 10 +++---- requirements.txt | 2 +- setup.py | 11 ++++---- tests/tensorflow/test_evaluator_task.py | 26 +++++++++---------- tests/test_client.py | 10 +++---- tf_yarn/bin/check_hadoop_env.py | 2 +- .../examples/collective_all_reduce_example.py | 6 ++--- tf_yarn/examples/linear_classifier_example.py | 2 +- tf_yarn/examples/mlflow_example.py | 2 +- .../native_keras_with_gloo_example.py | 6 ++--- tf_yarn/examples/pytorch/pytorch_example.py | 2 +- tf_yarn/examples/run_examples.sh | 25 +++++++++--------- tf_yarn/examples/run_pytorch_examples.sh | 8 +++--- 13 files changed, 57 insertions(+), 55 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 66336eb..165f7e4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -8,14 +8,14 @@ on: jobs: lint: - runs-on: ubuntu-20.04 # Latest version supporting Python 3.6 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up Python 3.6 + - name: Set up Python 3.9 uses: actions/setup-python@v3 with: - python-version: 3.6 + python-version: 3.9 - name: Install dependencies run: | pip install --upgrade pip @@ -30,11 +30,11 @@ jobs: build: - runs-on: ubuntu-20.04 # Latest version supporting Python 3.6 + runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: [3.7, 3.8, 3.9] steps: - uses: actions/checkout@v3 diff --git a/requirements.txt b/requirements.txt index be4a88d..7082f36 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ cluster-pack>=0.2.19 skein>=0.8,<0.9 tensorboard +tensorflow_io pyarrow psutil -protobuf<4.21.0;python_version<"3.7" diff --git a/setup.py b/setup.py index 914d3d0..0d4c293 100755 --- a/setup.py +++ 
b/setup.py @@ -3,7 +3,7 @@ here = os.path.abspath(os.path.dirname(__file__)) -DESCRIPTION = "Distributed TensorFlow on a YARN cluster" +DESCRIPTION = "Distributed TensorFlow or PyTorch on a YARN cluster" try: LONG_DESCRIPTION = open(os.path.join(here, "README.md"), encoding="utf-8").read() @@ -28,8 +28,9 @@ def _read_reqs(relpath): "Environment :: Console", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Topic :: Software Development :: Libraries" ] @@ -46,14 +47,14 @@ def _check_add_criteo_environment(package_name): packages=setuptools.find_packages(), include_package_data=True, package_data={"tf_yarn": ["default.log.conf"]}, - version=_check_add_criteo_environment("0.6.9"), + version=_check_add_criteo_environment("0.7.0"), install_requires=REQUIREMENTS, tests_require=["pytest", "hadoop-test-cluster"], extras_require={ "cpu": ["tensorflow-io[tensorflow]>=0.8.0"], "gpu": ["tensorflow-io[tensorflow]>=0.8.0"], }, - python_requires=">=3.6", + python_requires=">=3.7", maintainer="Criteo", maintainer_email="github@criteo.com", @@ -61,7 +62,7 @@ def _check_add_criteo_environment(package_name): long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", classifiers=CLASSIFIERS, - keywords="tensorflow yarn", + keywords="tensorflow pytorch yarn", url="https://github.com/criteo/tf-yarn", entry_points={'console_scripts': [ 'check_hadoop_env = tf_yarn.bin.check_hadoop_env:main', diff --git a/tests/tensorflow/test_evaluator_task.py b/tests/tensorflow/test_evaluator_task.py index 4ed7eed..f1ac9a8 100644 --- a/tests/tensorflow/test_evaluator_task.py +++ b/tests/tensorflow/test_evaluator_task.py @@ -12,10 +12,10 @@ from tf_yarn.tensorflow.tasks.evaluator_task import _get_step checkpoints = { - "/path/to/model/dir/model.ckpt-0", - 
"/path/to/model/dir/model.ckpt-100", - "/path/to/model/dir/model.ckpt-200", - "/path/to/model/dir/model.ckpt-300" + "/path/to/model/dir/model.ckpt-0", + "/path/to/model/dir/model.ckpt-100", + "/path/to/model/dir/model.ckpt-200", + "/path/to/model/dir/model.ckpt-300" } @@ -29,16 +29,14 @@ ]) def test_evaluate(evaluated_ckpts, ckpt_to_export): with mock.patch('tf_yarn._task_commons._get_experiment') as experiment_mock, \ - mock.patch('tf_yarn.tensorflow.tasks.evaluator_task._get_evaluated_checkpoint') \ - as _get_evaluated_checkpoint, \ - mock.patch('tf_yarn.tensorflow.tasks.evaluator_task._get_all_checkpoints') \ - as _get_checkpoints, \ - mock.patch( - 'tf_yarn.tensorflow.tasks.evaluator_task.tf.io.gfile.exists' - ) as exists_mock, \ - mock.patch( - 'tf_yarn.tensorflow.tasks.evaluator_task.tf.io.gfile.listdir' - ) as listdir_mock: + mock.patch('tf_yarn.tensorflow.tasks.evaluator_task._get_evaluated_checkpoint') \ + as _get_evaluated_checkpoint, \ + mock.patch('tf_yarn.tensorflow.tasks.evaluator_task._get_all_checkpoints') \ + as _get_checkpoints, \ + mock.patch('tf_yarn.tensorflow.tasks.evaluator_task.tf.io.gfile.exists') as exists_mock, \ + mock.patch('tf_yarn.tensorflow.tasks.evaluator_task.tf.io.gfile.listdir') \ + as listdir_mock: + exists_mock.side_effect = lambda *args, **kwargs: True listdir_mock.side_effect = lambda *args, **kwargs: evaluated_ckpts mock_exporter = mock.Mock(spec=tf.estimator.Exporter) diff --git a/tests/test_client.py b/tests/test_client.py index 9647fbe..b74cc3f 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -200,11 +200,11 @@ def fail(*args, **kwargs): def test_container_log_status(): container_log_status = ContainerLogStatus( - {"chief:0": ("http://ec-0d-9a-00-3a-c0.pa4.hpc.criteo.preprod:8042/node/" - "containerlogs/container_e17294_1569204305368_264801_01_000002/myuser"), - "evaluator:0": ("http://ec-0d-9a-00-3a-c0.pa4.hpc.criteo.preprod:8042/node/" - 
"containerlogs/container_e95614_6456565654646_344343_01_000003/myuser")}, - {"chief:0": "SUCCEEDED", "evaluator:0": "FAILED"} + {"chief:0": ("http://ec-0d-9a-00-3a-c0.pa4.hpc.criteo.preprod:8042/node/" + "containerlogs/container_e17294_1569204305368_264801_01_000002/myuser"), + "evaluator:0": ("http://ec-0d-9a-00-3a-c0.pa4.hpc.criteo.preprod:8042/node/" + "containerlogs/container_e95614_6456565654646_344343_01_000003/myuser")}, + {"chief:0": "SUCCEEDED", "evaluator:0": "FAILED"} ) containers = container_log_status.by_container_id() diff --git a/tf_yarn/bin/check_hadoop_env.py b/tf_yarn/bin/check_hadoop_env.py index 4956794..7cfc31b 100644 --- a/tf_yarn/bin/check_hadoop_env.py +++ b/tf_yarn/bin/check_hadoop_env.py @@ -69,7 +69,7 @@ def launch_remote_check(file: str) -> Tuple[bool, str]: files.update({'tf_yarn': tf_yarn_zip}) service = skein.Service( script=f'./{archive_name} check_hadoop_env.py --file {file}', - resources=skein.Resources(2*1024, 1), + resources=skein.Resources(2 * 1024, 1), env={ 'PEX_ROOT': '/tmp/{uuid.uuid4()}/', 'PYTHONPATH': '.:', diff --git a/tf_yarn/examples/collective_all_reduce_example.py b/tf_yarn/examples/collective_all_reduce_example.py index 4240d55..d267c64 100644 --- a/tf_yarn/examples/collective_all_reduce_example.py +++ b/tf_yarn/examples/collective_all_reduce_example.py @@ -18,8 +18,8 @@ import horovod.tensorflow as hvd except (ModuleNotFoundError): logger.warning( - "horovod not installed. checkout " - "https://github.com/criteo/tf-yarn/blob/master/docs/HorovodWithGloo.md" + "horovod not installed. checkout " + "https://github.com/criteo/tf-yarn/blob/master/docs/HorovodWithGloo.md" ) pass @@ -33,7 +33,7 @@ 3. 
Pass a full URI to either of the CSV files to the example """ WINE_QUALITY_FILE = f"{cluster_pack.get_default_fs().replace('viewfs://', 'hdfs://')}" \ - f"/user/{USER}/tf_yarn_test/winequality-red.csv" + f"/user/{USER}/tf_yarn_test/winequality-red.csv" """ Output path of the learned model on hdfs diff --git a/tf_yarn/examples/linear_classifier_example.py b/tf_yarn/examples/linear_classifier_example.py index 34f1235..e4c5dbe 100644 --- a/tf_yarn/examples/linear_classifier_example.py +++ b/tf_yarn/examples/linear_classifier_example.py @@ -24,7 +24,7 @@ USER = getpass.getuser() WINE_QUALITY_FILE = f"{cluster_pack.get_default_fs().replace('viewfs://', 'hdfs://')}" \ - f"/user/{USER}/tf_yarn_test/winequality-red.csv" + f"/user/{USER}/tf_yarn_test/winequality-red.csv" # Output path of the learned model on hdfs HDFS_DIR = (f"{cluster_pack.get_default_fs().replace('viewfs://', 'hdfs://')}/user/{USER}" f"/tf_yarn_test/tf_yarn_{int(datetime.now().timestamp())}") diff --git a/tf_yarn/examples/mlflow_example.py b/tf_yarn/examples/mlflow_example.py index a62ec7d..6fd0e41 100644 --- a/tf_yarn/examples/mlflow_example.py +++ b/tf_yarn/examples/mlflow_example.py @@ -28,7 +28,7 @@ USER = getpass.getuser() WINE_QUALITY_FILE = f"{cluster_pack.get_default_fs().replace('viewfs://', 'hdfs://')}" \ - f"/user/{USER}/tf_yarn_test/winequality-red.csv" + f"/user/{USER}/tf_yarn_test/winequality-red.csv" # Output path of the learned model on hdfs HDFS_DIR = (f"{cluster_pack.get_default_fs().replace('viewfs://', 'hdfs://')}/user/{USER}" f"/tf_yarn_test/tf_yarn_{int(datetime.now().timestamp())}") diff --git a/tf_yarn/examples/native_keras_with_gloo_example.py b/tf_yarn/examples/native_keras_with_gloo_example.py index 64b65a1..23bb0d2 100644 --- a/tf_yarn/examples/native_keras_with_gloo_example.py +++ b/tf_yarn/examples/native_keras_with_gloo_example.py @@ -25,15 +25,15 @@ import horovod.tensorflow as hvd except (ModuleNotFoundError): logger.warning( - "horovod not installed. 
checkout " - "https://github.com/criteo/tf-yarn/blob/master/docs/HorovodWithGloo.md" + "horovod not installed. checkout " + "https://github.com/criteo/tf-yarn/blob/master/docs/HorovodWithGloo.md" ) pass USER = getpass.getuser() WINE_QUALITY_FILE = f"{cluster_pack.get_default_fs().replace('viewfs://', 'hdfs://')}" \ - f"/user/{USER}/tf_yarn_test/winequality-red.csv" + f"/user/{USER}/tf_yarn_test/winequality-red.csv" # Output path of the learned model on hdfs HDFS_DIR = (f"{cluster_pack.get_default_fs().replace('viewfs://', 'hdfs://')}/user/{USER}" diff --git a/tf_yarn/examples/pytorch/pytorch_example.py b/tf_yarn/examples/pytorch/pytorch_example.py index 3ef0997..e4459e2 100644 --- a/tf_yarn/examples/pytorch/pytorch_example.py +++ b/tf_yarn/examples/pytorch/pytorch_example.py @@ -99,7 +99,7 @@ def experiment_fn(): run_on_yarn( experiment_fn=experiment_fn, task_specs={ - "worker": TaskSpec(memory=48*2**10, vcores=48, instances=2, label=NodeLabel.GPU) + "worker": TaskSpec(memory=48 * 2 ** 10, vcores=48, instances=2, label=NodeLabel.GPU) }, pyenv_zip_path=zip_hdfs, queue="ml-gpu" diff --git a/tf_yarn/examples/run_examples.sh b/tf_yarn/examples/run_examples.sh index e0a9b02..f2e4ff0 100755 --- a/tf_yarn/examples/run_examples.sh +++ b/tf_yarn/examples/run_examples.sh @@ -18,27 +18,27 @@ do hdfs dfs -rm -r -f tf_yarn_test/tf_yarn_* # Setup environment - python3.6 -m venv tf-yarn_test_env + python3.9 -m venv tf-yarn_test_env . tf-yarn_test_env/bin/activate - pip install --upgrade pip setuptools wheel - pip install -e . + python3.9 -m pip install --upgrade pip setuptools wheel + python3.9 -m pip install -e . 
if [[ $tf_version == "1.15.2" ]]; then - pip install tensorflow-io==0.8.1 #also installs tensorflow==1.15.5 - pip install tensorflow==${version} # force the correct version of tf after install of tfio + python3.9 -m pip install tensorflow-io==0.8.1 #also installs tensorflow==1.15.5 + python3.9 -m pip install tensorflow==${version} # force the correct version of tf after install of tfio # https://github.com/pantsbuild/pex/issues/913 # only pex 2.1.1 is supported for tf 1.15 - pip install pex==2.1.1 + python3.9 -m pip install pex==2.1.1 #no version available for tf==2.5.2 - pip install horovod==0.19.2+criteo.${tf_version} + python3.9 -m pip install horovod==0.19.2+criteo.${tf_version} else - pip install tensorflow-io==0.19.1 # also installs tensorflow==2.5.2 + python3.9 -m pip install tensorflow-io==0.19.1 # also installs tensorflow==2.5.2 fi - pip install mlflow-skinny - export CRITEO_MLFLOW_TRACKING_URI="https://mlflow.da1.preprod.crto.in" + python3.9 -m pip install mlflow-skinny + export CRITEO_MLFLOW_TRACKING_URI="https://mlflow.preprod.crto.in" echo ' ' - pip freeze |grep -e tensor -e pex -e horovod + python3.9 -m pip freeze |grep -e tensor -e pex -e horovod echo ' ' # Setup specific to examples @@ -62,7 +62,7 @@ do continue fi echo "executing $example with tf=${tf_version} .." - python $example + python3.9 $example if ! [ $? 
-eq 0 ]; then exit_code=1 echo "error $example with tf=${tf_version}" @@ -72,6 +72,7 @@ do echo "=============================================" done popd + deactivate done exit $exit_code diff --git a/tf_yarn/examples/run_pytorch_examples.sh b/tf_yarn/examples/run_pytorch_examples.sh index 6361ec0..d15c23d 100755 --- a/tf_yarn/examples/run_pytorch_examples.sh +++ b/tf_yarn/examples/run_pytorch_examples.sh @@ -28,13 +28,14 @@ do python3.9 -m pip install torch==1.13.1 torchvision==0.14.1 --index-url https://download.pytorch.org/whl/cu117 else python3.9 -m pip install torch==2.0.1 torchvision==0.15.2 --index-url https://download.pytorch.org/whl/cu117 - # Workaround for https://github.com/pytorch/pytorch/issues/97258 - python3.9 -m pip install tensorflow==2.12.13 tensorflow_io==0.32.0 fi + # Workaround for https://github.com/pytorch/pytorch/issues/97258 + python3.9 -m pip install tensorflow==2.12.0 tensorflow_io==0.32.0 + python3.9 -m pip install -e . python3.9 -m pip install webdataset==0.2.48 mlflow-skinny - python3.9 -m pip freeze |grep -e torch -e pex + python3.9 -m pip freeze |grep -e torch -e pex -e tensor # Execute examples pushd tf_yarn/examples/pytorch @@ -50,6 +51,7 @@ do echo "=============================================" done popd + deactivate done exit $exit_code