From 42a9233e1dfa2c26d8318f32ca38e721b0b8ffb2 Mon Sep 17 00:00:00 2001 From: Alok Singh <8325708+alok@users.noreply.github.com> Date: Tue, 5 Jun 2018 20:22:11 -0700 Subject: [PATCH] Improve yapf speed and document its usage (#2160) * Allow yapf to lint individual files * Add tip for using yapf * Update doc * Update script to autoformat changed py files The new default is for the script to only updated changed files to encourage using it as a pre-push hook. Travis still checks all since it's not that big an increase to runtime. * Exclude formatting thirdparty/autogen py files * Symlink .travis -> scripts Hidden directories may get glossed over otherwise. * .travis -> scripts in docs They are symlinks to the same thing, but `scripts` is more dev-friendly, while `.travis` is really only for Travis CI. * Document different yapf format functions Most devs will only need `format_changed`, and this is run by default. `format_changed` should be fast enough in most cases to work as a pre-commit hook. * Speed up yapf by only formatting changed files * Update docs 1. Mention how yapf can be used a pre-commit hook 2. rm `bash`, script is executable * Update yapf.sh * Update development.rst * Update yapf.sh * Use bash arrays for correct argument splitting Playing fast and loose with whitespace in bash is a terrible idea. * Only format non-excluded by default * Check changes against master Normally, the remote is called `origin`, but naming it explicit * Adding missing directory to `format_all` * Cleanup YAPF code Remove unused function and move around code to make clearer and adding lines give cleaner diffs. * Ensure correct files are autoformatted * Fix cmd line arg splitting Each arg has to be in its own set of quotes. * Diff against mergebase TIL there's a clean syntax for doing that, but it's too clever to belong in a shell script. We use `mapfile -t` to ensure no problems down the line with weird filenames. --- .travis.yml | 2 +- .travis/yapf.sh | 82 ++++++++++++++++++++++++++++++++------ doc/source/development.rst | 7 +++- scripts | 1 + 4 files changed, 77 insertions(+), 15 deletions(-) create mode 120000 scripts diff --git a/.travis.yml b/.travis.yml index cbb7a77196e53..9cc64aabd0625 100644 --- a/.travis.yml +++ b/.travis.yml @@ -54,7 +54,7 @@ matrix: - cd .. # Run Python linting. - flake8 --exclude=python/ray/core/src/common/flatbuffers_ep-prefix/,python/ray/core/generated/,src/common/format/,doc/source/conf.py,python/ray/cloudpickle/ - - .travis/yapf.sh + - .travis/yapf.sh --all - os: linux dist: trusty diff --git a/.travis/yapf.sh b/.travis/yapf.sh index 03828d6627d82..4ec36d35b2589 100755 --- a/.travis/yapf.sh +++ b/.travis/yapf.sh @@ -7,19 +7,75 @@ set -eo pipefail builtin cd "$(dirname "${BASH_SOURCE:-$0}")" ROOT="$(git rev-parse --show-toplevel)" -builtin cd "$ROOT" - -yapf \ - --style "$ROOT/.style.yapf" \ - --in-place --recursive --parallel \ - --exclude 'python/ray/cloudpickle' \ - --exclude 'python/ray/dataframe' \ - --exclude 'python/ray/rllib' \ - -- \ - 'test' 'python' - -if ! git diff --quiet; then - echo 'Reformatted staged files. Please review and stage the changes.' +builtin cd "$ROOT" || exit 1 + +# Add the upstream branch if it doesn't exist +if ! [[ -e "$ROOT/.git/refs/remotes/upstream" ]]; then + git remote add 'upstream' 'https://github.com/ray-project/ray.git' +fi + +# Only fetch master since that's the branch we're diffing against. +git fetch upstream master + +YAPF_FLAGS=( + '--style' "$ROOT/.style.yapf" + '--in-place' + '--recursive' + '--parallel' +) + +YAPF_EXCLUDES=( + '--exclude' 'python/ray/dataframe' + '--exclude' 'python/ray/rllib' + '--exclude' 'python/ray/cloudpickle' + '--exclude' 'python/build' + '--exclude' 'python/ray/pyarrow_files' + '--exclude' 'python/ray/core/src/ray/gcs' + '--exclude' 'python/ray/common/thirdparty' +) + +# Format specified files +format() { + yapf "${YAPF_FLAGS[@]}" -- "$@" +} + +# Format files that differ from main branch. Ignores dirs that are not slated +# for autoformat yet. +format_changed() { + # The `if` guard ensures that the list of filenames is not empty, which + # could cause yapf to receive 0 positional arguments, making it hang + # waiting for STDIN. + # + # `diff-filter=ACM` and $MERGEBASE is to ensure we only format files that + # exist on both branches. + MERGEBASE="$(git merge-base upstream/master HEAD)" + + if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' &>/dev/null; then + declare -a unformatted_files && mapfile -t unformatted_files < <(git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py') + yapf "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}" -- "${unformatted_files[@]}" + fi +} + +# Format all files +format_all() { + yapf "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" test python +} + +# This flag formats individual files. --files *must* be the first command line +# arg to use this option. +if [[ "$1" == '--files' ]]; then + format "${@:2}" + # If `--all` is passed, then any further arguments are ignored and the + # entire python directory is formatted. +elif [[ "$1" == '--all' ]]; then + format_all +else + # Format only the files that changed in last commit. + format_changed +fi + +if ! git diff --quiet &>/dev/null; then + echo 'Reformatted changed files. Please review and stage the changes.' echo 'Files updated:' echo diff --git a/doc/source/development.rst b/doc/source/development.rst index 7c86ef1a0045c..bd82a1a48db74 100644 --- a/doc/source/development.rst +++ b/doc/source/development.rst @@ -54,7 +54,12 @@ helpful. something like ``flake8 ray/python/ray/worker.py``. You may need to first run ``pip install flake8``. -5. **Inspecting Redis shards by hand:** To inspect the primary Redis shard by +5. **Autoformatting code**. We use ``yapf`` https://github.com/google/yapf for + linting, and the config file is located at ``.style.yapf``. We recommend + running ``.travis/yapf.sh`` prior to pushing to format changed files. + Note that some projects such as dataframes and rllib are currently excluded. + +6. **Inspecting Redis shards by hand:** To inspect the primary Redis shard by hand, you can query it with commands like the following. .. code-block:: python diff --git a/scripts b/scripts new file mode 120000 index 0000000000000..4b675597e09aa --- /dev/null +++ b/scripts @@ -0,0 +1 @@ +.travis \ No newline at end of file