From ecd454b6c26f30fc0467d755ad715969a727cb0d Mon Sep 17 00:00:00 2001 From: azoghbi Date: Tue, 31 Dec 2024 17:02:00 -0500 Subject: [PATCH] move changed-images check to a python script and add testing --- .github/workflows/image-build.yml | 49 ++++----------- astro-default/Dockerfile | 2 +- scripts/changed_images.py | 100 ++++++++++++++++++++++++++++++ tests/test_build_code.py | 59 +++++++++++++++++- 4 files changed, 171 insertions(+), 39 deletions(-) create mode 100644 scripts/changed_images.py diff --git a/.github/workflows/image-build.yml b/.github/workflows/image-build.yml index 991dbae..46c8519 100644 --- a/.github/workflows/image-build.yml +++ b/.github/workflows/image-build.yml @@ -22,7 +22,7 @@ jobs: id-token: write outputs: - dir_changes: ${{ steps.directory-changes.outputs.changed_dirs }} + dir_changes: ${{ steps.changed-images.outputs.changed_dirs }} steps: - name: Debug Event Payload @@ -36,40 +36,6 @@ jobs: - name: Git branch name id: git-branch-name run: echo "${{ github.head_ref || github.ref_name }}" - - - - name: Directory Changes - id: directory-changes - run: | - case "${{ github.event_name }}" in - "pull_request") - base_ref="${GITHUB_BASE_REF}" - - git fetch origin "${base_ref}" - changed_dirs="$(git --no-pager diff --name-only HEAD origin/${base_ref} | xargs -n1 dirname | sort -u)" - ;; - - "push") - git fetch origin "${{ github.event.before }}" - changed_dirs="$(git --no-pager diff-tree --name-only -r ${{ github.event.before }}..${{ github.event.after }} | xargs -n1 dirname | sort -u)" - ;; - - *) - changed_dirs="$(git ls-files | xargs -n1 dirname | sort -u)" - ;; - esac - - # keep only image directories - dirs="" - for dir in $changed_dirs; do - if [ -f $dir/Dockerfile ]; then - dirs="$dirs $dir" - fi - done - # save it in json format - changed_dirs=$(echo "$dirs" | jq -R -c 'gsub("\r\n";"") | split(" ") | map(select(length > 0))') - echo "++ changed_dirs: $changed_dirs" - echo "changed_dirs=$changed_dirs" >> $GITHUB_OUTPUT - name: Setup Python @@ -77,6 +43,16 @@ jobs: id: setup_python with: python-version: '3.12' + + - name: Changed Images + id: changed-images + run: | + cat < tmp_github.json + ${{ toJson(github) }} + EOF + changed_dirs=`python scripts/changed_images.py tmp_github.json --debug` + echo "changed_dirs=$changed_dirs" >> $GITHUB_OUTPUT + - name: Log in to the Container registry uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 @@ -88,7 +64,7 @@ jobs: - name: Build and push images id: push env: - DIR_CHANGES: ${{ steps.directory-changes.outputs.changed_dirs }} + DIR_CHANGES: ${{ steps.changed-images.outputs.changed_dirs }} run: | echo "DIR_CHANGES: $DIR_CHANGES" python scripts/build.py --debug \ @@ -103,4 +79,3 @@ jobs: with: # pass the images that changed for testing images: ${{ needs.build-and-push-images.outputs.dir_changes }} - diff --git a/astro-default/Dockerfile b/astro-default/Dockerfile index 4b49225..44b2725 100644 --- a/astro-default/Dockerfile +++ b/astro-default/Dockerfile @@ -19,4 +19,4 @@ COPY --chmod=0755 update-notebooks.sh /usr/local/bin/before-notebook.d USER $NB_USER # For firefly -ENV FIREFLY_URL=https://irsa.ipac.caltech.edu/irsaviewer \ No newline at end of file +ENV FIREFLY_URL=https://irsa.ipac.caltech.edu/irsaviewer diff --git a/scripts/changed_images.py b/scripts/changed_images.py new file mode 100644 index 0000000..0b9e5d2 --- /dev/null +++ b/scripts/changed_images.py @@ -0,0 +1,100 @@ +import argparse +import json +import os +import sys +import logging + +sys.path.insert(0, f'{os.path.dirname(__file__)}') +from build import TaskRunner + +def find_changed_images(github_data:dict, runner:TaskRunner): + """Find changed images + + Returns a list of image names that changed after the git event + """ + + if github_data['event_name'] == 'pull_request': + base_ref = github_data['event']['base_ref'] + + cmd = f'git fetch origin {base_ref}' + out = runner.run(cmd, 500, capture_output=True) + + cmd = f'git --no-pager diff --name-only HEAD origin/${base_ref} | xargs -n1 dirname | sort -u' + final_out = runner.run(cmd, 500, capture_output=True) + + elif github_data['event_name'] == 'push': + before = github_data['event']['before'] + after = github_data['event']['after'] + + cmd = f'git fetch origin {before}' + out = runner.run(cmd, 500, capture_output=True) + + cmd = f'git --no-pager diff-tree --name-only -r {before}..{after} | xargs -n1 dirname | sort -u' + final_out = runner.run(cmd, 500, capture_output=True) + + else: + cmd = 'git ls-files | xargs -n1 dirname | sort -u' + final_out = runner.run(cmd, 500, capture_output=True) + + changed_images = [] + if not runner.dryrun: + changed_images = final_out.stdout.strip() + + # from str to list + changed_images = changed_images.split() + + # keep a list of images, i.e. folders that contain Dockerfile + changed_images = [cdir for cdir in changed_images if os.path.exists(f'{cdir}/Dockerfile')] + + return changed_images + + +if __name__ == '__main__': + + ap = argparse.ArgumentParser() + + ap.add_argument('jsonfile', + help="File name of the json file that contains Github action context") + + ap.add_argument('--dryrun', action='store_true', + help='prepare but do not run commands', + default=False) + + ap.add_argument('--debug', action='store_true', + help='Print debug messages', + default=False) + + args = ap.parse_args() + # get parameters + dryrun = args.dryrun + debug = args.debug + jsonfile = args.jsonfile + + with open(args.jsonfile, "r") as file: + data = json.load(file) + + logging.basicConfig( + format="%(asctime)s|%(levelname)5s| %(message)s", + datefmt="%Y-%m-%d|%H:%M:%S", + ) + logger = logging.getLogger('::Changed-dirs::') + logger.setLevel(level=logging.DEBUG if debug else logging.INFO) + runner = TaskRunner(logger, dryrun) + + # some logging: + runner.out('+++ INPUT +++', logging.DEBUG) + runner.out(f'jsonfile: {jsonfile}', logging.DEBUG) + runner.out(f'debug: {debug}', logging.DEBUG) + runner.out(f'dryrun: {dryrun}', logging.DEBUG) + runner.out(f'event_name: {data["event_name"]}', logging.DEBUG) + runner.out('+++++++++++++', logging.DEBUG) + + changed_images = find_changed_images(data, runner) + + # print the result in json text so it is picked up in the CI script + res = json.dumps(changed_images) + runner.out('+++ OUTPUT +++', logging.DEBUG) + runner.out(res) + runner.out('++++++++++++++', logging.DEBUG) + # clean print so it is picked up by the CI + print(res) diff --git a/tests/test_build_code.py b/tests/test_build_code.py index 904941b..0a57067 100644 --- a/tests/test_build_code.py +++ b/tests/test_build_code.py @@ -6,11 +6,14 @@ import pathlib import tempfile import glob +import json +import subprocess from io import StringIO sys.path.insert(0, f'{os.path.dirname(__file__)}/../scripts/') from build import TaskRunner, Builder +from changed_images import find_changed_images class TestTaskRunner(unittest.TestCase): @@ -209,5 +212,59 @@ def run(cmd, timeout, **kw): self.assertEqual(result, "name:\nnextline") self.logger.handlers.clear() +class TestChangedImages(unittest.TestCase): + + def setUp(self): + logger = logging.getLogger() + self.runner = TaskRunner(logger, dryrun=True) + self.logger = logger + + def test_pull_request(self): + pull_request_event = { + 'event_name': 'pull_request', + 'event': { + 'base_ref': '7905b4edab6' + } + } + with patch('sys.stderr', new=StringIO()) as mock_out: + logging.basicConfig(level=logging.DEBUG) + res = find_changed_images(pull_request_event, self.runner) + output = mock_out.getvalue().strip() + base_ref = pull_request_event['event']['base_ref'] + self.assertTrue(f'git fetch origin {base_ref}' in output) + self.assertTrue(f'git --no-pager diff --name-only HEAD origin/${base_ref} | xargs -n1 dirname | sort -u' in output) + self.assertEqual(res, []) + self.logger.handlers.clear() + + def test_push(self): + push_event = { + 'event_name': 'push', + 'event': { + 'before': '299390bb5c8', + 'after': '2add5c8e038' + } + } + with patch('sys.stderr', new=StringIO()) as mock_out: + logging.basicConfig(level=logging.DEBUG) + res = find_changed_images(push_event, self.runner) + output = mock_out.getvalue().strip() + before = push_event['event']['before'] + after = push_event['event']['after'] + self.assertTrue(f'git fetch origin {before}' in output) + self.assertTrue(f'git --no-pager diff-tree --name-only -r {before}..{after} | xargs -n1 dirname | sort -u' in output) + self.assertEqual(res, []) + self.logger.handlers.clear() + + def test_else_event(self): + else_event = {'event_name': 'other'} + with patch('sys.stderr', new=StringIO()) as mock_out: + logging.basicConfig(level=logging.DEBUG) + res = find_changed_images(else_event, self.runner) + output = mock_out.getvalue().strip() + self.assertTrue(f'git ls-files | xargs -n1 dirname | sort -u' in output) + self.assertEqual(res, []) + self.logger.handlers.clear() + + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main()