From 27b3ccb22538047e9ac0271cd7de24ae79a2b526 Mon Sep 17 00:00:00 2001 From: Federico Mon Date: Fri, 11 Oct 2024 08:32:11 +0200 Subject: [PATCH 01/14] ci: always run system tests [backport 2.13] (#10919) CI: Removes conditions to check whether it should run or "dry run" system tests, and just runs them ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) (cherry picked from commit 7f6f3dbe7c2e00fe84cdf944e1121304306fb649) Co-authored-by: erikayasuda <153395705+erikayasuda@users.noreply.github.com> --- .github/workflows/system-tests.yml | 94 ++++++++++-------------------- 1 file changed, 30 insertions(+), 64 deletions(-) diff --git 
a/.github/workflows/system-tests.yml b/.github/workflows/system-tests.yml index 3f936ae2da5..e198e13751e 100644 --- a/.github/workflows/system-tests.yml +++ b/.github/workflows/system-tests.yml @@ -10,44 +10,24 @@ on: - cron: '00 04 * * 2-6' jobs: - needs-run: - runs-on: ubuntu-latest - outputs: - outcome: ${{ steps.run_needed.outcome }} - steps: - - uses: actions/checkout@v4 - - id: run_needed - name: Check if run is needed - run: | - git fetch origin ${{ github.event.pull_request.base.sha || github.sha }} - export PATHS=$(git diff --name-only HEAD ${{ github.event.pull_request.base.sha || github.sha }}) - python -c "import os,sys,fnmatch;sys.exit(not bool([_ for pattern in {'ddtrace/*', 'setup*', 'pyproject.toml', '.github/workflows/system-tests.yml'} for _ in fnmatch.filter(os.environ['PATHS'].splitlines(), pattern)]))" - continue-on-error: true - system-tests-build-agent: runs-on: ubuntu-latest - needs: needs-run steps: - name: Checkout system tests - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' uses: actions/checkout@v4 with: repository: 'DataDog/system-tests' - name: Build agent - id: build - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' run: ./build.sh -i agent - name: Save id: save - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' run: | docker image save system_tests/agent:latest | gzip > agent_${{ github.sha }}.tar.gz - uses: actions/upload-artifact@v4 - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' with: name: agent_${{ github.sha }} path: | @@ -56,7 +36,6 @@ jobs: system-tests-build-weblog: runs-on: ubuntu-latest - needs: needs-run strategy: matrix: include: @@ -78,13 +57,11 @@ jobs: steps: - name: Checkout system tests - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' uses: actions/checkout@v4 with: repository: 'DataDog/system-tests' - name: Checkout dd-trace-py - if: 
needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' uses: actions/checkout@v4 with: path: 'binaries/dd-trace-py' @@ -94,18 +71,14 @@ jobs: ref: ${{ github.event.pull_request.head.sha || github.sha }} - name: Build - id: build - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' run: ./build.sh -i weblog - name: Save id: save - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' run: | docker image save system_tests/weblog:latest | gzip > ${{ matrix.weblog-variant}}_weblog_${{ github.sha }}.tar.gz - uses: actions/upload-artifact@v4 - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' with: name: ${{ matrix.weblog-variant }}_${{ github.sha }} path: | @@ -114,7 +87,7 @@ jobs: system-tests: runs-on: ubuntu-latest - needs: [needs-run, system-tests-build-agent, system-tests-build-weblog] + needs: [system-tests-build-agent, system-tests-build-weblog] strategy: matrix: weblog-variant: [flask-poc, uwsgi-poc , django-poc, fastapi, python3.12] @@ -132,130 +105,126 @@ jobs: steps: - name: Checkout system tests - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' uses: actions/checkout@v4 with: repository: 'DataDog/system-tests' - name: Build runner - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' uses: ./.github/actions/install_runner - uses: actions/download-artifact@v4 - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' with: name: ${{ matrix.weblog-variant }}_${{ github.sha }} path: images_artifacts/ - uses: actions/download-artifact@v4 - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' with: name: agent_${{ github.sha }} path: images_artifacts/ - name: docker load - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' + id: docker_load run: | docker load < images_artifacts/${{ 
matrix.weblog-variant}}_weblog_${{ github.sha }}.tar.gz docker load < images_artifacts/agent_${{ github.sha }}.tar.gz - name: Run DEFAULT - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'other' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'other' run: ./run.sh DEFAULT - name: Run SAMPLING - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'other' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'other' run: ./run.sh SAMPLING - name: Run INTEGRATIONS - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'other' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'other' run: ./run.sh INTEGRATIONS - name: Run CROSSED_TRACING_LIBRARIES - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'other' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'other' run: ./run.sh CROSSED_TRACING_LIBRARIES - name: Run REMOTE_CONFIG_MOCKED_BACKEND_ASM_FEATURES - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'remote-config' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'remote-config' run: ./run.sh REMOTE_CONFIG_MOCKED_BACKEND_ASM_FEATURES - name: Run REMOTE_CONFIG_MOCKED_BACKEND_LIVE_DEBUGGING - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'remote-config' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'remote-config' run: ./run.sh REMOTE_CONFIG_MOCKED_BACKEND_LIVE_DEBUGGING - name: Run REMOTE_CONFIG_MOCKED_BACKEND_ASM_DD - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'remote-config' + if: 
always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'remote-config' run: ./run.sh REMOTE_CONFIG_MOCKED_BACKEND_ASM_DD - name: Run APPSEC_MISSING_RULES - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec' run: ./run.sh APPSEC_MISSING_RULES - name: Run APPSEC_CUSTOM_RULES - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec' run: ./run.sh APPSEC_CUSTOM_RULES - name: Run APPSEC_CORRUPTED_RULES - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec' run: ./run.sh APPSEC_CORRUPTED_RULES - name: Run APPSEC_RULES_MONITORING_WITH_ERRORS - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec' run: ./run.sh APPSEC_RULES_MONITORING_WITH_ERRORS - name: Run APPSEC_LOW_WAF_TIMEOUT - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec' run: ./run.sh APPSEC_LOW_WAF_TIMEOUT - name: Run APPSEC_CUSTOM_OBFUSCATION - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec' run: ./run.sh APPSEC_CUSTOM_OBFUSCATION - name: Run APPSEC_RATE_LIMITER - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec' + if: 
always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec' run: ./run.sh APPSEC_RATE_LIMITER - name: Run APPSEC_STANDALONE - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec-1' run: ./run.sh APPSEC_STANDALONE - name: Run APPSEC_RUNTIME_ACTIVATION - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec-1' run: ./run.sh APPSEC_RUNTIME_ACTIVATION - name: Run APPSEC_WAF_TELEMETRY - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec-1' run: ./run.sh APPSEC_WAF_TELEMETRY - name: Run APPSEC_DISABLED - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec-1' run: ./run.sh APPSEC_DISABLED - name: Run APPSEC_BLOCKING - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec-1' run: ./run.sh APPSEC_BLOCKING - name: Run APPSEC_BLOCKING_FULL_DENYLIST - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec-1' run: ./run.sh APPSEC_BLOCKING_FULL_DENYLIST - name: Run APPSEC_REQUEST_BLOCKING - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' + if: always() && steps.docker_load.outcome == 
'success' && matrix.scenario == 'appsec-1' run: ./run.sh APPSEC_REQUEST_BLOCKING - name: Run APPSEC_RASP - if: (needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule') && matrix.scenario == 'appsec-1' + if: always() && steps.docker_load.outcome == 'success' && matrix.scenario == 'appsec-1' run: ./run.sh APPSEC_RASP # The compress step speed up a lot the upload artifact process - name: Compress artifact - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' + if: always() && steps.docker_load.outcome == 'success' id: compress-artifact run: tar -czvf artifact.tar.gz $(ls | grep logs) - name: Upload artifact uses: actions/upload-artifact@v4 - if: steps.compress-artifact.outcome == 'success' || github.event_name == 'schedule' + if: always() && steps.docker_load.outcome == 'success' with: name: logs_${{ matrix.weblog-variant }}_${{ matrix.scenario }} path: artifact.tar.gz @@ -264,17 +233,14 @@ jobs: parametric: runs-on: group: "APM Larger Runners" - needs: needs-run env: TEST_LIBRARY: python steps: - name: Checkout system tests - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' uses: actions/checkout@v4 with: repository: 'DataDog/system-tests' - name: Checkout dd-trace-py - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' uses: actions/checkout@v4 with: path: 'binaries/dd-trace-py' @@ -282,20 +248,20 @@ jobs: ref: ${{ github.event.pull_request.head.sha || github.sha }} - name: Build runner - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' + id: build_runner uses: ./.github/actions/install_runner - name: Run - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' + if: always() && steps.build_runner.outcome == 'success' run: ./run.sh PARAMETRIC - name: Compress artifact - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' + if: always() && 
steps.build_runner.outcome == 'success' run: tar -czvf artifact.tar.gz $(ls | grep logs) - name: Upload artifact uses: actions/upload-artifact@v4 - if: needs.needs-run.outputs.outcome == 'success' || github.event_name == 'schedule' + if: always() && steps.build_runner.outcome == 'success' with: name: logs_parametric path: artifact.tar.gz From 80cf613f3167b943262b704964222d2d5aa0f38d Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 11 Oct 2024 13:53:36 +0200 Subject: [PATCH 02/14] fix(profiling): explicitly link with thread library [backport-2.13] (#11003) Manual backport of #10994 to 2.13 python:3.11-bookworm image for aarch64 has `pthread_atfork` in static libpthread.a not in libpthread.so so need to explicitly link for those cases. Otherwise, we get undefined symbol `pthread_atfork`. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as 
reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- .../profiling/dd_wrapper/CMakeLists.txt | 11 ++++++++--- .../datadog/profiling/stack_v2/CMakeLists.txt | 18 +++++++++++++++--- ...iling-pthread-aarch64-d4548fd1842d0665.yaml | 6 ++++++ 3 files changed, 29 insertions(+), 6 deletions(-) create mode 100644 releasenotes/notes/profiling-pthread-aarch64-d4548fd1842d0665.yaml diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt index 2c292863df7..51dbc126dc1 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt @@ -20,6 +20,13 @@ include(FindCppcheck) include(FindInfer) include(CheckSymbolExists) +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +if(NOT Threads_FOUND OR NOT CMAKE_USE_PTHREADS_INIT) + message(FATAL_ERROR "pthread compatible library not found") +endif() + # Library sources add_library(dd_wrapper SHARED src/uploader_builder.cpp @@ -46,9 +53,7 @@ target_include_directories(dd_wrapper PRIVATE ${Datadog_INCLUDE_DIRS} ) -target_link_libraries(dd_wrapper PRIVATE - ${Datadog_LIBRARIES} -) +target_link_libraries(dd_wrapper PRIVATE ${Datadog_LIBRARIES} Threads::Threads) # Figure out the suffix. Try to approximate the cpython way of doing things. ## C library diff --git a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt index f31639fabd7..469e39ef5a9 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt @@ -20,11 +20,20 @@ include(FindCppcheck) # but whether or not we keep that design is unknown. Hack it for now. 
add_subdirectory(../dd_wrapper ${CMAKE_CURRENT_BINARY_DIR}/../dd_wrapper_build) +find_package(Python3 COMPONENTS Interpreter Development) + # Make sure we have necessary Python variables if (NOT Python3_INCLUDE_DIRS) message(FATAL_ERROR "Python3_INCLUDE_DIRS not found") endif() +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +if(NOT Threads_FOUND OR NOT CMAKE_USE_PTHREADS_INIT) + message(FATAL_ERROR "pthread compatible library not found") +endif() + # Add echion set(ECHION_COMMIT "b2f2d49f2f5d5c4dd78d1d9b83280db8ac2949c0" CACHE STRING "Commit hash of echion to use") FetchContent_Declare( @@ -90,9 +99,12 @@ set_target_properties(${EXTENSION_NAME} PROPERTIES SUFFIX "") # RPATH is needed for sofile discovery at runtime, since Python packages are not # installed in the system path. This is typical. set_target_properties(${EXTENSION_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/..") -target_link_libraries(${EXTENSION_NAME} PRIVATE - dd_wrapper -) + +target_link_libraries(${EXTENSION_NAME} PRIVATE dd_wrapper Threads::Threads) + +if(Python3_LIBRARIES) + target_link_libraries(${EXTENSION_NAME} PRIVATE ${Python3_LIBRARIES}) +endif() # Extensions are built as dynamic libraries, so PIC is required. set_target_properties(${EXTENSION_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/releasenotes/notes/profiling-pthread-aarch64-d4548fd1842d0665.yaml b/releasenotes/notes/profiling-pthread-aarch64-d4548fd1842d0665.yaml new file mode 100644 index 00000000000..cd7eeb08900 --- /dev/null +++ b/releasenotes/notes/profiling-pthread-aarch64-d4548fd1842d0665.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + profiling: fixes an issue where stack v2 couldn't be enabled as pthread + was not properly linked on some debian based images for aarch64 architecture. 
+ From d8c37f4eec56429e11387dd5ef0c845b863bdfc4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 15 Oct 2024 15:52:13 +0000 Subject: [PATCH 03/14] fix(botocore): bedrock cross-region inference model name does not throw [backport 2.13] (#10965) Backport 80a3ee85f7620e32d60cd26925a398dd81790f15 from #10949 to 2.13. ## What does this PR do? Fixes #10772 Model IDs with cross-region inference would throw because we assumed `modelID` would only have the `provider` and `model_name`, when it could have the region as well. This would result in: ``` File /python3.11/site-packages/ddtrace/contrib/internal/botocore/services/bedrock.py:321, in patched_bedrock_api_call(original_func, instance, args, kwargs, function_vars) 319 params = function_vars.get("params") 320 pin = function_vars.get("pin") --> 321 model_provider, model_name = params.get("modelId").split(".") 322 integration = function_vars.get("integration") 323 submit_to_llmobs = integration.llmobs_enabled and "embed" not in model_name ValueError: too many values to unpack (expected 2) ``` We are not tagging that cross-region inference in this PR, just resolving the error. ### Testing To test this change, a singular `anthropic_message` test case and corresponding cassette was modified. This had an unfortunate side effect of affecting an LLMObs test as well. I can add a different test instead, but I believe that would require an additional 60+ line cassette which isn't totally needed, so I opted for this instead. 
## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Sam Brenner <106700075+sabrenner@users.noreply.github.com> --- ddtrace/contrib/internal/botocore/services/bedrock.py | 6 +++++- ...k-cross-region-inference-model-fix-179b7f1ddd6e8e02.yaml | 4 ++++ .../bedrock_cassettes/anthropic_message_invoke.yaml | 2 +- tests/contrib/botocore/test_bedrock.py | 1 + tests/contrib/botocore/test_bedrock_llmobs.py | 4 ++++ 5 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/botocore-bedrock-cross-region-inference-model-fix-179b7f1ddd6e8e02.yaml diff --git 
a/ddtrace/contrib/internal/botocore/services/bedrock.py b/ddtrace/contrib/internal/botocore/services/bedrock.py index fca33630f57..df547e8f095 100644 --- a/ddtrace/contrib/internal/botocore/services/bedrock.py +++ b/ddtrace/contrib/internal/botocore/services/bedrock.py @@ -318,7 +318,11 @@ def handle_bedrock_response( def patched_bedrock_api_call(original_func, instance, args, kwargs, function_vars): params = function_vars.get("params") pin = function_vars.get("pin") - model_provider, model_name = params.get("modelId").split(".") + model_meta = params.get("modelId").split(".") + if len(model_meta) == 2: + model_provider, model_name = model_meta + else: + _, model_provider, model_name = model_meta # cross-region inference integration = function_vars.get("integration") submit_to_llmobs = integration.llmobs_enabled and "embed" not in model_name with core.context_with_data( diff --git a/releasenotes/notes/botocore-bedrock-cross-region-inference-model-fix-179b7f1ddd6e8e02.yaml b/releasenotes/notes/botocore-bedrock-cross-region-inference-model-fix-179b7f1ddd6e8e02.yaml new file mode 100644 index 00000000000..be6f6d0dc27 --- /dev/null +++ b/releasenotes/notes/botocore-bedrock-cross-region-inference-model-fix-179b7f1ddd6e8e02.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + botocore: fixes bedrock model and model provider interpretation from ``modelId`` when using cross-region inference. 
diff --git a/tests/contrib/botocore/bedrock_cassettes/anthropic_message_invoke.yaml b/tests/contrib/botocore/bedrock_cassettes/anthropic_message_invoke.yaml index a4abf1ac291..dfc0235d6cf 100644 --- a/tests/contrib/botocore/bedrock_cassettes/anthropic_message_invoke.yaml +++ b/tests/contrib/botocore/bedrock_cassettes/anthropic_message_invoke.yaml @@ -21,7 +21,7 @@ interactions: - !!binary | YXR0ZW1wdD0x method: POST - uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1%3A0/invoke + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.anthropic.claude-3-sonnet-20240229-v1%3A0/invoke response: body: string: '{"id":"msg_01E6sPP1ksqicYCaBrkvzna8","type":"message","role":"assistant","content":[{"type":"text","text":"Hobbits diff --git a/tests/contrib/botocore/test_bedrock.py b/tests/contrib/botocore/test_bedrock.py index 90c7669cba5..3c91b147a97 100644 --- a/tests/contrib/botocore/test_bedrock.py +++ b/tests/contrib/botocore/test_bedrock.py @@ -173,6 +173,7 @@ def test_anthropic_invoke(bedrock_client, request_vcr): @pytest.mark.snapshot def test_anthropic_message_invoke(bedrock_client, request_vcr): body, model = json.dumps(_REQUEST_BODIES["anthropic_message"]), _MODELS["anthropic_message"] + model = "us." 
+ model with request_vcr.use_cassette("anthropic_message_invoke.yaml"): response = bedrock_client.invoke_model(body=body, modelId=model) json.loads(response.get("body").read()) diff --git a/tests/contrib/botocore/test_bedrock_llmobs.py b/tests/contrib/botocore/test_bedrock_llmobs.py index ae2fb7894c6..b7e80651f35 100644 --- a/tests/contrib/botocore/test_bedrock_llmobs.py +++ b/tests/contrib/botocore/test_bedrock_llmobs.py @@ -128,6 +128,10 @@ def _test_llmobs_invoke(cls, provider, bedrock_client, mock_llmobs_span_writer, } with get_request_vcr().use_cassette(cassette_name): body, model = json.dumps(body), _MODELS[provider] + if provider == "anthropic_message": + # we do this to re-use a cassette which tests + # cross-region inference + model = "us." + model response = bedrock_client.invoke_model(body=body, modelId=model) json.loads(response.get("body").read()) span = mock_tracer.pop_traces()[0][0] From f4dc5b9df79991ce74271be626bdfeecfe703d54 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:46:28 +0200 Subject: [PATCH 04/14] fix(iast): re.finditer aspect error [backport 2.13] (#11028) Backport 9973b22923c7590fa7da06fafc5fac2cb2f1a84e from #11027 to 2.13. Ensure IAST propagation does not raise side effects related to re.finditer. 
We detect this error in #10988 PR, when FastAPI headers were empty in framework tests: https://github.com/DataDog/dd-trace-py/actions/runs/11273577079/job/31350947622 We can revert this system tests PR after this PR: https://github.com/DataDog/system-tests/pull/3230 This error was detected in [python-multipart==0.0.05](https://pypi.org/project/python-multipart/0.0.5/) ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Alberto Vara --- benchmarks/bm/iast_fixtures/str_methods.py | 4 ++ .../appsec/_iast/_taint_tracking/aspects.py | 9 ++-- ...x-re-finditer-aspect-8925b30073169222.yaml | 4 ++ tests/appsec/app.py | 2 + 
.../iast/aspects/test_join_aspect_fixtures.py | 26 ++++++++++ tests/appsec/iast/aspects/test_re_aspects.py | 20 +++++++- .../packages/pkg_python_multipart.py | 49 +++++++++++++++++++ tests/appsec/iast_packages/test_packages.py | 15 ++++-- .../fastapi/test_fastapi_appsec_iast.py | 33 +++++++++++++ 9 files changed, 152 insertions(+), 10 deletions(-) create mode 100644 releasenotes/notes/iast-fix-re-finditer-aspect-8925b30073169222.yaml create mode 100644 tests/appsec/iast_packages/packages/pkg_python_multipart.py diff --git a/benchmarks/bm/iast_fixtures/str_methods.py b/benchmarks/bm/iast_fixtures/str_methods.py index 813303e9d41..08fd75ee421 100644 --- a/benchmarks/bm/iast_fixtures/str_methods.py +++ b/benchmarks/bm/iast_fixtures/str_methods.py @@ -903,6 +903,10 @@ def do_join_generator(mystring: str) -> Text: return "".join(gen) +def do_join_generator_as_argument(mystring: str, gen: Generator[str, None, None]) -> Text: + return mystring.join(gen) + + def do_join_generator_2(mystring: str) -> Text: def parts() -> Generator: for i in ["x", "y", "z"]: diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects.py b/ddtrace/appsec/_iast/_taint_tracking/aspects.py index 14f0a829f0c..b9ddaac57d0 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects.py +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects.py @@ -2,6 +2,7 @@ from builtins import bytes as builtin_bytes from builtins import str as builtin_str import codecs +import itertools from re import Match from re import Pattern from types import BuiltinFunctionType @@ -951,9 +952,7 @@ def re_findall_aspect( return result -def re_finditer_aspect( - orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any -) -> Union[TEXT_TYPES, Tuple[TEXT_TYPES, int]]: +def re_finditer_aspect(orig_function: Optional[Callable], flag_added_args: int, *args: Any, **kwargs: Any) -> Iterator: if orig_function is not None and (not flag_added_args or not args): # This patch is unexpected, so we fallback # to executing 
the original function @@ -982,9 +981,9 @@ def re_finditer_aspect( string = args[0] if is_pyobject_tainted(string): ranges = get_ranges(string) - for elem in result: + result, result_backup = itertools.tee(result) + for elem in result_backup: taint_pyobject_with_ranges(elem, ranges) - return result diff --git a/releasenotes/notes/iast-fix-re-finditer-aspect-8925b30073169222.yaml b/releasenotes/notes/iast-fix-re-finditer-aspect-8925b30073169222.yaml new file mode 100644 index 00000000000..1f0bdcd78bc --- /dev/null +++ b/releasenotes/notes/iast-fix-re-finditer-aspect-8925b30073169222.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Code Security: Ensure IAST propagation does not raise side effects related to re.finditer. \ No newline at end of file diff --git a/tests/appsec/app.py b/tests/appsec/app.py index b6a0b04824b..8b5fd60b98e 100644 --- a/tests/appsec/app.py +++ b/tests/appsec/app.py @@ -66,6 +66,7 @@ from tests.appsec.iast_packages.packages.pkg_pyopenssl import pkg_pyopenssl from tests.appsec.iast_packages.packages.pkg_pyparsing import pkg_pyparsing from tests.appsec.iast_packages.packages.pkg_python_dateutil import pkg_python_dateutil +from tests.appsec.iast_packages.packages.pkg_python_multipart import pkg_python_multipart from tests.appsec.iast_packages.packages.pkg_pytz import pkg_pytz from tests.appsec.iast_packages.packages.pkg_pyyaml import pkg_pyyaml from tests.appsec.iast_packages.packages.pkg_requests import pkg_requests @@ -144,6 +145,7 @@ app.register_blueprint(pkg_pyopenssl) app.register_blueprint(pkg_pyparsing) app.register_blueprint(pkg_python_dateutil) +app.register_blueprint(pkg_python_multipart) app.register_blueprint(pkg_pytz) app.register_blueprint(pkg_pyyaml) app.register_blueprint(pkg_requests) diff --git a/tests/appsec/iast/aspects/test_join_aspect_fixtures.py b/tests/appsec/iast/aspects/test_join_aspect_fixtures.py index aa88daae408..8128b7c9b5c 100644 --- a/tests/appsec/iast/aspects/test_join_aspect_fixtures.py +++ 
b/tests/appsec/iast/aspects/test_join_aspect_fixtures.py @@ -311,6 +311,32 @@ def test_string_join_generator(self): assert result[ranges[1].start : (ranges[1].start + ranges[1].length)] == "abcde" assert result[ranges[2].start : (ranges[2].start + ranges[2].length)] == "abcde" + def test_string_join_generator_multiples_times(self): + base_string = "abcde" + gen_string = "--+--" + gen = (gen_string for _ in ["1", "2", "3"]) + result = mod.do_join_generator_as_argument(base_string, gen) + assert result == "--+--abcde--+--abcde--+--" + assert not get_tainted_ranges(result) + result = mod.do_join_generator_as_argument(base_string, gen) + assert result == "" + # Tainted + tainted_base_string = taint_pyobject( + pyobject=base_string, + source_name="joiner", + source_value=base_string, + source_origin=OriginType.PARAMETER, + ) + gen = (gen_string for _ in ["1", "2", "3"]) + result = mod.do_join_generator_as_argument(tainted_base_string, gen) + result_2 = mod.do_join_generator_as_argument(tainted_base_string, gen) + assert result == "--+--abcde--+--abcde--+--" + assert result_2 == "" + + ranges = get_tainted_ranges(result) + assert result[ranges[0].start : (ranges[0].start + ranges[0].length)] == "abcde" + assert result[ranges[1].start : (ranges[1].start + ranges[1].length)] == "abcde" + def test_string_join_args_kwargs(self): # type: () -> None # Not tainted diff --git a/tests/appsec/iast/aspects/test_re_aspects.py b/tests/appsec/iast/aspects/test_re_aspects.py index edeeeeebe32..c37b6fee1aa 100644 --- a/tests/appsec/iast/aspects/test_re_aspects.py +++ b/tests/appsec/iast/aspects/test_re_aspects.py @@ -1,6 +1,8 @@ import re import typing +import pytest + from ddtrace.appsec._iast._taint_tracking import OriginType from ddtrace.appsec._iast._taint_tracking import Source from ddtrace.appsec._iast._taint_tracking import TaintRange @@ -475,10 +477,18 @@ def test_re_finditer_aspect_tainted_string(): re_slash = re.compile(r"[/.][a-z]*") res_iterator = re_finditer_aspect(None, 1, 
re_slash, tainted_foobarbaz) - assert isinstance(res_iterator, typing.Iterator) + assert isinstance(res_iterator, typing.Iterator), f"res_iterator is of type {type(res_iterator)}" + try: + tainted_item = next(res_iterator) + assert get_tainted_ranges(tainted_item) == [ + TaintRange(0, 18, Source("test_re_sub_aspect_tainted_string", tainted_foobarbaz, OriginType.PARAMETER)), + ] + except StopIteration: + pytest.fail("re_finditer_aspect result generator is depleted") + for i in res_iterator: assert get_tainted_ranges(i) == [ - TaintRange(0, len(i), Source("test_re_sub_aspect_tainted_string", tainted_foobarbaz, OriginType.PARAMETER)), + TaintRange(0, 18, Source("test_re_sub_aspect_tainted_string", tainted_foobarbaz, OriginType.PARAMETER)), ] @@ -489,5 +499,11 @@ def test_re_finditer_aspect_not_tainted(): res_iterator = re_finditer_aspect(None, 1, re_slash, not_tainted_foobarbaz) assert isinstance(res_iterator, typing.Iterator) + + try: + tainted_item = next(res_iterator) + assert not is_pyobject_tainted(tainted_item) + except StopIteration: + pytest.fail("re_finditer_aspect result generator is finished") for i in res_iterator: assert not is_pyobject_tainted(i) diff --git a/tests/appsec/iast_packages/packages/pkg_python_multipart.py b/tests/appsec/iast_packages/packages/pkg_python_multipart.py new file mode 100644 index 00000000000..b8fab3ec159 --- /dev/null +++ b/tests/appsec/iast_packages/packages/pkg_python_multipart.py @@ -0,0 +1,49 @@ +""" +isodate==0.6.1 + +https://pypi.org/project/isodate/ +""" + +from flask import Blueprint +from flask import request + +from .utils import ResultResponse + + +pkg_python_multipart = Blueprint("multipart", __name__) + + +@pkg_python_multipart.route("/python-multipart") +def pkg_multipart_view(): + from multipart.multipart import parse_options_header + + response = ResultResponse(request.args.get("package_param")) + + try: + _, params = parse_options_header(response.package_param) + + response.result1 = str(params[b"boundary"], 
"utf-8") + except Exception as e: + response.result1 = f"Error: {str(e)}" + + return response.json() + + +@pkg_python_multipart.route("/python-multipart_propagation") +def pkg_multipart_propagation_view(): + from multipart.multipart import parse_options_header + + from ddtrace.appsec._iast._taint_tracking import is_pyobject_tainted + + response = ResultResponse(request.args.get("package_param")) + if not is_pyobject_tainted(response.package_param): + response.result1 = "Error: package_param is not tainted" + return response.json() + + _, params = parse_options_header(response.package_param) + response.result1 = ( + "OK" + if is_pyobject_tainted(params[b"boundary"]) + else "Error: yaml_string is not tainted: %s" % str(params[b"boundary"], "utf-8") + ) + return response.json() diff --git a/tests/appsec/iast_packages/test_packages.py b/tests/appsec/iast_packages/test_packages.py index c62d77c91d4..7fcaaeb7ac4 100644 --- a/tests/appsec/iast_packages/test_packages.py +++ b/tests/appsec/iast_packages/test_packages.py @@ -54,7 +54,7 @@ def __init__( import_name=None, import_module_to_validate=None, test_propagation=False, - fixme_propagation_fails=False, + fixme_propagation_fails=None, expect_no_change=False, ): self.name = name @@ -470,6 +470,15 @@ def uninstall(self, python_cmd): import_name="dateutil", import_module_to_validate="dateutil.relativedelta", ), + PackageForTesting( + "python-multipart", + "0.0.5", # this version validates APPSEC-55240 issue, don't upgrade it + "multipart/form-data; boundary=d8b5635eb590e078a608e083351288a0", + "d8b5635eb590e078a608e083351288a0", + "", + import_module_to_validate="multipart.multipart", + test_propagation=True, + ), PackageForTesting( "pytz", "2024.1", @@ -955,7 +964,7 @@ def test_flask_packages_patched(package, venv): @pytest.mark.parametrize( "package", - [package for package in PACKAGES if package.test_propagation], + [package for package in PACKAGES if package.test_propagation and SKIP_FUNCTION(package)], ids=lambda 
package: package.name, ) def test_flask_packages_propagation(package, venv, printer): @@ -987,7 +996,7 @@ def test_packages_not_patched_import(package, venv): pytest.skip(reason) return - cmdlist = [venv, _INSIDE_ENV_RUNNER_PATH, "unpatched", package.import_name] + cmdlist = [venv, _INSIDE_ENV_RUNNER_PATH, "unpatched", package.import_module_to_validate] # 1. Try with the specified version package.install(venv) diff --git a/tests/contrib/fastapi/test_fastapi_appsec_iast.py b/tests/contrib/fastapi/test_fastapi_appsec_iast.py index 65d3a931629..7e851045d64 100644 --- a/tests/contrib/fastapi/test_fastapi_appsec_iast.py +++ b/tests/contrib/fastapi/test_fastapi_appsec_iast.py @@ -1,3 +1,4 @@ +import io import json import logging import sys @@ -7,6 +8,7 @@ from fastapi import Form from fastapi import Header from fastapi import Request +from fastapi import UploadFile from fastapi import __version__ as _fastapi_version from fastapi.responses import JSONResponse import pytest @@ -483,6 +485,37 @@ async def test_route(item: Item): assert result["ranges_origin"] == "http.request.body" +@pytest.mark.skipif(fastapi_version < (0, 65, 0), reason="UploadFile not supported") +def test_path_body_body_upload(fastapi_application, client, tracer, test_spans): + @fastapi_application.post("/uploadfile/") + async def create_upload_file(files: typing.List[UploadFile]): + from ddtrace.appsec._iast._taint_tracking import get_tainted_ranges + + ranges_result = get_tainted_ranges(files[0]) + return JSONResponse( + { + "filenames": [file.filename for file in files], + "is_tainted": len(ranges_result), + } + ) + + with override_global_config(dict(_iast_enabled=True)), override_env(IAST_ENV): + # disable callback + _aux_appsec_prepare_tracer(tracer) + tmp = io.BytesIO(b"upload this") + resp = client.post( + "/uploadfile/", + files=( + ("files", ("test.txt", tmp)), + ("files", ("test2.txt", tmp)), + ), + ) + assert resp.status_code == 200 + result = json.loads(get_response_body(resp)) + assert 
result["filenames"] == ["test.txt", "test2.txt"] + assert result["is_tainted"] == 0 + + def test_fastapi_sqli_path_param(fastapi_application, client, tracer, test_spans): @fastapi_application.get("/index.html/{param_str}") async def test_route(param_str): From 9f576ff1fcaf52c398398cd23b2c2586b5f86691 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 18:51:19 +0000 Subject: [PATCH 05/14] fix(flare): handle non-dict AGENT_CONFIG products from RC [backport 2.13] (#11065) Backport a482830bdec4414a29eaf292ca46f9d3c6fc2a5f from #10985 to 2.13. Tracer flares were failing due to AGENT_CONFIG items possibly containing pure booleans, and not just dictionaries. Example: ``` AGENT_CONFIG=[True, {...config...}, ...] ``` This would break the flare because it would be expecting only dicts. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has 
acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: erikayasuda <153395705+erikayasuda@users.noreply.github.com> --- ddtrace/internal/flare/_subscribers.py | 6 ++++-- ddtrace/internal/flare/flare.py | 1 - ddtrace/internal/flare/handler.py | 18 ++++++++++++++++-- .../fix-tracer-flare-e4003d01b434267a.yaml | 5 +++++ tests/internal/test_tracer_flare.py | 2 +- 5 files changed, 26 insertions(+), 6 deletions(-) create mode 100644 releasenotes/notes/fix-tracer-flare-e4003d01b434267a.yaml diff --git a/ddtrace/internal/flare/_subscribers.py b/ddtrace/internal/flare/_subscribers.py index 29ea0bea658..046f51b0105 100644 --- a/ddtrace/internal/flare/_subscribers.py +++ b/ddtrace/internal/flare/_subscribers.py @@ -36,7 +36,7 @@ def has_stale_flare(self) -> bool: return flare_age >= stale_age return False - def _get_data_from_connector_and_exec(self): + def _get_data_from_connector_and_exec(self, _=None): if self.has_stale_flare(): log.info( "Tracer flare request started at %s is stale, reverting " @@ -67,6 +67,7 @@ def _get_data_from_connector_and_exec(self): str(self.current_request_start), ) continue + log.info("Preparing tracer flare") if _prepare_tracer_flare(self.flare, configs): self.current_request_start = datetime.now() elif product_type == "AGENT_TASK": @@ -75,7 +76,8 @@ def _get_data_from_connector_and_exec(self): if self.current_request_start is None: log.warning("There is no tracer flare job to complete. 
Skipping new request.") continue + log.info("Generating and sending tracer flare") if _generate_tracer_flare(self.flare, configs): self.current_request_start = None else: - log.debug("Received unexpected product type for tracer flare: {}", product_type) + log.warning("Received unexpected product type for tracer flare: {}", product_type) diff --git a/ddtrace/internal/flare/flare.py b/ddtrace/internal/flare/flare.py index b37d6e8e013..5f9a582ae29 100644 --- a/ddtrace/internal/flare/flare.py +++ b/ddtrace/internal/flare/flare.py @@ -98,7 +98,6 @@ def send(self, flare_send_req: FlareSendRequest): before sending the flare. """ self.revert_configs() - # We only want the flare to be sent once, even if there are # multiple tracer instances lock_path = self.flare_dir / TRACER_FLARE_LOCK diff --git a/ddtrace/internal/flare/handler.py b/ddtrace/internal/flare/handler.py index 75ddac35188..efc5e872cb8 100644 --- a/ddtrace/internal/flare/handler.py +++ b/ddtrace/internal/flare/handler.py @@ -30,6 +30,7 @@ def __init__(self, callback: Callable, flare: Flare): def _handle_tracer_flare(flare: Flare, data: dict, cleanup: bool = False): if cleanup: + log.info("Reverting tracer flare configurations and cleaning up any generated files") flare.revert_configs() flare.clean_up_files() return @@ -51,7 +52,7 @@ def _handle_tracer_flare(flare: Flare, data: dict, cleanup: bool = False): log.warning("Received unexpected tracer flare product type: %s", product_type) -def _prepare_tracer_flare(flare: Flare, configs: List[dict]) -> bool: +def _prepare_tracer_flare(flare: Flare, configs: List[Any]) -> bool: """ Update configurations to start sending tracer logs to a file to be sent in a flare later. 
@@ -60,7 +61,13 @@ def _prepare_tracer_flare(flare: Flare, configs: List[dict]) -> bool: # AGENT_CONFIG is currently being used for multiple purposes # We only want to prepare for a tracer flare if the config name # starts with 'flare-log-level' + if not isinstance(c, dict): + log.debug("Config item is not type dict, received type %s instead. Skipping...", str(type(c))) + continue if not c.get("name", "").startswith("flare-log-level"): + log.debug( + "Config item name does not start with flare-log-level, received %s instead. Skipping...", c.get("name") + ) continue flare_log_level = c.get("config", {}).get("log_level").upper() @@ -78,7 +85,14 @@ def _generate_tracer_flare(flare: Flare, configs: List[Any]) -> bool: # AGENT_TASK is currently being used for multiple purposes # We only want to generate the tracer flare if the task_type is # 'tracer_flare' - if type(c) != dict or c.get("task_type") != "tracer_flare": + if not isinstance(c, dict): + log.debug("Config item is not type dict, received type %s instead. Skipping...", str(type(c))) + continue + if c.get("task_type") != "tracer_flare": + log.debug( + "Config item does not have the expected task_type. Expected [tracer_flare], received [%s]. Skipping...", + c.get("task_type"), + ) continue args = c.get("args", {}) flare_request = FlareSendRequest( diff --git a/releasenotes/notes/fix-tracer-flare-e4003d01b434267a.yaml b/releasenotes/notes/fix-tracer-flare-e4003d01b434267a.yaml new file mode 100644 index 00000000000..d3f8c7a3a99 --- /dev/null +++ b/releasenotes/notes/fix-tracer-flare-e4003d01b434267a.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + tracing(flare): Resolves the issue where tracer flares would not be generated if unexpected + types were received in the AGENT_CONFIG remote configuration product. 
diff --git a/tests/internal/test_tracer_flare.py b/tests/internal/test_tracer_flare.py index af8237c92e4..02a9684d3d5 100644 --- a/tests/internal/test_tracer_flare.py +++ b/tests/internal/test_tracer_flare.py @@ -234,7 +234,7 @@ def write(self): class TracerFlareSubscriberTests(TestCase): - agent_config = [{"name": "flare-log-level", "config": {"log_level": "DEBUG"}}] + agent_config = [False, {"name": "flare-log-level", "config": {"log_level": "DEBUG"}}] agent_task = [ False, { From b9e96d77870e22b6f0542de337d7c9654b378a64 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Mon, 21 Oct 2024 09:34:54 -0400 Subject: [PATCH 06/14] chore(ci): bump riot version for 3.7 compatibility issues [backport 2.13] (#11091) Manual backport of #11085 to 2.13 With the new release of virtualenv==20.27.0, we were seeing new failures with build_base_venv[3.7] (GitLab) and build_base_venv (CircleCI) jobs when running: ``` riot -P -v generate --python=$PYTHON_VERSION ... File "/go/src/github.com/DataDog/apm-reliability/dd-trace-py/.riot/venv_py3717/lib/python3.7/site-packages/pip/_vendor/typing_extensions.py", line 1039 def TypedDict(typename, fields=_marker, /, *, total=True, closed=False, **kwargs): ^ SyntaxError: invalid syntax ERROR:riot.riot:Dev install failed, aborting! ``` We need to pin to virtualenv==20.26.6 to continue supporting python3.7 The upload-artifact and download-artifact bumps were required to unblock the riot CI. It had been a while since the last update to riot, and since then, v2 has been deprecated and v3 will be deprecated next month (Nov 2024), so we are bumping to v4 via tag. 
The above changes were implemented in riot (https://github.com/DataDog/riot/pull/232), and this PR just bumps the riot version in dd-trace-py from 0.19.1 to 0.20.0 ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- .circleci/config.templ.yml | 2 +- .circleci/config.yml | 2 +- .github/workflows/generate-package-versions.yml | 7 ++++--- .github/workflows/requirements-locks.yml | 2 +- .gitlab/package.yml | 2 +- .gitlab/tests.yml | 2 +- hatch.toml | 2 +- scripts/ddtest | 3 +-- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.circleci/config.templ.yml b/.circleci/config.templ.yml index 660e6652408..34adf8c7d0e 100644 --- 
a/.circleci/config.templ.yml +++ b/.circleci/config.templ.yml @@ -80,7 +80,7 @@ commands: description: "Install riot" steps: # Make sure we install and run riot on Python 3 - - run: pip3 install riot==0.19.1 + - run: pip3 install riot==0.20.0 setup_rust: description: "Install rust toolchain" diff --git a/.circleci/config.yml b/.circleci/config.yml index cbac9570ebe..fcf74292ee3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,7 +31,7 @@ jobs: name: Generate config command: | export GIT_COMMIT_DESC=$(git log -n 1 $CIRCLE_SHA1) - pip3 install riot==0.19.1 + pip3 install riot==0.20.0 riot -P -v run --pass-env -s circleci-gen-config -- -v - continuation/continue: configuration_path: .circleci/config.gen.yml diff --git a/.github/workflows/generate-package-versions.yml b/.github/workflows/generate-package-versions.yml index 73bfce0fca0..b375ec95864 100644 --- a/.github/workflows/generate-package-versions.yml +++ b/.github/workflows/generate-package-versions.yml @@ -13,6 +13,7 @@ jobs: actions: read contents: write pull-requests: write + steps: - uses: actions/checkout@v4 @@ -45,7 +46,7 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.12" - + - name: Set up QEMU uses: docker/setup-qemu-action@v2 @@ -76,7 +77,7 @@ jobs: python -m pip install --upgrade pip pip install packaging pip install requests - pip install riot==0.19.1 + pip install riot==0.20.0 - name: Run regenerate-riot-latest run: scripts/regenerate-riot-latest.sh @@ -98,4 +99,4 @@ jobs: base: main title: "chore: update ${{ env.VENV_NAME }} latest version to ${{ env.NEW_LATEST }}" labels: changelog/no-changelog - body-path: .github/PULL_REQUEST_TEMPLATE.md \ No newline at end of file + body-path: .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/workflows/requirements-locks.yml b/.github/workflows/requirements-locks.yml index fece6261a1a..5716c733f3b 100644 --- a/.github/workflows/requirements-locks.yml +++ b/.github/workflows/requirements-locks.yml @@ -23,7 +23,7 @@ jobs: 
run: pyenv global 3.10 3.7 3.8 3.9 3.11 3.12 - name: Install Dependencies - run: pip install --upgrade pip && pip install riot + run: pip install --upgrade pip && pip install riot==0.20.0 - name: Generate riot locks run: scripts/compile-and-prune-test-requirements diff --git a/.gitlab/package.yml b/.gitlab/package.yml index 4a532594d10..ae85d108287 100644 --- a/.gitlab/package.yml +++ b/.gitlab/package.yml @@ -8,7 +8,7 @@ build_base_venvs: CMAKE_BUILD_PARALLEL_LEVEL: 24 PIP_VERBOSE: 1 script: - - pip install riot~=0.19.1 + - pip install riot==0.20.0 - riot -P -v generate --python=$PYTHON_VERSION artifacts: name: venv_$PYTHON_VERSION diff --git a/.gitlab/tests.yml b/.gitlab/tests.yml index d983b4290b6..c9f2e2fda1f 100644 --- a/.gitlab/tests.yml +++ b/.gitlab/tests.yml @@ -24,7 +24,7 @@ variables: services: - !reference [.services, ddagent] script: - - pip install riot~=0.19.1 + - pip install riot==0.20.0 - unset DD_SERVICE - unset DD_ENV - unset DD_TAGS diff --git a/hatch.toml b/hatch.toml index 16b90392797..ebd7858afc5 100644 --- a/hatch.toml +++ b/hatch.toml @@ -17,7 +17,7 @@ dependencies = [ "ddapm-test-agent>=1.2.0", "packaging==23.1", "pygments==2.16.1", - "riot==0.19.1", + "riot==0.20.0", "ruff==0.1.3", "clang-format==18.1.5", ] diff --git a/scripts/ddtest b/scripts/ddtest index 81654c0f4fc..c10a1e7c1fc 100755 --- a/scripts/ddtest +++ b/scripts/ddtest @@ -13,8 +13,7 @@ fi for i in {1..3}; do docker-compose pull -q testrunner && break || sleep 3; done # TODO(DEV): Install riot in the docker image -FULL_CMD="pip install -q --disable-pip-version-check riot==0.19.1 && $CMD" - +FULL_CMD="pip install -q --disable-pip-version-check riot==0.20.0 && $CMD" # install and upgrade riot in case testrunner image has not been updated # DEV: Use `--no-TTY` and `--quiet-pull` when running in CircleCI From 268a838d956ad4505f6346619db8a1044f7eb785 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 
16:15:11 -0400 Subject: [PATCH 07/14] fix(elasticsearch): set tags on span, regardless of sampling [backport 2.13] (#11057) Backport db9c3ec2a9187e7f5b8ef981d47a4ce0bc301968 from #11000 to 2.13. The Datadog ElasticSearch integration avoids setting metadata on spans when those spans are marked to be dropped. A recent change was added for sampling to lazily determine whether a span is sampled - which means the sample decision is much too late for this code (see https://github.com/DataDog/dd-trace-py/pull/8308). Prior to this fix, some elasticsearch query spans incorrectly failed to set metadata based upon sampling rules, and thus incorrectly participated in metrics. After this fix, the metadata is set regardless of sampling decisions. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set 
in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Teague Bick --- ddtrace/contrib/internal/elasticsearch/patch.py | 5 ----- ...ticsearch-always-populate-span-tags-c2a212ec706b3f4b.yaml | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) create mode 100644 releasenotes/notes/fix-elasticsearch-always-populate-span-tags-c2a212ec706b3f4b.yaml diff --git a/ddtrace/contrib/internal/elasticsearch/patch.py b/ddtrace/contrib/internal/elasticsearch/patch.py index 560620ad87a..960f34ce60b 100644 --- a/ddtrace/contrib/internal/elasticsearch/patch.py +++ b/ddtrace/contrib/internal/elasticsearch/patch.py @@ -142,11 +142,6 @@ def _perform_request(func, instance, args, kwargs): span.set_tag(SPAN_MEASURED_KEY) - # Only instrument if trace is sampled or if we haven't tried to sample yet - if span.context.sampling_priority is not None and span.context.sampling_priority <= 0: - yield func(*args, **kwargs) - return - method, target = args params = kwargs.get("params") body = kwargs.get("body") diff --git a/releasenotes/notes/fix-elasticsearch-always-populate-span-tags-c2a212ec706b3f4b.yaml b/releasenotes/notes/fix-elasticsearch-always-populate-span-tags-c2a212ec706b3f4b.yaml new file mode 100644 index 00000000000..97d11cda3b8 --- /dev/null +++ b/releasenotes/notes/fix-elasticsearch-always-populate-span-tags-c2a212ec706b3f4b.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + elasticsearch: this fix resolves an issue where span tags were not fully populated on "sampled" spans, causing metric dimensions to be incorrect when spans were prematurely marked as sampled, including resource_name. 
\ No newline at end of file From 88151a1a9fb735d6c1b16aa8cfb1dca7af36a34c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 11:02:20 +0200 Subject: [PATCH 08/14] chore(iast): propagation error in replace aspect [backport 2.13] (#11128) Backport f5314ed0c18b7a663039356d098c77f259d636ac from #11120 to 2.13. Fix a propagation error in the replace aspect detected in the log explorer ``` File "/usr/local/lib/python3.12/site-packages/django/core/handlers/wsgi.py", line 67, in __init__ self.path = "%s/%s" % (script_name.rstrip("/"), path_info.replace("/", "", 1)) File "/usr/local/lib/python3.12/site-packages/ddtrace/appsec/_iast/_taint_tracking/aspects.py", line 816, in replace_aspect iast_taint_log_error("replace_aspect. {}".format(e)) File "/usr/local/lib/python3.12/site-packages/ddtrace/appsec/_iast/_taint_tracking/__init__.py", line 129, in iast_taint_log_error _set_iast_error_metric("[IAST] Propagation error. 
%s" % msg) File "/usr/local/lib/python3.12/site-packages/ddtrace/appsec/_deduplications.py", line 44, in __call__ result = self.func(*args, **kwargs) File "/usr/local/lib/python3.12/site-packages/ddtrace/appsec/_iast/_metrics.py", line 70, in _set_iast_error_metric res.extend(traceback.format_stack(limit=20)) File "/usr/local/lib/python3.12/site-packages/ddtrace/appsec/_iast/_taint_tracking/aspects.py", line 809, in replace_aspect aspect_result = aspect_replace_api(candidate_text, old_value, new_value, count, orig_result) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/local/lib/python3.12/site-packages/ddtrace/appsec/_iast/_taint_tracking/aspects.py", line 763, in aspect_replace_api result_formatted = as_formatted_evidence(new_value, tag_mapping_function=TagMappingMode.Mapper).join(new_elements) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ TypeError: sequence item 0: expected str instance, NoneType found ``` ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately 
addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Alberto Vara --- .../appsec/_iast/_taint_tracking/aspects.py | 2 ++ .../iast/aspects/test_replace_aspect.py | 25 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects.py b/ddtrace/appsec/_iast/_taint_tracking/aspects.py index b9ddaac57d0..5311b5f0ef5 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects.py +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects.py @@ -743,6 +743,8 @@ def aspect_replace_api( len(old_value), candidate_text_ranges, ) + else: + new_elements = [element for element in new_elements if element is not None] result_formatted = as_formatted_evidence(new_value, tag_mapping_function=TagMappingMode.Mapper).join(new_elements) diff --git a/tests/appsec/iast/aspects/test_replace_aspect.py b/tests/appsec/iast/aspects/test_replace_aspect.py index ea27dc03b89..b30fa7cdede 100644 --- a/tests/appsec/iast/aspects/test_replace_aspect.py +++ b/tests/appsec/iast/aspects/test_replace_aspect.py @@ -683,6 +683,31 @@ def test_replace_tainted_results_in_no_tainted(origstr, substr, replstr, maxcoun assert is_pyobject_tainted(replaced) is False +@pytest.mark.parametrize( + "origstr,formatted", + [ + ("/", ""), + ("", ""), + ("", ""), + ("/waf/", "waf/"), + ("waf/", "waf"), + ("//waf/", "/waf/"), + ("path/waf/", "pathwaf/"), + ("a/:", "a:"), + # TODO: this replace raises basic_string::substr: __pos (which is 4) > this->size() (which is 3) + # ("a/:+-/", ":+-a-+::+-a/-+:"), + ], +) +def test_replace_tainted_results_in_no_tainted_django(origstr, formatted): + path_info = 
origstr.encode("iso-8859-1").decode() + sep = taint_pyobject(pyobject="/", source_name="d", source_value="/", source_origin=OriginType.PARAMETER) + replaced = ddtrace_aspects.replace_aspect(path_info.replace, 1, path_info, sep, "", 1) + assert replaced == path_info.replace(sep, "", 1) + + assert as_formatted_evidence(replaced) == formatted + assert is_pyobject_tainted(replaced) is False + + @pytest.mark.parametrize( "origstr, substr, replstr, maxcount, formatted", [ From 2f6f2cc09752f3d34f841416c866e4b690f032a9 Mon Sep 17 00:00:00 2001 From: Federico Mon Date: Wed, 23 Oct 2024 13:02:23 +0200 Subject: [PATCH 09/14] fix(iast): partial matches on function names should not be patched [backport 2.13] (#11019) Co-authored-by: Alberto Vara --- ddtrace/appsec/_iast/_ast/visitor.py | 2 +- ...-aspects-partial-matches-f43dc04584ca6788.yaml | 3 +++ tests/appsec/iast/_ast/test_ast_patching.py | 15 +++++++++++++++ .../iast/fixtures/ast/other/globals_builtin.py | 3 +++ 4 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/iast-aspects-partial-matches-f43dc04584ca6788.yaml create mode 100644 tests/appsec/iast/fixtures/ast/other/globals_builtin.py diff --git a/ddtrace/appsec/_iast/_ast/visitor.py b/ddtrace/appsec/_iast/_ast/visitor.py index 6d07aefde8b..dfbff1fa3a8 100644 --- a/ddtrace/appsec/_iast/_ast/visitor.py +++ b/ddtrace/appsec/_iast/_ast/visitor.py @@ -274,7 +274,7 @@ def _should_replace_with_taint_sink(self, call_node: ast.Call, is_function: bool if function_name in self._taint_sink_replace_disabled: return False - return any(allowed in function_name for allowed in self._taint_sink_replace_any) + return function_name in self._taint_sink_replace_any def _add_original_function_as_arg(self, call_node: ast.Call, is_function: bool) -> Any: """ diff --git a/releasenotes/notes/iast-aspects-partial-matches-f43dc04584ca6788.yaml b/releasenotes/notes/iast-aspects-partial-matches-f43dc04584ca6788.yaml new file mode 100644 index 
00000000000..d20f910ba30 --- /dev/null +++ b/releasenotes/notes/iast-aspects-partial-matches-f43dc04584ca6788.yaml @@ -0,0 +1,3 @@ +fixes: + - | + Code security: This fix resolves an issue where partial matches on function names we aimed to patch were being patched instead of full matches on them. diff --git a/tests/appsec/iast/_ast/test_ast_patching.py b/tests/appsec/iast/_ast/test_ast_patching.py index 155a0270bc3..607915cff9d 100644 --- a/tests/appsec/iast/_ast/test_ast_patching.py +++ b/tests/appsec/iast/_ast/test_ast_patching.py @@ -172,3 +172,18 @@ def test_module_path_none(caplog): with caplog.at_level(logging.DEBUG), mock.patch("ddtrace.internal.module.Path.resolve", side_effect=AttributeError): assert ("", "") == astpatch_module(__import__("tests.appsec.iast.fixtures.ast.str.class_str", fromlist=[None])) assert "astpatch_source couldn't find the module: tests.appsec.iast.fixtures.ast.str.class_str" in caplog.text + + +@pytest.mark.parametrize( + "module_name", + [ + ("tests.appsec.iast.fixtures.ast.other.globals_builtin"), + ], +) +def test_astpatch_globals_module_unchanged(module_name): + """ + This is a regression test for partially matching function names: + ``globals()`` was being incorrectly patched with the aspect for ``glob()`` + """ + module_path, new_source = astpatch_module(__import__(module_name, fromlist=[None])) + assert ("", "") == (module_path, new_source) diff --git a/tests/appsec/iast/fixtures/ast/other/globals_builtin.py b/tests/appsec/iast/fixtures/ast/other/globals_builtin.py new file mode 100644 index 00000000000..411f0643843 --- /dev/null +++ b/tests/appsec/iast/fixtures/ast/other/globals_builtin.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python3 + +_globals = globals() From 051e24796fd482d81376abc9fcc22c676e148774 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 24 Oct 2024 17:37:40 +0000 Subject: [PATCH 10/14] fix(llmobs): recreate eval metric writer on fork [backport 
2.13] (#10766) Backport 5dbd7ef543031f50dd6600ea0af8b28773d56fc4 from #10710 to 2.13. This PR ensures that the `LLMObsEvalMetricWriter` is correctly recreated and restarted on a forked process. Previously, on a process fork we were not recreating/restarting the eval metric writer worker. Mirrors https://github.com/DataDog/dd-trace-py/pull/10249 but for eval metric writer ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: lievan <42917263+lievan@users.noreply.github.com> Co-authored-by: Yun Kim <35776586+Yun-Kim@users.noreply.github.com> Co-authored-by: Quinna Halim --- ddtrace/llmobs/_llmobs.py | 4 +- 
ddtrace/llmobs/_writer.py | 8 +++ ...d-eval-metric-writer-55d50d1ed8fc10ea.yaml | 6 ++ tests/llmobs/test_llmobs_service.py | 70 +++++++++++++++---- 4 files changed, 73 insertions(+), 15 deletions(-) create mode 100644 releasenotes/notes/fix-llmobs-forked-eval-metric-writer-55d50d1ed8fc10ea.yaml diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index 4dfe243e59d..7735c3901fc 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -90,6 +90,7 @@ def __init__(self, tracer=None): def _child_after_fork(self): self._llmobs_span_writer = self._llmobs_span_writer.recreate() + self._llmobs_eval_metric_writer = self._llmobs_eval_metric_writer.recreate() self._trace_processor._span_writer = self._llmobs_span_writer tracer_filters = self.tracer._filters if not any(isinstance(tracer_filter, LLMObsTraceProcessor) for tracer_filter in tracer_filters): @@ -97,8 +98,9 @@ def _child_after_fork(self): self.tracer.configure(settings={"FILTERS": tracer_filters}) try: self._llmobs_span_writer.start() + self._llmobs_eval_metric_writer.start() except ServiceStatusError: - log.debug("Error starting LLMObs span writer after fork") + log.debug("Error starting LLMObs writers after fork") def _start_service(self) -> None: tracer_filters = self.tracer._filters diff --git a/ddtrace/llmobs/_writer.py b/ddtrace/llmobs/_writer.py index 29bddbd98d4..0b79176b51a 100644 --- a/ddtrace/llmobs/_writer.py +++ b/ddtrace/llmobs/_writer.py @@ -141,6 +141,14 @@ def _url(self) -> str: def _data(self, events: List[Any]) -> Dict[str, Any]: raise NotImplementedError + def recreate(self) -> "BaseLLMObsWriter": + return self.__class__( + site=self._site, + api_key=self._api_key, + interval=self._interval, + timeout=self._timeout, + ) + class LLMObsEvalMetricWriter(BaseLLMObsWriter): """Writer to the Datadog LLMObs Custom Eval Metrics Endpoint.""" diff --git a/releasenotes/notes/fix-llmobs-forked-eval-metric-writer-55d50d1ed8fc10ea.yaml 
b/releasenotes/notes/fix-llmobs-forked-eval-metric-writer-55d50d1ed8fc10ea.yaml new file mode 100644 index 00000000000..ef72a6bf315 --- /dev/null +++ b/releasenotes/notes/fix-llmobs-forked-eval-metric-writer-55d50d1ed8fc10ea.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + LLM Observability: This fix resolves an issue where LLM Observability evaluation metrics were not being submitted + in forked processes. The evaluation metric writer thread now automatically restarts when a forked + process is detected. \ No newline at end of file diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py index ef51663cd9a..bd455878be8 100644 --- a/tests/llmobs/test_llmobs_service.py +++ b/tests/llmobs/test_llmobs_service.py @@ -1362,20 +1362,7 @@ def test_activate_distributed_headers_activates_context(LLMObs, mock_logs): mock_activate.assert_called_once_with(dummy_context) -def _task(llmobs_service, errors, original_pid, original_span_writer_id): - """Task in test_llmobs_fork which asserts that LLMObs in a forked process correctly recreates the writer.""" - try: - with llmobs_service.workflow(): - with llmobs_service.task(): - assert llmobs_service._instance.tracer._pid != original_pid - assert id(llmobs_service._instance._llmobs_span_writer) != original_span_writer_id - assert llmobs_service._instance._llmobs_span_writer.enqueue.call_count == 2 - assert llmobs_service._instance._llmobs_span_writer._encoder.encode.call_count == 2 - except AssertionError as e: - errors.put(e) - - -def test_llmobs_fork_recreates_and_restarts_writer(): +def test_llmobs_fork_recreates_and_restarts_span_writer(): """Test that forking a process correctly recreates and restarts the LLMObsSpanWriter.""" with mock.patch("ddtrace.internal.writer.HTTPWriter._send_payload"): llmobs_service.enable(_tracer=DummyTracer(), ml_app="test_app") @@ -1405,6 +1392,30 @@ def test_llmobs_fork_recreates_and_restarts_writer(): llmobs_service.disable() +def 
test_llmobs_fork_recreates_and_restarts_eval_metric_writer(): + """Test that forking a process correctly recreates and restarts the LLMObsEvalMetricWriter.""" + with mock.patch("ddtrace.llmobs._writer.BaseLLMObsWriter.periodic"): + llmobs_service.enable(_tracer=DummyTracer(), ml_app="test_app") + original_pid = llmobs_service._instance.tracer._pid + original_eval_metric_writer = llmobs_service._instance._llmobs_eval_metric_writer + pid = os.fork() + if pid: # parent + assert llmobs_service._instance.tracer._pid == original_pid + assert llmobs_service._instance._llmobs_eval_metric_writer == original_eval_metric_writer + assert llmobs_service._instance._llmobs_eval_metric_writer.status == ServiceStatus.RUNNING + else: # child + assert llmobs_service._instance.tracer._pid != original_pid + assert llmobs_service._instance._llmobs_eval_metric_writer != original_eval_metric_writer + assert llmobs_service._instance._llmobs_eval_metric_writer.status == ServiceStatus.RUNNING + llmobs_service.disable() + os._exit(12) + + _, status = os.waitpid(pid, 0) + exit_code = os.WEXITSTATUS(status) + assert exit_code == 12 + llmobs_service.disable() + + def test_llmobs_fork_create_span(monkeypatch): """Test that forking a process correctly encodes new spans created in each process.""" monkeypatch.setenv("_DD_LLMOBS_WRITER_INTERVAL", 5.0) @@ -1429,6 +1440,37 @@ def test_llmobs_fork_create_span(monkeypatch): llmobs_service.disable() + + +def test_llmobs_fork_submit_evaluation(monkeypatch): + """Test that forking a process correctly submits evaluation metrics in each process.""" + monkeypatch.setenv("_DD_LLMOBS_WRITER_INTERVAL", 5.0) + with mock.patch("ddtrace.llmobs._writer.BaseLLMObsWriter.periodic"): + llmobs_service.enable(_tracer=DummyTracer(), ml_app="test_app", api_key="test_api_key") + pid = os.fork() + if pid: # parent + llmobs_service.submit_evaluation( + span_context={"span_id": "123", "trace_id": "456"}, + label="toxicity", + metric_type="categorical", + value="high", + ) + assert 
len(llmobs_service._instance._llmobs_eval_metric_writer._buffer) == 1 + else: # child + llmobs_service.submit_evaluation( + span_context={"span_id": "123", "trace_id": "456"}, + label="toxicity", + metric_type="categorical", + value="high", + ) + assert len(llmobs_service._instance._llmobs_eval_metric_writer._buffer) == 1 + llmobs_service.disable() + os._exit(12) + + _, status = os.waitpid(pid, 0) + exit_code = os.WEXITSTATUS(status) + assert exit_code == 12 + llmobs_service.disable() + + def test_llmobs_fork_custom_filter(monkeypatch): """Test that forking a process correctly keeps any custom filters.""" From 0c0aa92b333ef4123e63ccace840ffcf1fc2abc0 Mon Sep 17 00:00:00 2001 From: lievan <42917263+lievan@users.noreply.github.com> Date: Fri, 25 Oct 2024 16:01:08 -0400 Subject: [PATCH 11/14] fix(llmobs): don't drop IO annotations equal to zero (#11044) [backport 2.13] (#11162) Backport https://github.com/DataDog/dd-trace-py/commit/2f23eb28c6b4d98b6cac4f84edc2ee3c42095aad from https://github.com/DataDog/dd-trace-py/pull/11044 to 2.13. Fixes an issue where we are dropping I/O annotations that were equal to zero for workflow, task, agent and tool spans. 
- [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if 
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: lievan --- ddtrace/llmobs/_llmobs.py | 6 +++--- .../fix-numeric-annotations-7cf06b5ceb615282.yaml | 5 +++++ tests/llmobs/test_llmobs_service.py | 11 +++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/fix-numeric-annotations-7cf06b5ceb615282.yaml diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index 7735c3901fc..30e360c55ba 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -511,7 +511,7 @@ def annotate( if parameters is not None: log.warning("Setting parameters is deprecated, please set parameters and other metadata as tags instead.") cls._tag_params(span, parameters) - if input_data or output_data: + if input_data is not None or output_data is not None: if span_kind == "llm": cls._tag_llm_io(span, input_messages=input_data, output_messages=output_data) elif span_kind == "embedding": @@ -601,9 +601,9 @@ def _tag_text_io(cls, span, input_value=None, output_value=None): """Tags input/output values for non-LLM kind spans. Will be mapped to span's `meta.{input,output}.values` fields. 
""" - if input_value: + if input_value is not None: span.set_tag_str(INPUT_VALUE, safe_json(input_value)) - if output_value: + if output_value is not None: span.set_tag_str(OUTPUT_VALUE, safe_json(output_value)) @staticmethod diff --git a/releasenotes/notes/fix-numeric-annotations-7cf06b5ceb615282.yaml b/releasenotes/notes/fix-numeric-annotations-7cf06b5ceb615282.yaml new file mode 100644 index 00000000000..05ed3ddb964 --- /dev/null +++ b/releasenotes/notes/fix-numeric-annotations-7cf06b5ceb615282.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + LLM Observability: This fix resolves an issue where input and output values equal to zero were not being annotated + on workflow, task, agent and tool spans when using `LLMObs.annotate`. diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py index bd455878be8..e726cbd995f 100644 --- a/tests/llmobs/test_llmobs_service.py +++ b/tests/llmobs/test_llmobs_service.py @@ -471,6 +471,17 @@ def test_annotate_input_string(LLMObs): assert retrieval_span.get_tag(INPUT_VALUE) == "test_input" +def test_annotate_numeric_io(LLMObs): + with LLMObs.task() as task_span: + LLMObs.annotate(span=task_span, input_data=0, output_data=0) + assert task_span.get_tag(INPUT_VALUE) == "0" + assert task_span.get_tag(OUTPUT_VALUE) == "0" + with LLMObs.task() as task_span: + LLMObs.annotate(span=task_span, input_data=1.23, output_data=1.23) + assert task_span.get_tag(INPUT_VALUE) == "1.23" + assert task_span.get_tag(OUTPUT_VALUE) == "1.23" + + def test_annotate_input_serializable_value(LLMObs): with LLMObs.task() as task_span: LLMObs.annotate(span=task_span, input_data=["test_input"]) From 827fcc5c3f6972918230ba3ab89b57ad11961009 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Mon, 28 Oct 2024 11:55:57 -0400 Subject: [PATCH 12/14] fix(profiling): handle `Span`s with `None` span type for stack v2 endpoint profiling [backport 2.13] (#11176) Manual backport of #11164 to 2.13 Fixes #11141 ## Checklist - [x] PR author has checked that 
all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [ ] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- .../datadog/profiling/stack_v2/src/stack_v2.cpp | 10 ++++++++-- ...ck-v2-endpoint-span-type-none-48a1196296be3742.yaml | 5 +++++ 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/profiling-stack-v2-endpoint-span-type-none-48a1196296be3742.yaml diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp index 62fa4b38b77..b5cd8a81312 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp @@ -91,7 +91,7 @@ _stack_v2_link_span(PyObject* 
self, PyObject* self, PyObject* args, PyObject* kwargs) uint64_t thread_id; uint64_t span_id; uint64_t local_root_span_id; - const char* span_type; + const char* span_type = nullptr; PyThreadState* state = PyThreadState_Get(); @@ -104,10 +104,16 @@ _stack_v2_link_span(PyObject* self, PyObject* args, PyObject* kwargs) static const char* const_kwlist[] = { "span_id", "local_root_span_id", "span_type", NULL }; static char** kwlist = const_cast<char**>(const_kwlist); - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "KKs", kwlist, &span_id, &local_root_span_id, &span_type)) { + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "KKz", kwlist, &span_id, &local_root_span_id, &span_type)) { return NULL; } + // From Python, span_type is a string or None, and when given None, it is passed as a nullptr. + static const std::string empty_string = ""; + if (span_type == nullptr) { + span_type = empty_string.c_str(); + } + ThreadSpanLinks::get_instance().link_span(thread_id, span_id, local_root_span_id, std::string(span_type)); Py_RETURN_NONE; diff --git a/releasenotes/notes/profiling-stack-v2-endpoint-span-type-none-48a1196296be3742.yaml b/releasenotes/notes/profiling-stack-v2-endpoint-span-type-none-48a1196296be3742.yaml new file mode 100644 index 00000000000..1e4fea09afe --- /dev/null +++ b/releasenotes/notes/profiling-stack-v2-endpoint-span-type-none-48a1196296be3742.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + profiling: resolves an issue where endpoint profiling for stack v2 throws + ``TypeError`` exception when it is given a ``Span`` with ``None`` span_type. 
From 52f51e87b2c387c54932ea07e9dc3f0d4d780ba3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 29 Oct 2024 10:07:42 +0000 Subject: [PATCH 13/14] fix(ci_visibility): strip out comments in codeowners [backport 2.13] (#11192) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backport 4dde7b84ba21247265850310dd685d8a1b0f0c20 from #11173 to 2.13. ## Description JIRA: https://datadoghq.atlassian.net/browse/SDTEST-1153 We do not strip the comments for the codeowners sections causing [issues like this](https://app.datadoghq.com/ci/test-runs?query=test_level:test&agg_m=count&agg_m_source=base&agg_t=count&citest_explorer_sort=@test.codeowners,desc&cols=@test.status,@test.codeowners,timestamp,@test.suite,@test.name,@duration,@test.service,@git.branch¤tTab=overview&eventStack=AgAAAZLDS54Ikm5TUgAAAAAAAAAYAAAAAEFaTERTNkJYQUFCVDNOMEllNXhGMUxmNwAAACQAAAAAMDE5MmMzNGQtNTM3Mi00NGY5LWI3OGQtM2RjNGMzODdjNjNi&fromUser=false&graphType=flamegraph&index=citest&testId=AgAAAZLDS54Ikm5TUgAAAAAAAAAYAAAAAEFaTERTNkJYQUFCVDNOMEllNXhGMUxmNwAAACQAAAAAMDE5MmMzNGQtNTM3Mi00NGY5LWI3OGQtM2RjNGMzODdjNjNi&trace=AgAAAZLDS54Ikm5TUgAAAAAAAAAYAAAAAEFaTERTNkJYQUFCVDNOMEllNXhGMUxmNwAAACQAAAAAMDE5MmMzNGQtNTM3Mi00NGY5LWI3OGQtM2RjNGMzODdjNjNi&start=1729248608094&end=1729853408094&paused=false) This PR: - Just checks for `#` and strips out to the end of the line ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation 
updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Ángel Paredes --- ddtrace/internal/codeowners.py | 9 ++++++--- ...x_codeowners_including_comments-82d9cb733a2c7285.yaml | 3 +++ tests/internal/test_codeowners.py | 3 +++ 3 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/ci_visibility-fix_codeowners_including_comments-82d9cb733a2c7285.yaml diff --git a/ddtrace/internal/codeowners.py b/ddtrace/internal/codeowners.py index fa1e63f7add..ead8e2f31b8 100644 --- a/ddtrace/internal/codeowners.py +++ b/ddtrace/internal/codeowners.py @@ -152,11 +152,14 @@ def parse(self) -> None: patterns = [] for line in f.readlines(): line = line.strip() + + if "#" in line: + # Strip out the comment from the line + line = line.split("#", 1)[0].strip() + if line == "": continue - # Lines starting with '#' are comments. 
- if line.startswith("#"): - continue + if line.startswith("[") and line.endswith("]"): # found a code owners section continue diff --git a/releasenotes/notes/ci_visibility-fix_codeowners_including_comments-82d9cb733a2c7285.yaml b/releasenotes/notes/ci_visibility-fix_codeowners_including_comments-82d9cb733a2c7285.yaml new file mode 100644 index 00000000000..3fb4bb5abd8 --- /dev/null +++ b/releasenotes/notes/ci_visibility-fix_codeowners_including_comments-82d9cb733a2c7285.yaml @@ -0,0 +1,3 @@ +--- +fixes: + - CI Visibility: "fixes a bug where ``CODEOWNERS`` would incorrectly fail to discard line-level trailing comments (eg: ``@code/owner # my comment`` would result in codeowners being parsed as ``@code/owner``, ``#``, ``my``, and ``comment``)" diff --git a/tests/internal/test_codeowners.py b/tests/internal/test_codeowners.py index fdf3ba28d65..5536eeea7dc 100644 --- a/tests/internal/test_codeowners.py +++ b/tests/internal/test_codeowners.py @@ -9,9 +9,12 @@ def test_invalid_codeowners(testdir): ^[invalid optional section bar.py @bars + # Inline comment case + baz.py @DataDog/the-owner # all that should be ignored """ codeowners_file = testdir.makefile("", CODEOWNERS=codeowners) c = Codeowners(path=codeowners_file.strpath) assert c.of("foo.py") == ["@default"] assert c.of("bar.py") == ["@bars"] + assert c.of("baz.py") == ["@DataDog/the-owner"] From fdb023cde1fee8f2cfb7475b4509140db3f24879 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 29 Oct 2024 12:10:23 +0000 Subject: [PATCH 14/14] fix(ci_visibility): avoid git tracebacks when .git dir is absent [backport 2.13] (#11196) Backport 5051de3d2fac03415052bf3acf440685b30b5151 from #11175 to 2.13. Fixes #10983 where the git metadata upload would log exceptions when trying to upload git metadata without a working `git` environment (eg: missing `git` executable or no `.git` directory). 
## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: Romain Komorn <136473744+romainkomorndatadog@users.noreply.github.com> Co-authored-by: Romain Komorn --- ddtrace/internal/ci_visibility/git_client.py | 13 +++++++++++++ ...logged_errors_when_gitless-66a6cb3245314f3e.yaml | 4 ++++ tests/ci_visibility/test_ci_visibility.py | 10 ++++++++++ tests/contrib/pytest/test_pytest.py | 6 +++++- 4 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/ci_visibility-fix_logged_errors_when_gitless-66a6cb3245314f3e.yaml diff --git a/ddtrace/internal/ci_visibility/git_client.py 
b/ddtrace/internal/ci_visibility/git_client.py index 3e8f35844b2..7bc08ca4195 100644 --- a/ddtrace/internal/ci_visibility/git_client.py +++ b/ddtrace/internal/ci_visibility/git_client.py @@ -22,6 +22,7 @@ from ddtrace.ext.git import extract_commit_sha from ddtrace.ext.git import extract_git_version from ddtrace.ext.git import extract_remote_url +from ddtrace.ext.git import extract_workspace_path from ddtrace.internal.agent import get_trace_url from ddtrace.internal.compat import JSONDecodeError from ddtrace.internal.logger import get_logger @@ -94,8 +95,20 @@ def __init__( elif self._requests_mode == REQUESTS_MODE.AGENTLESS_EVENTS: self._base_url = "https://api.{}{}".format(os.getenv("DD_SITE", AGENTLESS_DEFAULT_SITE), GIT_API_BASE_PATH) + def _get_git_dir(self, cwd=None): + # type: (Optional[str]) -> Optional[str] + try: + return extract_workspace_path(cwd=cwd) + except (FileNotFoundError, ValueError): + return None + def upload_git_metadata(self, cwd=None): # type: (Optional[str]) -> None + if not self._get_git_dir(cwd=cwd): + log.debug("Missing .git directory; skipping git metadata upload") + self._metadata_upload_status.value = METADATA_UPLOAD_STATUS.FAILED # type: ignore[attr-defined] + return + self._tags = ci.tags(cwd=cwd) if self._worker is None: self._worker = Process( diff --git a/releasenotes/notes/ci_visibility-fix_logged_errors_when_gitless-66a6cb3245314f3e.yaml b/releasenotes/notes/ci_visibility-fix_logged_errors_when_gitless-66a6cb3245314f3e.yaml new file mode 100644 index 00000000000..f248c2ad640 --- /dev/null +++ b/releasenotes/notes/ci_visibility-fix_logged_errors_when_gitless-66a6cb3245314f3e.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - CI Visibility: fixes unnecessary logging of an exception that would appear when trying to upload git metadata in + an environment without functioning git (eg: missing ``git`` binary or ``.git`` directory) diff --git a/tests/ci_visibility/test_ci_visibility.py b/tests/ci_visibility/test_ci_visibility.py index 
86cdd96012a..c2f0723c031 100644 --- a/tests/ci_visibility/test_ci_visibility.py +++ b/tests/ci_visibility/test_ci_visibility.py @@ -999,6 +999,7 @@ def mock_git_client(self): _get_filtered_revisions=mock.Mock(return_value="latest3\nlatest4"), _upload_packfiles=mock.Mock(side_effec=NotImplementedError), _do_request=mock.Mock(side_effect=NotImplementedError), + _get_git_dir=mock.Mock(return_value="does_not_matter"), ), mock.patch( "ddtrace.internal.ci_visibility.git_client._build_git_packfiles_with_details" ) as mock_build_packfiles: @@ -1082,6 +1083,15 @@ def test_upload_git_metadata_upload_unnecessary(self, api_key, requests_mode): git_client.upload_git_metadata() assert git_client.wait_for_metadata_upload_status() == METADATA_UPLOAD_STATUS.UNNECESSARY + @pytest.mark.parametrize("api_key, requests_mode", api_key_requests_mode_parameters) + def test_upload_git_metadata_upload_no_git_dir(self, api_key, requests_mode): + with mock.patch.object(CIVisibilityGitClient, "_get_git_dir", mock.Mock(return_value=None)): + git_client = CIVisibilityGitClient(api_key, requests_mode) + git_client.upload_git_metadata() + + # Notably, this should _not_ raise ValueError + assert git_client.wait_for_metadata_upload_status() == METADATA_UPLOAD_STATUS.FAILED + def test_get_filtered_revisions(): with mock.patch( diff --git a/tests/contrib/pytest/test_pytest.py b/tests/contrib/pytest/test_pytest.py index 33cc6ebc811..ee0188d1489 100644 --- a/tests/contrib/pytest/test_pytest.py +++ b/tests/contrib/pytest/test_pytest.py @@ -3940,7 +3940,11 @@ def test_add_two_number_list(): ) self.testdir.chdir() - with mock.patch("ddtrace.ext.git._get_executable_path", return_value=None): + with mock.patch("ddtrace.ext.git._get_executable_path", return_value=None), mock.patch( + "ddtrace.internal.ci_visibility.recorder.ddconfig", + _get_default_civisibility_ddconfig(), + ) as mock_ddconfig: + mock_ddconfig._ci_visibility_agentless_enabled = True self.inline_run("--ddtrace") spans = self.pop_spans()