diff --git a/.gitlab/package.yml b/.gitlab/package.yml
index 115ada9f264..2f896637599 100644
--- a/.gitlab/package.yml
+++ b/.gitlab/package.yml
@@ -7,6 +7,7 @@ build_base_venvs:
   variables:
     CMAKE_BUILD_PARALLEL_LEVEL: 12
     PIP_VERBOSE: 1
+    DD_PROFILING_NATIVE_TESTS: 1
   script:
     - pip install riot==0.20.0
     - riot -P -v generate --python=$PYTHON_VERSION
@@ -16,6 +17,7 @@ build_base_venvs:
       - .riot/venv_*
       - ddtrace/**/*.so*
       - ddtrace/internal/datadog/profiling/crashtracker/crashtracker_exe*
+      - ddtrace/internal/datadog/profiling/test/test_*
 
 download_ddtrace_artifacts:
   image: registry.ddbuild.io/github-cli:v27480869-eafb11d-2.43.0
diff --git a/.riot/requirements/acb0de0.txt b/.riot/requirements/118a763.txt
similarity index 79%
rename from .riot/requirements/acb0de0.txt
rename to .riot/requirements/118a763.txt
index 5717bdd07bc..96dddb83cd5 100644
--- a/.riot/requirements/acb0de0.txt
+++ b/.riot/requirements/118a763.txt
@@ -2,7 +2,7 @@
 # This file is autogenerated by pip-compile with Python 3.12
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/acb0de0.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/118a763.in
 #
 attrs==24.2.0
 coverage[toml]==7.6.4
@@ -18,9 +18,10 @@ protobuf==5.28.3
 py-cpuinfo==8.0.0
 pytest==8.3.3
 pytest-asyncio==0.21.1
-pytest-benchmark==4.0.0
-pytest-cov==5.0.0
+pytest-benchmark==5.1.0
+pytest-cov==6.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
-pytest-randomly==3.15.0
+pytest-randomly==3.16.0
 sortedcontainers==2.4.0
-uwsgi==2.0.27
+uwsgi==2.0.28
diff --git a/.riot/requirements/13e7fea.txt b/.riot/requirements/125c1e6.txt
similarity index 66%
rename from .riot/requirements/13e7fea.txt
rename to .riot/requirements/125c1e6.txt
index a9ae6e94a09..77714a4a160 100644
--- a/.riot/requirements/13e7fea.txt
+++ b/.riot/requirements/125c1e6.txt
@@ -2,33 +2,32 @@
 # This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/13e7fea.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/125c1e6.in
 #
 attrs==24.2.0
-coverage[toml]==7.6.1
+coverage[toml]==7.6.4
 exceptiongroup==1.2.2
-gevent==21.8.0
-greenlet==1.1.0
+gevent==24.11.1
+greenlet==3.1.1
 gunicorn[gevent]==23.0.0
 hypothesis==6.45.0
 iniconfig==2.0.0
-lz4==4.3.3
 mock==5.1.0
 opentracing==2.4.0
-packaging==24.1
+packaging==24.2
 pluggy==1.5.0
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
-pytest-benchmark==4.0.0
-pytest-cov==5.0.0
+pytest-benchmark==5.1.0
+pytest-cov==6.0.0
 pytest-mock==3.14.0
-pytest-randomly==3.15.0
+pytest-randomly==3.16.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-uwsgi==2.0.26
+tomli==2.0.2
+uwsgi==2.0.28
 zope-event==5.0
-zope-interface==7.0.3
+zope-interface==7.1.1
 
 # The following packages are considered to be unsafe in a requirements file:
-setuptools==74.1.2
+setuptools==75.3.0
diff --git a/.riot/requirements/1a594ce.txt b/.riot/requirements/12a4316.txt
similarity index 65%
rename from .riot/requirements/1a594ce.txt
rename to .riot/requirements/12a4316.txt
index 7e3f9aa19be..de9c85604c9 100644
--- a/.riot/requirements/1a594ce.txt
+++ b/.riot/requirements/12a4316.txt
@@ -2,25 +2,25 @@
 # This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1a594ce.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/12a4316.in
 #
-attrs==23.2.0
-coverage[toml]==7.6.0
-gevent==22.8.0
-greenlet==1.1.3.post0
+attrs==24.2.0
+coverage[toml]==7.6.4
+gevent==24.11.1
+greenlet==3.1.1
 hypothesis==6.45.0
 iniconfig==2.0.0
 mock==5.1.0
 opentracing==2.4.0
-packaging==24.1
+packaging==24.2
 pluggy==1.5.0
-pytest==8.3.1
-pytest-cov==5.0.0
+pytest==8.3.3
+pytest-cov==6.0.0
 pytest-mock==3.14.0
-pytest-randomly==3.15.0
+pytest-randomly==3.16.0
 sortedcontainers==2.4.0
 zope-event==5.0
-zope-interface==6.4.post2
+zope-interface==7.1.1
 
 # The following packages are considered to be unsafe in a requirements file:
-setuptools==71.1.0
+setuptools==75.3.0
diff --git a/.riot/requirements/1359ebb.txt b/.riot/requirements/1359ebb.txt
new file mode 100644
index 00000000000..75c10c261a8
--- /dev/null
+++ b/.riot/requirements/1359ebb.txt
@@ -0,0 +1,35 @@
+#
+# This file is autogenerated by pip-compile with Python 3.10
+# by the following command:
+#
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1359ebb.in
+#
+attrs==24.2.0
+coverage[toml]==7.6.4
+exceptiongroup==1.2.2
+gevent==24.11.1
+greenlet==3.1.1
+gunicorn[gevent]==23.0.0
+hypothesis==6.45.0
+iniconfig==2.0.0
+lz4==4.3.3
+mock==5.1.0
+opentracing==2.4.0
+packaging==24.2
+pluggy==1.5.0
+py-cpuinfo==8.0.0
+pytest==8.3.3
+pytest-asyncio==0.21.1
+pytest-benchmark==5.1.0
+pytest-cov==6.0.0
+pytest-cpp==2.6.0
+pytest-mock==3.14.0
+pytest-randomly==3.16.0
+sortedcontainers==2.4.0
+tomli==2.1.0
+uwsgi==2.0.28
+zope-event==5.0
+zope-interface==7.1.1
+
+# The following packages are considered to be unsafe in a requirements file:
+setuptools==75.4.0
diff --git a/.riot/requirements/1ffb22d.txt b/.riot/requirements/1413039.txt
similarity index 68%
rename from .riot/requirements/1ffb22d.txt
rename to .riot/requirements/1413039.txt
index 69a20e3f210..e05e2893ae6 100644
--- a/.riot/requirements/1ffb22d.txt
+++ b/.riot/requirements/1413039.txt
@@ -2,29 +2,29 @@
 # This file is autogenerated by pip-compile with Python 3.8
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1ffb22d.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1413039.in
 #
-attrs==23.2.0
-coverage[toml]==7.6.0
+attrs==24.2.0
+coverage[toml]==7.6.1
 exceptiongroup==1.2.2
-gevent==20.12.1
-greenlet==1.0.0
+gevent==24.2.1
+greenlet==3.1.1
 hypothesis==6.45.0
-importlib-metadata==8.2.0
+importlib-metadata==8.5.0
 iniconfig==2.0.0
 mock==5.1.0
 opentracing==2.4.0
-packaging==24.1
+packaging==24.2
 pluggy==1.5.0
-pytest==8.3.1
+pytest==8.3.3
 pytest-cov==5.0.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-zipp==3.19.2
+tomli==2.0.2
+zipp==3.20.2
 zope-event==5.0
-zope-interface==6.4.post2
+zope-interface==7.1.1
 
 # The following packages are considered to be unsafe in a requirements file:
-setuptools==71.1.0
+setuptools==75.3.0
diff --git a/.riot/requirements/10fe95f.txt b/.riot/requirements/1560353.txt
similarity index 78%
rename from .riot/requirements/10fe95f.txt
rename to .riot/requirements/1560353.txt
index 12611af2b08..4b50732d926 100644
--- a/.riot/requirements/10fe95f.txt
+++ b/.riot/requirements/1560353.txt
@@ -2,30 +2,30 @@
 # This file is autogenerated by pip-compile with Python 3.12
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/10fe95f.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1560353.in
 #
 attrs==24.2.0
 coverage[toml]==7.6.4
-gevent==23.9.0
+gevent==24.11.1
 greenlet==3.1.1
 gunicorn[gevent]==23.0.0
 hypothesis==6.45.0
 iniconfig==2.0.0
 mock==5.1.0
 opentracing==2.4.0
-packaging==24.1
+packaging==24.2
 pluggy==1.5.0
 py-cpuinfo==8.0.0
 pytest==8.3.3
 pytest-asyncio==0.21.1
-pytest-benchmark==4.0.0
-pytest-cov==5.0.0
+pytest-benchmark==5.1.0
+pytest-cov==6.0.0
 pytest-mock==3.14.0
-pytest-randomly==3.15.0
+pytest-randomly==3.16.0
 sortedcontainers==2.4.0
-uwsgi==2.0.27
+uwsgi==2.0.28
 zope-event==5.0
 zope-interface==7.1.1
 
 # The following packages are considered to be unsafe in a requirements file:
-setuptools==75.2.0
+setuptools==75.3.0
diff --git a/.riot/requirements/5286dd4.txt b/.riot/requirements/15e90ee.txt
similarity index 77%
rename from .riot/requirements/5286dd4.txt
rename to .riot/requirements/15e90ee.txt
index cfa763a35cf..1855f4db88a 100644
--- a/.riot/requirements/5286dd4.txt
+++ b/.riot/requirements/15e90ee.txt
@@ -2,13 +2,13 @@
 # This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/5286dd4.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/15e90ee.in
 #
 attrs==24.2.0
-coverage[toml]==7.6.1
+coverage[toml]==7.6.3
 exceptiongroup==1.2.2
-gevent==24.2.1
-greenlet==3.0.3
+gevent==24.10.2
+greenlet==3.1.1
 gunicorn[gevent]==23.0.0
 hypothesis==6.45.0
 iniconfig==2.0.0
@@ -18,17 +18,18 @@ opentracing==2.4.0
 packaging==24.1
 pluggy==1.5.0
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-uwsgi==2.0.26
+tomli==2.0.2
+uwsgi==2.0.27
 zope-event==5.0
-zope-interface==7.0.3
+zope-interface==7.1.0
 
 # The following packages are considered to be unsafe in a requirements file:
-setuptools==74.1.2
+setuptools==75.2.0
diff --git a/.riot/requirements/1744bab.txt b/.riot/requirements/1600ae2.txt
similarity index 85%
rename from .riot/requirements/1744bab.txt
rename to .riot/requirements/1600ae2.txt
index 2b7720a4378..d330d767bc1 100644
--- a/.riot/requirements/1744bab.txt
+++ b/.riot/requirements/1600ae2.txt
@@ -2,13 +2,14 @@
 # This file is autogenerated by pip-compile with Python 3.7
 # by the following command:
 #
-#    pip-compile --allow-unsafe --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/1744bab.in
+#    pip-compile --allow-unsafe --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/1600ae2.in
 #
 attrs==24.2.0
+colorama==0.4.6
 coverage[toml]==7.2.7
 exceptiongroup==1.2.2
 gevent==22.10.2
-greenlet==3.0.3
+greenlet==3.1.1
 gunicorn[gevent]==23.0.0
 hypothesis==6.45.0
 importlib-metadata==6.7.0
@@ -23,12 +24,13 @@ pytest==7.4.4
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==4.1.0
+pytest-cpp==2.5.0
 pytest-mock==3.11.1
 pytest-randomly==3.12.0
 sortedcontainers==2.4.0
 tomli==2.0.1
 typing-extensions==4.7.1
-uwsgi==2.0.26
+uwsgi==2.0.27
 zipp==3.15.0
 zope-event==5.0
 zope-interface==6.4.post2
diff --git a/.riot/requirements/18598d3.txt b/.riot/requirements/18008a7.txt
similarity index 82%
rename from .riot/requirements/18598d3.txt
rename to .riot/requirements/18008a7.txt
index 49e2a01fcd8..f27f68fb2ce 100644
--- a/.riot/requirements/18598d3.txt
+++ b/.riot/requirements/18008a7.txt
@@ -2,10 +2,10 @@
 # This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/18598d3.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/18008a7.in
 #
 attrs==24.2.0
-coverage[toml]==7.6.1
+coverage[toml]==7.6.3
 gunicorn==23.0.0
 hypothesis==6.45.0
 iniconfig==2.0.0
@@ -14,13 +14,14 @@ mock==5.1.0
 opentracing==2.4.0
 packaging==24.1
 pluggy==1.5.0
-protobuf==5.28.0
+protobuf==5.28.2
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-uwsgi==2.0.26
+uwsgi==2.0.27
diff --git a/.riot/requirements/197de71.txt b/.riot/requirements/197de71.txt
deleted file mode 100644
index 12558ce029e..00000000000
--- a/.riot/requirements/197de71.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# This file is autogenerated by pip-compile with Python 3.11
-# by the following command:
-#
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/197de71.in
-#
-attrs==23.2.0
-coverage[toml]==7.6.0
-gevent==22.10.2
-greenlet==3.0.3
-gunicorn[gevent]==22.0.0
-hypothesis==6.45.0
-iniconfig==2.0.0
-mock==5.1.0
-opentracing==2.4.0
-packaging==24.1
-pluggy==1.5.0
-py-cpuinfo==8.0.0
-pytest==8.3.1
-pytest-asyncio==0.21.1
-pytest-benchmark==4.0.0
-pytest-cov==5.0.0
-pytest-mock==3.14.0
-pytest-randomly==3.15.0
-sortedcontainers==2.4.0
-uwsgi==2.0.26
-zope-event==5.0
-zope-interface==6.4.post2
-
-# The following packages are considered to be unsafe in a requirements file:
-setuptools==71.1.0
diff --git a/.riot/requirements/194a34b.txt b/.riot/requirements/19a43a5.txt
similarity index 61%
rename from .riot/requirements/194a34b.txt
rename to .riot/requirements/19a43a5.txt
index b7e8734b55d..25a918eb7ad 100644
--- a/.riot/requirements/194a34b.txt
+++ b/.riot/requirements/19a43a5.txt
@@ -2,29 +2,29 @@
 # This file is autogenerated by pip-compile with Python 3.9
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/194a34b.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/19a43a5.in
 #
-attrs==23.2.0
-coverage[toml]==7.6.0
+attrs==24.2.0
+coverage[toml]==7.6.4
 exceptiongroup==1.2.2
-gevent==21.1.2
-greenlet==1.1.3.post0
+gevent==24.11.1
+greenlet==3.1.1
 hypothesis==6.45.0
-importlib-metadata==8.2.0
+importlib-metadata==8.5.0
 iniconfig==2.0.0
 mock==5.1.0
 opentracing==2.4.0
-packaging==24.1
+packaging==24.2
 pluggy==1.5.0
-pytest==8.3.1
-pytest-cov==5.0.0
+pytest==8.3.3
+pytest-cov==6.0.0
 pytest-mock==3.14.0
-pytest-randomly==3.15.0
+pytest-randomly==3.16.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-zipp==3.19.2
+tomli==2.0.2
+zipp==3.21.0
 zope-event==5.0
-zope-interface==6.4.post2
+zope-interface==7.1.1
 
 # The following packages are considered to be unsafe in a requirements file:
-setuptools==71.1.0
+setuptools==75.3.0
diff --git a/.riot/requirements/1f484c3.txt b/.riot/requirements/1a9ec51.txt
similarity index 83%
rename from .riot/requirements/1f484c3.txt
rename to .riot/requirements/1a9ec51.txt
index 8df64afa9f9..832fac60dff 100644
--- a/.riot/requirements/1f484c3.txt
+++ b/.riot/requirements/1a9ec51.txt
@@ -2,10 +2,10 @@
 # This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1f484c3.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1a9ec51.in
 #
 attrs==24.2.0
-coverage[toml]==7.6.1
+coverage[toml]==7.6.3
 exceptiongroup==1.2.2
 gunicorn==23.0.0
 hypothesis==6.45.0
@@ -17,12 +17,13 @@ packaging==24.1
 pluggy==1.5.0
 protobuf==3.19.0
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-uwsgi==2.0.26
+tomli==2.0.2
+uwsgi==2.0.27
diff --git a/.riot/requirements/ab034bd.txt b/.riot/requirements/1b284db.txt
similarity index 78%
rename from .riot/requirements/ab034bd.txt
rename to .riot/requirements/1b284db.txt
index 8189def48ce..890031e7402 100644
--- a/.riot/requirements/ab034bd.txt
+++ b/.riot/requirements/1b284db.txt
@@ -2,16 +2,16 @@
 # This file is autogenerated by pip-compile with Python 3.8
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/ab034bd.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1b284db.in
 #
 attrs==24.2.0
 coverage[toml]==7.6.1
 exceptiongroup==1.2.2
 gevent==24.2.1
-greenlet==3.0.3
+greenlet==3.1.1
 gunicorn[gevent]==23.0.0
 hypothesis==6.45.0
-importlib-metadata==8.4.0
+importlib-metadata==8.5.0
 iniconfig==2.0.0
 lz4==4.3.3
 mock==5.1.0
@@ -19,18 +19,19 @@ opentracing==2.4.0
 packaging==24.1
 pluggy==1.5.0
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-uwsgi==2.0.26
-zipp==3.20.1
+tomli==2.0.2
+uwsgi==2.0.27
+zipp==3.20.2
 zope-event==5.0
-zope-interface==7.0.3
+zope-interface==7.1.0
 
 # The following packages are considered to be unsafe in a requirements file:
-setuptools==74.1.2
+setuptools==75.2.0
diff --git a/.riot/requirements/d6c4509.txt b/.riot/requirements/1bc8c1c.txt
similarity index 82%
rename from .riot/requirements/d6c4509.txt
rename to .riot/requirements/1bc8c1c.txt
index 833f8557797..163028cbf3e 100644
--- a/.riot/requirements/d6c4509.txt
+++ b/.riot/requirements/1bc8c1c.txt
@@ -2,14 +2,14 @@
 # This file is autogenerated by pip-compile with Python 3.8
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/d6c4509.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1bc8c1c.in
 #
 attrs==24.2.0
 coverage[toml]==7.6.1
 exceptiongroup==1.2.2
 gunicorn==23.0.0
 hypothesis==6.45.0
-importlib-metadata==8.4.0
+importlib-metadata==8.5.0
 iniconfig==2.0.0
 lz4==4.3.3
 mock==5.1.0
@@ -18,13 +18,14 @@ packaging==24.1
 pluggy==1.5.0
 protobuf==3.19.0
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-uwsgi==2.0.26
-zipp==3.20.1
+tomli==2.0.2
+uwsgi==2.0.27
+zipp==3.20.2
diff --git a/.riot/requirements/d79db4d.txt b/.riot/requirements/1d20be2.txt
similarity index 78%
rename from .riot/requirements/d79db4d.txt
rename to .riot/requirements/1d20be2.txt
index 53f97535f4c..b3e24418d56 100644
--- a/.riot/requirements/d79db4d.txt
+++ b/.riot/requirements/1d20be2.txt
@@ -2,14 +2,14 @@
 # This file is autogenerated by pip-compile with Python 3.9
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/d79db4d.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1d20be2.in
 #
 attrs==24.2.0
-coverage[toml]==7.6.1
+coverage[toml]==7.6.3
 exceptiongroup==1.2.2
 gunicorn==23.0.0
 hypothesis==6.45.0
-importlib-metadata==8.4.0
+importlib-metadata==8.5.0
 iniconfig==2.0.0
 lz4==4.3.3
 mock==5.1.0
@@ -18,13 +18,14 @@ packaging==24.1
 pluggy==1.5.0
 protobuf==3.19.0
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-uwsgi==2.0.26
-zipp==3.20.1
+tomli==2.0.2
+uwsgi==2.0.27
+zipp==3.20.2
diff --git a/.riot/requirements/ac92a08.txt b/.riot/requirements/1d21682.txt
similarity index 57%
rename from .riot/requirements/ac92a08.txt
rename to .riot/requirements/1d21682.txt
index e1d20066e86..93fd622b954 100644
--- a/.riot/requirements/ac92a08.txt
+++ b/.riot/requirements/1d21682.txt
@@ -1,32 +1,33 @@
 #
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.12
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/ac92a08.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1d21682.in
 #
 attrs==24.2.0
-coverage[toml]==7.6.1
-gevent==22.10.2
-greenlet==3.0.3
+coverage[toml]==7.6.4
+gevent==24.11.1
+greenlet==3.1.1
 gunicorn[gevent]==23.0.0
 hypothesis==6.45.0
 iniconfig==2.0.0
 lz4==4.3.3
 mock==5.1.0
 opentracing==2.4.0
-packaging==24.1
+packaging==24.2
 pluggy==1.5.0
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
-pytest-benchmark==4.0.0
-pytest-cov==5.0.0
+pytest-benchmark==5.1.0
+pytest-cov==6.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
-pytest-randomly==3.15.0
+pytest-randomly==3.16.0
 sortedcontainers==2.4.0
-uwsgi==2.0.26
+uwsgi==2.0.28
 zope-event==5.0
-zope-interface==7.0.3
+zope-interface==7.1.1
 
 # The following packages are considered to be unsafe in a requirements file:
-setuptools==74.1.2
+setuptools==75.4.0
diff --git a/.riot/requirements/1dfcf17.txt b/.riot/requirements/1dfcf17.txt
deleted file mode 100644
index ad86edc1aee..00000000000
--- a/.riot/requirements/1dfcf17.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-#
-# This file is autogenerated by pip-compile with Python 3.10
-# by the following command:
-#
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1dfcf17.in
-#
-attrs==23.2.0
-coverage[toml]==7.6.0
-exceptiongroup==1.2.2
-gevent==21.8.0
-greenlet==1.1.0
-gunicorn[gevent]==22.0.0
-hypothesis==6.45.0
-iniconfig==2.0.0
-mock==5.1.0
-opentracing==2.4.0
-packaging==24.1
-pluggy==1.5.0
-py-cpuinfo==8.0.0
-pytest==8.3.1
-pytest-asyncio==0.21.1
-pytest-benchmark==4.0.0
-pytest-cov==5.0.0
-pytest-mock==3.14.0
-pytest-randomly==3.15.0
-sortedcontainers==2.4.0
-tomli==2.0.1
-uwsgi==2.0.26
-zope-event==5.0
-zope-interface==6.4.post2
-
-# The following packages are considered to be unsafe in a requirements file:
-setuptools==71.1.0
diff --git a/.riot/requirements/1e185ef.txt b/.riot/requirements/1e185ef.txt
deleted file mode 100644
index e358d5e4586..00000000000
--- a/.riot/requirements/1e185ef.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-#
-# This file is autogenerated by pip-compile with Python 3.11
-# by the following command:
-#
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1e185ef.in
-#
-attrs==24.2.0
-coverage[toml]==7.6.1
-gevent==24.2.1
-greenlet==3.0.3
-gunicorn[gevent]==23.0.0
-hypothesis==6.45.0
-iniconfig==2.0.0
-lz4==4.3.3
-mock==5.1.0
-opentracing==2.4.0
-packaging==24.1
-pluggy==1.5.0
-py-cpuinfo==8.0.0
-pytest==8.3.2
-pytest-asyncio==0.21.1
-pytest-benchmark==4.0.0
-pytest-cov==5.0.0
-pytest-mock==3.14.0
-pytest-randomly==3.15.0
-sortedcontainers==2.4.0
-uwsgi==2.0.26
-zope-event==5.0
-zope-interface==7.0.3
-
-# The following packages are considered to be unsafe in a requirements file:
-setuptools==74.1.2
diff --git a/.riot/requirements/4132bce.txt b/.riot/requirements/1edf426.txt
similarity index 65%
rename from .riot/requirements/4132bce.txt
rename to .riot/requirements/1edf426.txt
index 60ecafb5e93..56a5eb28b4d 100644
--- a/.riot/requirements/4132bce.txt
+++ b/.riot/requirements/1edf426.txt
@@ -2,25 +2,25 @@
 # This file is autogenerated by pip-compile with Python 3.12
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/4132bce.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1edf426.in
 #
-attrs==23.2.0
-coverage[toml]==7.6.0
-gevent==23.9.1
-greenlet==3.0.3
+attrs==24.2.0
+coverage[toml]==7.6.4
+gevent==24.11.1
+greenlet==3.1.1
 hypothesis==6.45.0
 iniconfig==2.0.0
 mock==5.1.0
 opentracing==2.4.0
-packaging==24.1
+packaging==24.2
 pluggy==1.5.0
-pytest==8.3.1
-pytest-cov==5.0.0
+pytest==8.3.3
+pytest-cov==6.0.0
 pytest-mock==3.14.0
-pytest-randomly==3.15.0
+pytest-randomly==3.16.0
 sortedcontainers==2.4.0
 zope-event==5.0
-zope-interface==6.4.post2
+zope-interface==7.1.1
 
 # The following packages are considered to be unsafe in a requirements file:
-setuptools==71.1.0
+setuptools==75.3.0
diff --git a/.riot/requirements/52fe8c7.txt b/.riot/requirements/1ef2187.txt
similarity index 65%
rename from .riot/requirements/52fe8c7.txt
rename to .riot/requirements/1ef2187.txt
index 844526d9445..b430f5158b2 100644
--- a/.riot/requirements/52fe8c7.txt
+++ b/.riot/requirements/1ef2187.txt
@@ -2,27 +2,27 @@
 # This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/52fe8c7.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1ef2187.in
 #
-attrs==23.2.0
-coverage[toml]==7.6.0
+attrs==24.2.0
+coverage[toml]==7.6.4
 exceptiongroup==1.2.2
-gevent==21.8.0
-greenlet==1.1.3.post0
+gevent==24.11.1
+greenlet==3.1.1
 hypothesis==6.45.0
 iniconfig==2.0.0
 mock==5.1.0
 opentracing==2.4.0
-packaging==24.1
+packaging==24.2
 pluggy==1.5.0
-pytest==8.3.1
-pytest-cov==5.0.0
+pytest==8.3.3
+pytest-cov==6.0.0
 pytest-mock==3.14.0
-pytest-randomly==3.15.0
+pytest-randomly==3.16.0
 sortedcontainers==2.4.0
-tomli==2.0.1
+tomli==2.0.2
 zope-event==5.0
-zope-interface==6.4.post2
+zope-interface==7.1.1
 
 # The following packages are considered to be unsafe in a requirements file:
-setuptools==71.1.0
+setuptools==75.3.0
diff --git a/.riot/requirements/c351d44.txt b/.riot/requirements/2e36381.txt
similarity index 78%
rename from .riot/requirements/c351d44.txt
rename to .riot/requirements/2e36381.txt
index 363b32208e7..8629f1a5892 100644
--- a/.riot/requirements/c351d44.txt
+++ b/.riot/requirements/2e36381.txt
@@ -1,12 +1,12 @@
 #
-# This file is autogenerated by pip-compile with Python 3.12
+# This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/c351d44.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/2e36381.in
 #
 attrs==24.2.0
-coverage[toml]==7.6.4
-gevent==23.9.0
+coverage[toml]==7.6.3
+gevent==24.10.2
 greenlet==3.1.1
 gunicorn[gevent]==23.0.0
 hypothesis==6.45.0
@@ -21,12 +21,13 @@ pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
 uwsgi==2.0.27
 zope-event==5.0
-zope-interface==7.1.1
+zope-interface==7.1.0
 
 # The following packages are considered to be unsafe in a requirements file:
 setuptools==75.2.0
diff --git a/.riot/requirements/12526ba.txt b/.riot/requirements/4dd0ff3.txt
similarity index 81%
rename from .riot/requirements/12526ba.txt
rename to .riot/requirements/4dd0ff3.txt
index ecee134c825..539971a6683 100644
--- a/.riot/requirements/12526ba.txt
+++ b/.riot/requirements/4dd0ff3.txt
@@ -2,10 +2,10 @@
 # This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/12526ba.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/4dd0ff3.in
 #
 attrs==24.2.0
-coverage[toml]==7.6.1
+coverage[toml]==7.6.3
 exceptiongroup==1.2.2
 gunicorn==23.0.0
 hypothesis==6.45.0
@@ -15,14 +15,15 @@ mock==5.1.0
 opentracing==2.4.0
 packaging==24.1
 pluggy==1.5.0
-protobuf==5.28.0
+protobuf==5.28.2
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-uwsgi==2.0.26
+tomli==2.0.2
+uwsgi==2.0.27
diff --git a/.riot/requirements/7f8c636.txt b/.riot/requirements/6fe81be.txt
similarity index 86%
rename from .riot/requirements/7f8c636.txt
rename to .riot/requirements/6fe81be.txt
index da175e96287..f49865ef3bd 100644
--- a/.riot/requirements/7f8c636.txt
+++ b/.riot/requirements/6fe81be.txt
@@ -2,9 +2,10 @@
 # This file is autogenerated by pip-compile with Python 3.7
 # by the following command:
 #
-#    pip-compile --allow-unsafe --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/7f8c636.in
+#    pip-compile --allow-unsafe --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/6fe81be.in
 #
 attrs==24.2.0
+colorama==0.4.6
 coverage[toml]==7.2.7
 exceptiongroup==1.2.2
 gunicorn==23.0.0
@@ -22,13 +23,14 @@ pytest==7.4.4
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==4.1.0
+pytest-cpp==2.5.0
 pytest-mock==3.11.1
 pytest-randomly==3.12.0
 six==1.16.0
 sortedcontainers==2.4.0
 tomli==2.0.1
 typing-extensions==4.7.1
-uwsgi==2.0.26
+uwsgi==2.0.27
 zipp==3.15.0
 
 # The following packages are considered to be unsafe in a requirements file:
diff --git a/.riot/requirements/1aecc60.txt b/.riot/requirements/a541d7e.txt
similarity index 85%
rename from .riot/requirements/1aecc60.txt
rename to .riot/requirements/a541d7e.txt
index feb58960c23..8b110208b7f 100644
--- a/.riot/requirements/1aecc60.txt
+++ b/.riot/requirements/a541d7e.txt
@@ -2,10 +2,10 @@
 # This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1aecc60.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/a541d7e.in
 #
 attrs==24.2.0
-coverage[toml]==7.6.1
+coverage[toml]==7.6.3
 gunicorn==23.0.0
 hypothesis==6.45.0
 iniconfig==2.0.0
@@ -16,11 +16,12 @@ packaging==24.1
 pluggy==1.5.0
 protobuf==4.22.0
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-uwsgi==2.0.26
+uwsgi==2.0.27
diff --git a/.riot/requirements/167dd48.txt b/.riot/requirements/b83f7ca.txt
similarity index 73%
rename from .riot/requirements/167dd48.txt
rename to .riot/requirements/b83f7ca.txt
index e3580c6518a..72d6ac027ea 100644
--- a/.riot/requirements/167dd48.txt
+++ b/.riot/requirements/b83f7ca.txt
@@ -2,16 +2,16 @@
 # This file is autogenerated by pip-compile with Python 3.9
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/167dd48.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/b83f7ca.in
 #
 attrs==24.2.0
-coverage[toml]==7.6.1
+coverage[toml]==7.6.3
 exceptiongroup==1.2.2
-gevent==24.2.1
-greenlet==3.0.3
+gevent==24.10.2
+greenlet==3.1.1
 gunicorn[gevent]==23.0.0
 hypothesis==6.45.0
-importlib-metadata==8.4.0
+importlib-metadata==8.5.0
 iniconfig==2.0.0
 lz4==4.3.3
 mock==5.1.0
@@ -19,18 +19,19 @@ opentracing==2.4.0
 packaging==24.1
 pluggy==1.5.0
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-uwsgi==2.0.26
-zipp==3.20.1
+tomli==2.0.2
+uwsgi==2.0.27
+zipp==3.20.2
 zope-event==5.0
-zope-interface==7.0.3
+zope-interface==7.1.0
 
 # The following packages are considered to be unsafe in a requirements file:
-setuptools==74.1.2
+setuptools==75.2.0
diff --git a/.riot/requirements/8a0f886.txt b/.riot/requirements/d2b6740.txt
similarity index 84%
rename from .riot/requirements/8a0f886.txt
rename to .riot/requirements/d2b6740.txt
index 19b8536b36e..83fed7d33d5 100644
--- a/.riot/requirements/8a0f886.txt
+++ b/.riot/requirements/d2b6740.txt
@@ -2,9 +2,10 @@
 # This file is autogenerated by pip-compile with Python 3.7
 # by the following command:
 #
-#    pip-compile --allow-unsafe --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/8a0f886.in
+#    pip-compile --allow-unsafe --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/d2b6740.in
 #
 attrs==24.2.0
+colorama==0.4.6
 coverage[toml]==7.2.7
 exceptiongroup==1.2.2
 gunicorn==23.0.0
@@ -22,10 +23,11 @@ pytest==7.4.4
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==4.1.0
+pytest-cpp==2.5.0
 pytest-mock==3.11.1
 pytest-randomly==3.12.0
 sortedcontainers==2.4.0
 tomli==2.0.1
 typing-extensions==4.7.1
-uwsgi==2.0.26
+uwsgi==2.0.27
 zipp==3.15.0
diff --git a/.riot/requirements/1e17619.txt b/.riot/requirements/d3718d9.txt
similarity index 79%
rename from .riot/requirements/1e17619.txt
rename to .riot/requirements/d3718d9.txt
index 11ba3494e76..83abce2c34c 100644
--- a/.riot/requirements/1e17619.txt
+++ b/.riot/requirements/d3718d9.txt
@@ -2,29 +2,30 @@
 # This file is autogenerated by pip-compile with Python 3.8
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/1e17619.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/d3718d9.in
 #
 attrs==24.2.0
 coverage[toml]==7.6.1
 exceptiongroup==1.2.2
 gunicorn==23.0.0
 hypothesis==6.45.0
-importlib-metadata==8.4.0
+importlib-metadata==8.5.0
 iniconfig==2.0.0
 lz4==4.3.3
 mock==5.1.0
 opentracing==2.4.0
 packaging==24.1
 pluggy==1.5.0
-protobuf==5.28.0
+protobuf==5.28.2
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-uwsgi==2.0.26
-zipp==3.20.1
+tomli==2.0.2
+uwsgi==2.0.27
+zipp==3.20.2
diff --git a/.riot/requirements/11aeb6a.txt b/.riot/requirements/d62d369.txt
similarity index 79%
rename from .riot/requirements/11aeb6a.txt
rename to .riot/requirements/d62d369.txt
index 9107c0564a8..4e6a1334fb6 100644
--- a/.riot/requirements/11aeb6a.txt
+++ b/.riot/requirements/d62d369.txt
@@ -2,7 +2,7 @@
 # This file is autogenerated by pip-compile with Python 3.12
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/11aeb6a.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/d62d369.in
 #
 attrs==24.2.0
 coverage[toml]==7.6.4
@@ -18,9 +18,10 @@ protobuf==4.22.0
 py-cpuinfo==8.0.0
 pytest==8.3.3
 pytest-asyncio==0.21.1
-pytest-benchmark==4.0.0
-pytest-cov==5.0.0
+pytest-benchmark==5.1.0
+pytest-cov==6.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
-pytest-randomly==3.15.0
+pytest-randomly==3.16.0
 sortedcontainers==2.4.0
-uwsgi==2.0.27
+uwsgi==2.0.28
diff --git a/.riot/requirements/16f0a68.txt b/.riot/requirements/de95112.txt
similarity index 85%
rename from .riot/requirements/16f0a68.txt
rename to .riot/requirements/de95112.txt
index 043b35fd3fb..127a4b50d75 100644
--- a/.riot/requirements/16f0a68.txt
+++ b/.riot/requirements/de95112.txt
@@ -2,13 +2,13 @@
 # This file is autogenerated by pip-compile with Python 3.7
 # by the following command:
 #
-#    pip-compile --allow-unsafe --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/16f0a68.in
+#    pip-compile --allow-unsafe --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/de95112.in
 #
-attrs==23.2.0
+attrs==24.2.0
 coverage[toml]==7.2.7
 exceptiongroup==1.2.2
-gevent==20.12.1
-greenlet==1.0.0
+gevent==22.10.2
+greenlet==3.1.1
 hypothesis==6.45.0
 importlib-metadata==6.7.0
 iniconfig==2.0.0
diff --git a/.riot/requirements/18d2618.txt b/.riot/requirements/f59e90e.txt
similarity index 75%
rename from .riot/requirements/18d2618.txt
rename to .riot/requirements/f59e90e.txt
index b9b6eae6054..61250075c82 100644
--- a/.riot/requirements/18d2618.txt
+++ b/.riot/requirements/f59e90e.txt
@@ -2,29 +2,30 @@
 # This file is autogenerated by pip-compile with Python 3.9
 # by the following command:
 #
-#    pip-compile --allow-unsafe --no-annotate .riot/requirements/18d2618.in
+#    pip-compile --allow-unsafe --no-annotate .riot/requirements/f59e90e.in
 #
 attrs==24.2.0
-coverage[toml]==7.6.1
+coverage[toml]==7.6.3
 exceptiongroup==1.2.2
 gunicorn==23.0.0
 hypothesis==6.45.0
-importlib-metadata==8.4.0
+importlib-metadata==8.5.0
 iniconfig==2.0.0
 lz4==4.3.3
 mock==5.1.0
 opentracing==2.4.0
 packaging==24.1
 pluggy==1.5.0
-protobuf==5.28.0
+protobuf==5.28.2
 py-cpuinfo==8.0.0
-pytest==8.3.2
+pytest==8.3.3
 pytest-asyncio==0.21.1
 pytest-benchmark==4.0.0
 pytest-cov==5.0.0
+pytest-cpp==2.6.0
 pytest-mock==3.14.0
 pytest-randomly==3.15.0
 sortedcontainers==2.4.0
-tomli==2.0.1
-uwsgi==2.0.26
-zipp==3.20.1
+tomli==2.0.2
+uwsgi==2.0.27
+zipp==3.20.2
diff --git a/ddtrace/contrib/pytest/_plugin_v2.py b/ddtrace/contrib/pytest/_plugin_v2.py
index 2f7816343ac..7dc7d278de7 100644
--- a/ddtrace/contrib/pytest/_plugin_v2.py
+++ b/ddtrace/contrib/pytest/_plugin_v2.py
@@ -11,13 +11,8 @@
 from ddtrace.contrib.internal.coverage.patch import run_coverage_report
 from ddtrace.contrib.internal.coverage.utils import _is_coverage_invoked_by_coverage_run
 from ddtrace.contrib.internal.coverage.utils import _is_coverage_patched
-from ddtrace.contrib.pytest._atr_utils import atr_get_failed_reports
-from ddtrace.contrib.pytest._atr_utils import atr_get_teststatus
-from ddtrace.contrib.pytest._atr_utils import atr_handle_retries
-from ddtrace.contrib.pytest._atr_utils import atr_pytest_terminal_summary_post_yield
 from ddtrace.contrib.pytest._plugin_v1 import _extract_reason
 from ddtrace.contrib.pytest._plugin_v1 import _is_pytest_cov_enabled
-from ddtrace.contrib.pytest._retry_utils import get_retry_num
 from ddtrace.contrib.pytest._types import _pytest_report_teststatus_return_type
 from ddtrace.contrib.pytest._types import pytest_CallInfo
 from ddtrace.contrib.pytest._types import pytest_Config
@@ -34,6 +29,7 @@
 from ddtrace.contrib.pytest._utils import _pytest_marked_to_skip
 from ddtrace.contrib.pytest._utils import _pytest_version_supports_atr
 from ddtrace.contrib.pytest._utils import _pytest_version_supports_efd
+from ddtrace.contrib.pytest._utils import _pytest_version_supports_retries
 from ddtrace.contrib.pytest._utils import _TestOutcome
 from ddtrace.contrib.pytest.constants import FRAMEWORK
 from ddtrace.contrib.pytest.constants import XFAIL_REASON
@@ -62,12 +58,20 @@
 from ddtrace.internal.test_visibility.coverage_lines import CoverageLines
 
 
+if _pytest_version_supports_retries():
+    from ddtrace.contrib.pytest._retry_utils import get_retry_num
+
 if _pytest_version_supports_efd():
     from ddtrace.contrib.pytest._efd_utils import efd_get_failed_reports
     from ddtrace.contrib.pytest._efd_utils import efd_get_teststatus
     from ddtrace.contrib.pytest._efd_utils import efd_handle_retries
     from ddtrace.contrib.pytest._efd_utils import efd_pytest_terminal_summary_post_yield
 
+if _pytest_version_supports_atr():
+    from ddtrace.contrib.pytest._atr_utils import atr_get_failed_reports
+    from ddtrace.contrib.pytest._atr_utils import atr_get_teststatus
+    from ddtrace.contrib.pytest._atr_utils import atr_handle_retries
+    from ddtrace.contrib.pytest._atr_utils import atr_pytest_terminal_summary_post_yield
 
 log = get_logger(__name__)
 
@@ -417,7 +421,7 @@ def _process_result(item, call, result) -> _TestOutcome:
 
 def _pytest_runtest_makereport(item: pytest.Item, call: pytest_CallInfo, outcome: pytest_TestReport) -> None:
     # When ATR or EFD retries are active, we do not want makereport to generate results
-    if get_retry_num(item.nodeid) is not None:
+    if _pytest_version_supports_retries() and get_retry_num(item.nodeid) is not None:
         return
 
     original_result = outcome.get_result()
@@ -507,6 +511,10 @@ def _pytest_terminal_summary_post_yield(terminalreporter, failed_reports_initial
 @pytest.hookimpl(hookwrapper=True, tryfirst=True)
 def pytest_terminal_summary(terminalreporter, exitstatus, config):
     """Report flaky or failed tests"""
+    if not is_test_visibility_enabled():
+        yield
+        return
+
     failed_reports_initial_size = None
     try:
         failed_reports_initial_size = _pytest_terminal_summary_pre_yield(terminalreporter)
@@ -563,6 +571,9 @@ def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:
 def pytest_report_teststatus(
     report: pytest_TestReport,
 ) -> _pytest_report_teststatus_return_type:
+    if not is_test_visibility_enabled():
+        return
+
     if _pytest_version_supports_atr() and InternalTestSession.atr_is_enabled():
         test_status = atr_get_teststatus(report)
         if test_status is not None:
diff --git a/ddtrace/internal/datadog/profiling/cmake/FindLibdatadog.cmake b/ddtrace/internal/datadog/profiling/cmake/FindLibdatadog.cmake
index e713722698b..c74851b9e65 100644
--- a/ddtrace/internal/datadog/profiling/cmake/FindLibdatadog.cmake
+++ b/ddtrace/internal/datadog/profiling/cmake/FindLibdatadog.cmake
@@ -5,7 +5,7 @@ endif()
 
 include(ExternalProject)
 set(TAG_LIBDATADOG
-    "v14.0.0"
+    "v14.1.0"
     CACHE STRING "libdatadog github tag")
 
 set(Datadog_BUILD_DIR ${CMAKE_BINARY_DIR}/libdatadog)
diff --git a/ddtrace/internal/datadog/profiling/cmake/tools/libdatadog_checksums.txt b/ddtrace/internal/datadog/profiling/cmake/tools/libdatadog_checksums.txt
index d2e19b88f78..a6a65ce0f90 100644
--- a/ddtrace/internal/datadog/profiling/cmake/tools/libdatadog_checksums.txt
+++ b/ddtrace/internal/datadog/profiling/cmake/tools/libdatadog_checksums.txt
@@ -1,5 +1,5 @@
-6aa3a1dd9664f1bb51aa64e647344f48deb0b07a2c0c95cfa40af0fd0463cb08 libdatadog-aarch64-alpine-linux-musl.tar.gz
-fa29ac61904b0481bcaaf2cc3aff844ac058ce92d0a4d7cfed25e4f178442359 libdatadog-aarch64-apple-darwin.tar.gz
-44cde6f2b406842e9e94b36cc04aadfcc628242c634cf103bde2f4907640d39a libdatadog-aarch64-unknown-linux-gnu.tar.gz
-0aaed4bbbd30dc77c9e2cd5c9bbc011d101086eb6eada6332f0a8276cd67b691 libdatadog-x86_64-alpine-linux-musl.tar.gz
-c88fa1f191637e7e42776d2139721294cebc697d3cc951b972f677bb08d641fd libdatadog-x86_64-unknown-linux-gnu.tar.gz
+fc6be3383d3a115804c43e2c66dd176c63f33b362d987d9b1211034e2b549c2d libdatadog-aarch64-alpine-linux-musl.tar.gz
+b9c972afea19696ee6a459d2fa65563b738baf77dcb12739c8e4ae44d1c975fb libdatadog-aarch64-apple-darwin.tar.gz
+1a9bc4d99d23f7baf403b6b7527f9b9d76bdb166dc34656150561dcb148cc90b libdatadog-aarch64-unknown-linux-gnu.tar.gz
+8244831681332dfa939eefe6923fe6a8beaffff48cb336f836b55a438078add1 libdatadog-x86_64-alpine-linux-musl.tar.gz
+76fcb3bfe3b3971d77f6dd4968ffe6bd5f6a1ada82e2e990a78919107dc2ee40 libdatadog-x86_64-unknown-linux-gnu.tar.gz
diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp
index 310bf3b95bd..ca355cac97c 100644
--- a/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp
+++ b/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp
@@ -74,6 +74,12 @@ void
 Datadog::SampleManager::postfork_child()
 {
     Datadog::Sample::postfork_child();
+    if (sample_pool != nullptr) {
+        // Replace the pool so the child starts from a consistent state: a thread
+        // in the parent may have been adding or removing a sample when fork()
+        // happened, which could leave the inherited pool half-updated.
+        sample_pool = std::make_unique<SynchronizedSamplePool>(sample_pool_capacity);
+    }
 }
 
 void
diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt b/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt
index 9ff45b93108..e7fe2ceb3ed 100644
--- a/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt
+++ b/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt
@@ -22,11 +22,18 @@ function(dd_wrapper_add_test name)
     add_ddup_config(${name})
 
     gtest_discover_tests(${name})
+
+    set_target_properties(${name} PROPERTIES INSTALL_RPATH "$ORIGIN/..")
+
+    if(LIB_INSTALL_DIR)
+        install(TARGETS ${name}
+            RUNTIME DESTINATION ${LIB_INSTALL_DIR}/../test)
+    endif()
 endfunction()
 
 # Add the tests
-dd_wrapper_add_test(initialization initialization.cpp)
-dd_wrapper_add_test(api api.cpp)
-dd_wrapper_add_test(threading threading.cpp)
-dd_wrapper_add_test(forking forking.cpp)
-dd_wrapper_add_test(code_provenance code_provenance.cpp)
+dd_wrapper_add_test(test_initialization test_initialization.cpp)
+dd_wrapper_add_test(test_api test_api.cpp)
+dd_wrapper_add_test(test_threading test_threading.cpp)
+dd_wrapper_add_test(test_forking test_forking.cpp)
+dd_wrapper_add_test(test_code_provenance test_code_provenance.cpp)
diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/test/api.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/test/test_api.cpp
similarity index 100%
rename from ddtrace/internal/datadog/profiling/dd_wrapper/test/api.cpp
rename to ddtrace/internal/datadog/profiling/dd_wrapper/test/test_api.cpp
diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/test/code_provenance.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/test/test_code_provenance.cpp
similarity index 100%
rename from ddtrace/internal/datadog/profiling/dd_wrapper/test/code_provenance.cpp
rename to ddtrace/internal/datadog/profiling/dd_wrapper/test/test_code_provenance.cpp
diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/test/forking.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/test/test_forking.cpp
similarity index 99%
rename from ddtrace/internal/datadog/profiling/dd_wrapper/test/forking.cpp
rename to ddtrace/internal/datadog/profiling/dd_wrapper/test/test_forking.cpp
index e7af54abc10..e02849248e6 100644
--- a/ddtrace/internal/datadog/profiling/dd_wrapper/test/forking.cpp
+++ b/ddtrace/internal/datadog/profiling/dd_wrapper/test/test_forking.cpp
@@ -64,7 +64,7 @@ sample_in_threads_and_fork(unsigned int num_threads, unsigned int sleep_time_ns)
     int status;
     done.store(true);
     waitpid(pid, &status, 0);
-    upload_in_thread();
+    ddup_upload();
     if (!is_exit_normal(status)) {
         std::exit(1);
     }
diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/test/initialization.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/test/test_initialization.cpp
similarity index 100%
rename from ddtrace/internal/datadog/profiling/dd_wrapper/test/initialization.cpp
rename to ddtrace/internal/datadog/profiling/dd_wrapper/test/test_initialization.cpp
diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/test/threading.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/test/test_threading.cpp
similarity index 100%
rename from ddtrace/internal/datadog/profiling/dd_wrapper/test/threading.cpp
rename to ddtrace/internal/datadog/profiling/dd_wrapper/test/test_threading.cpp
diff --git a/ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt
index 23fcda3eedb..dd8e149f54c 100644
--- a/ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt
+++ b/ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt
@@ -15,11 +15,24 @@ include(AnalysisFunc)
 function(dd_wrapper_add_test name)
     add_executable(${name} ${ARGN})
     target_include_directories(${name} PRIVATE ../include)
-    target_link_libraries(${name} PRIVATE gmock gtest_main _stack_v2)
+    # This has to refer to the stack_v2 extension by name (EXTENSION_NAME) to link against it properly
+    target_link_libraries(${name} PRIVATE gmock gtest_main ${EXTENSION_NAME})
+    set_target_properties(${name} PROPERTIES INSTALL_RPATH "$ORIGIN/../stack_v2")
+
     add_ddup_config(${name})
 
     gtest_discover_tests(${name})
+
+    # This is a supplemental artifact, so make sure it is installed in the right place
+    if(INPLACE_LIB_INSTALL_DIR)
+        set(LIB_INSTALL_DIR "${INPLACE_LIB_INSTALL_DIR}")
+    endif()
+
+    if(LIB_INSTALL_DIR)
+        install(TARGETS ${name}
+            RUNTIME DESTINATION ${LIB_INSTALL_DIR}/../test)
+    endif()
 endfunction()
 
 # Add the tests
-dd_wrapper_add_test(thread_span_links thread_span_links.cpp)
+dd_wrapper_add_test(test_thread_span_links test_thread_span_links.cpp)
diff --git a/ddtrace/internal/datadog/profiling/stack_v2/test/thread_span_links.cpp b/ddtrace/internal/datadog/profiling/stack_v2/test/test_thread_span_links.cpp
similarity index 100%
rename from ddtrace/internal/datadog/profiling/stack_v2/test/thread_span_links.cpp
rename to ddtrace/internal/datadog/profiling/stack_v2/test/test_thread_span_links.cpp
diff --git a/ddtrace/internal/datadog/profiling/test/.gitignore b/ddtrace/internal/datadog/profiling/test/.gitignore
new file mode 100644
index 00000000000..d6b7ef32c84
--- /dev/null
+++ b/ddtrace/internal/datadog/profiling/test/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/ddtrace/llmobs/_constants.py b/ddtrace/llmobs/_constants.py
index 5f33349e938..7c295835e54 100644
--- a/ddtrace/llmobs/_constants.py
+++ b/ddtrace/llmobs/_constants.py
@@ -56,3 +56,7 @@
 ANNOTATIONS_CONTEXT_ID = "annotations_context_id"
 INTERNAL_CONTEXT_VARIABLE_KEYS = "_dd_context_variable_keys"
 INTERNAL_QUERY_VARIABLE_KEYS = "_dd_query_variable_keys"
+
+FAITHFULNESS_DISAGREEMENTS_METADATA = "_dd.faithfulness_disagreements"
+EVALUATION_KIND_METADATA = "_dd.evaluation_kind"
+EVALUATION_SPAN_METADATA = "_dd.evaluation_span"
diff --git a/ddtrace/llmobs/_evaluators/ragas/faithfulness.py b/ddtrace/llmobs/_evaluators/ragas/faithfulness.py
index 9b0abbd8953..d651c2443a4 100644
--- a/ddtrace/llmobs/_evaluators/ragas/faithfulness.py
+++ b/ddtrace/llmobs/_evaluators/ragas/faithfulness.py
@@ -3,6 +3,7 @@
 import traceback
 from typing import List
 from typing import Optional
+from typing import Tuple
 from typing import Union
 
 from ddtrace.internal.logger import get_logger
@@ -10,6 +11,11 @@
 from ddtrace.internal.telemetry.constants import TELEMETRY_APM_PRODUCT
 from ddtrace.internal.telemetry.constants import TELEMETRY_LOG_LEVEL
 from ddtrace.internal.utils.version import parse_version
+from ddtrace.llmobs._constants import EVALUATION_KIND_METADATA
+from ddtrace.llmobs._constants import EVALUATION_SPAN_METADATA
+from ddtrace.llmobs._constants import FAITHFULNESS_DISAGREEMENTS_METADATA
+from ddtrace.llmobs._constants import INTERNAL_CONTEXT_VARIABLE_KEYS
+from ddtrace.llmobs._constants import INTERNAL_QUERY_VARIABLE_KEYS
 from ddtrace.llmobs._constants import RAGAS_ML_APP_PREFIX
 
 
@@ -163,7 +169,7 @@ def __init__(self, llmobs_service):
     def run_and_submit_evaluation(self, span_event: dict):
         if not span_event:
             return
-        score_result_or_failure = self.evaluate(span_event)
+        score_result_or_failure, metric_metadata = self.evaluate(span_event)
         telemetry_writer.add_count_metric(
             TELEMETRY_APM_PRODUCT.LLMOBS,
             "evaluators.run",
@@ -179,9 +185,10 @@ def run_and_submit_evaluation(self, span_event: dict):
                 label=RagasFaithfulnessEvaluator.LABEL,
                 metric_type=RagasFaithfulnessEvaluator.METRIC_TYPE,
                 value=score_result_or_failure,
+                metadata=metric_metadata,
             )
 
-    def evaluate(self, span_event: dict) -> Union[float, str]:
+    def evaluate(self, span_event: dict) -> Tuple[Union[float, str], Optional[dict]]:
         """
         Performs a faithfulness evaluation on a span event, returning either
             - faithfulness score (float) OR
@@ -191,20 +198,34 @@ def evaluate(self, span_event: dict) -> Union[float, str]:
         """
         self.ragas_faithfulness_instance = _get_faithfulness_instance()
         if not self.ragas_faithfulness_instance:
-            return "fail_faithfulness_is_none"
-
-        score, question, answer, context, statements, faithfulness_list = math.nan, None, None, None, None, None
+            return "fail_faithfulness_is_none", {}
+
+        evaluation_metadata = {EVALUATION_KIND_METADATA: "faithfulness"}  # type: dict[str, Union[str, dict, list]]
+
+        # initialize data we annotate for tracing ragas
+        score, question, answer, context, statements, faithfulness_list = (
+            math.nan,
+            None,
+            None,
+            None,
+            None,
+            None,
+        )
 
         with self.llmobs_service.workflow(
             "dd-ragas.faithfulness", ml_app=_get_ml_app_for_ragas_trace(span_event)
         ) as ragas_faithfulness_workflow:
             try:
+                evaluation_metadata[EVALUATION_SPAN_METADATA] = self.llmobs_service.export_span(
+                    span=ragas_faithfulness_workflow
+                )
+
                 faithfulness_inputs = self._extract_faithfulness_inputs(span_event)
                 if faithfulness_inputs is None:
                     logger.debug(
                         "Failed to extract question and context from span sampled for ragas_faithfulness evaluation"
                     )
-                    return "fail_extract_faithfulness_inputs"
+                    return "fail_extract_faithfulness_inputs", evaluation_metadata
 
                 question = faithfulness_inputs["question"]
                 answer = faithfulness_inputs["answer"]
@@ -213,19 +234,23 @@ def evaluate(self, span_event: dict) -> Union[float, str]:
                 statements = self._create_statements(question, answer)
                 if statements is None:
                     logger.debug("Failed to create statements from answer for `ragas_faithfulness` evaluator")
-                    return "statements_is_none"
+                    return "statements_is_none", evaluation_metadata
 
                 faithfulness_list = self._create_verdicts(context, statements)
                 if faithfulness_list is None:
                     logger.debug("Failed to create faithfulness list `ragas_faithfulness` evaluator")
-                    return "statements_create_faithfulness_list"
+                    return "statements_create_faithfulness_list", evaluation_metadata
+
+                # Record the statement of every verdict equal to 0 as a faithfulness disagreement.
+                disagreements = [{"answer_quote": v.statement} for v in faithfulness_list.__root__ if v.verdict == 0]
+                evaluation_metadata[FAITHFULNESS_DISAGREEMENTS_METADATA] = disagreements
 
                 score = self._compute_score(faithfulness_list)
                 if math.isnan(score):
                     logger.debug("Score computation returned NaN for `ragas_faithfulness` evaluator")
-                    return "statements_compute_score"
+                    return "statements_compute_score", evaluation_metadata
 
-                return score
+                return score, evaluation_metadata
             finally:
                 self.llmobs_service.annotate(
                     span=ragas_faithfulness_workflow,
@@ -341,10 +366,12 @@ def _extract_faithfulness_inputs(self, span_event: dict) -> Optional[dict]:
                 answer = messages[-1].get("content")
 
             if prompt_variables:
-                question = prompt_variables.get("question")
-                context = prompt_variables.get("context")
+                context_keys = prompt.get(INTERNAL_CONTEXT_VARIABLE_KEYS, ["context"])
+                question_keys = prompt.get(INTERNAL_QUERY_VARIABLE_KEYS, ["question"])
+                context = " ".join([prompt_variables.get(key) for key in context_keys if prompt_variables.get(key)])
+                question = " ".join([prompt_variables.get(key) for key in question_keys if prompt_variables.get(key)])
 
-            if not question and len(input_messages) > 0:
+            if not question and input_messages is not None and len(input_messages) > 0:
                 question = input_messages[-1].get("content")
 
             self.llmobs_service.annotate(
diff --git a/ddtrace/profiling/profiler.py b/ddtrace/profiling/profiler.py
index a74136912cc..48789200d7f 100644
--- a/ddtrace/profiling/profiler.py
+++ b/ddtrace/profiling/profiler.py
@@ -16,6 +16,7 @@
 from ddtrace.internal import service
 from ddtrace.internal import uwsgi
 from ddtrace.internal import writer
+from ddtrace.internal.core import crashtracking
 from ddtrace.internal.datadog.profiling import ddup
 from ddtrace.internal.module import ModuleWatchdog
 from ddtrace.internal.telemetry import telemetry_writer
@@ -223,6 +224,7 @@ def _build_default_exporters(self):
         configured_features.append("CAP" + str(profiling_config.capture_pct))
         configured_features.append("MAXF" + str(profiling_config.max_frames))
         self.tags.update({"profiler_config": "_".join(configured_features)})
+        crashtracking.add_tag("profiler_config", self.tags["profiler_config"])
 
         endpoint_call_counter_span_processor = self.tracer._endpoint_call_counter_span_processor
         if self.endpoint_collection_enabled:
diff --git a/docs/configuration.rst b/docs/configuration.rst
index 45673747b72..3afe7ee817f 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -681,3 +681,9 @@ Exception Replay
 ----------------
 
 .. ddtrace-envier-configuration:: ddtrace.settings.exception_replay:ExceptionReplayConfig
+
+
+Code Origin
+-----------
+
+.. ddtrace-envier-configuration:: ddtrace.settings.code_origin:CodeOriginConfig
diff --git a/releasenotes/notes/profiling-sample-pool-461a108e068dea5b.yaml b/releasenotes/notes/profiling-sample-pool-461a108e068dea5b.yaml
new file mode 100644
index 00000000000..2038245d266
--- /dev/null
+++ b/releasenotes/notes/profiling-sample-pool-461a108e068dea5b.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    profiling: fixes an issue where the sample pool could deadlock after ``fork()``
+    by clearing it in the child process.
+
diff --git a/riotfile.py b/riotfile.py
index 61d0e0b87db..c31945aedf3 100644
--- a/riotfile.py
+++ b/riotfile.py
@@ -1596,6 +1596,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
             },
             env={
                 "DD_AGENT_PORT": "9126",
+                "_DD_CIVISIBILITY_USE_PYTEST_V2": "1",
             },
             venvs=[
                 Venv(
@@ -1683,6 +1684,9 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
                 "more_itertools": "<8.11.0",
                 "pytest-randomly": latest,
             },
+            env={
+                "_DD_CIVISIBILITY_USE_PYTEST_V2": "0",
+            },
             venvs=[
                 Venv(
                     pys=select_pys(min_version="3.7", max_version="3.9"),
@@ -1713,6 +1717,9 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
                 "msgpack": latest,
                 "pytest-randomly": latest,
             },
+            env={
+                "_DD_CIVISIBILITY_USE_PYTEST_V2": "0",
+            },
             venvs=[
                 Venv(
                     venvs=[
@@ -2472,28 +2479,25 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
                         Venv(
                             pys=select_pys(min_version="3.7", max_version="3.8"),
                             pkgs={
-                                "gevent": ["~=20.12.0"],
-                                # greenlet>0.4.17 wheels are incompatible with gevent and python>3.7
-                                # This issue was fixed in gevent v20.9:
-                                # https://github.com/gevent/gevent/issues/1678#issuecomment-697995192
-                                "greenlet": "~=1.0.0",
+                                "gevent": latest,
+                                "greenlet": latest,
                             },
                         ),
                         Venv(
                             pys="3.9",
-                            pkgs={"gevent": "~=21.1.0", "greenlet": "~=1.0"},
+                            pkgs={"gevent": latest, "greenlet": latest},
                         ),
                         Venv(
                             pys="3.10",
-                            pkgs={"gevent": "~=21.8.0"},
+                            pkgs={"gevent": latest},
                         ),
                         Venv(
                             pys="3.11",
-                            pkgs={"gevent": "~=22.8.0"},
+                            pkgs={"gevent": latest},
                         ),
                         Venv(
                             pys=select_pys(min_version="3.12"),
-                            pkgs={"gevent": "~=23.9.0"},
+                            pkgs={"gevent": latest},
                         ),
                     ],
                 ),
@@ -2907,8 +2911,8 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
                             venvs=[
                                 Venv(
                                     pkgs={
-                                        "gevent": "==21.8.0",
-                                        "greenlet": "==1.1.0",
+                                        "gevent": latest,
+                                        "greenlet": latest,
                                     }
                                 ),
                                 Venv(
@@ -2933,14 +2937,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
                             env={
                                 "DD_PROFILE_TEST_GEVENT": "1",
                             },
-                            pkgs={
-                                "gunicorn[gevent]": latest,
-                            },
-                            venvs=[
-                                Venv(
-                                    pkgs={"gevent": ["==22.10.2", latest]},
-                                ),
-                            ],
+                            pkgs={"gunicorn[gevent]": latest, "gevent": latest},
                         ),
                     ],
                 ),
@@ -2959,14 +2956,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
                             env={
                                 "DD_PROFILE_TEST_GEVENT": "1",
                             },
-                            pkgs={
-                                "gunicorn[gevent]": latest,
-                            },
-                            venvs=[
-                                Venv(
-                                    pkgs={"gevent": ["==23.9.0"]},
-                                ),
-                            ],
+                            pkgs={"gunicorn[gevent]": latest, "gevent": latest},
                         ),
                     ],
                 ),
@@ -2976,10 +2966,16 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
             name="profile-v2",
             # NB riot commands that use this Venv must include --pass-env to work properly
             command="python -m tests.profiling.run pytest -v --no-cov --capture=no --benchmark-disable {cmdargs} tests/profiling_v2",  # noqa: E501
-            env={"DD_PROFILING_ENABLE_ASSERTS": "1", "DD_PROFILING_EXPORT_LIBDD_ENABLED": "1"},
+            env={
+                "DD_PROFILING_ENABLE_ASSERTS": "1",
+                "DD_PROFILING_EXPORT_LIBDD_ENABLED": "1",
+                # Enable pytest v2 plugin to handle pytest-cpp items in the test suite
+                "_DD_CIVISIBILITY_USE_PYTEST_V2": "1",
+            },
             pkgs={
                 "gunicorn": latest,
                 "lz4": latest,
+                "pytest-cpp": latest,
                 #
                 # pytest-benchmark depends on cpuinfo which dropped support for Python<=3.6 in 9.0
                 # See https://github.com/workhorsy/py-cpuinfo/issues/177
@@ -3054,8 +3050,8 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
                             venvs=[
                                 Venv(
                                     pkgs={
-                                        "gevent": "==21.8.0",
-                                        "greenlet": "==1.1.0",
+                                        "gevent": latest,
+                                        "greenlet": latest,
                                     }
                                 ),
                                 Venv(
@@ -3080,14 +3076,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
                             env={
                                 "DD_PROFILE_TEST_GEVENT": "1",
                             },
-                            pkgs={
-                                "gunicorn[gevent]": latest,
-                            },
-                            venvs=[
-                                Venv(
-                                    pkgs={"gevent": ["==22.10.2", latest]},
-                                ),
-                            ],
+                            pkgs={"gunicorn[gevent]": latest, "gevent": latest},
                         ),
                     ],
                 ),
@@ -3106,14 +3095,7 @@ def select_pys(min_version=MIN_PYTHON_VERSION, max_version=MAX_PYTHON_VERSION):
                             env={
                                 "DD_PROFILE_TEST_GEVENT": "1",
                             },
-                            pkgs={
-                                "gunicorn[gevent]": latest,
-                            },
-                            venvs=[
-                                Venv(
-                                    pkgs={"gevent": ["==23.9.0"]},
-                                ),
-                            ],
+                            pkgs={"gunicorn[gevent]": latest, "gevent": latest},
                         ),
                     ],
                 ),
diff --git a/setup.py b/setup.py
index 727de14e037..ce1aa685596 100644
--- a/setup.py
+++ b/setup.py
@@ -53,6 +53,8 @@
 CRASHTRACKER_DIR = HERE / "ddtrace" / "internal" / "datadog" / "profiling" / "crashtracker"
 STACK_V2_DIR = HERE / "ddtrace" / "internal" / "datadog" / "profiling" / "stack_v2"
 
+BUILD_PROFILING_NATIVE_TESTS = os.getenv("DD_PROFILING_NATIVE_TESTS", "0").lower() in ("1", "yes", "on", "true")
+
 CURRENT_OS = platform.system()
 
 LIBDDWAF_VERSION = "1.20.1"
@@ -349,6 +351,9 @@ def build_extension_cmake(self, ext):
             "-DEXTENSION_NAME={}".format(extension_basename),
         ]
 
+        if BUILD_PROFILING_NATIVE_TESTS:
+            cmake_args += ["-DBUILD_TESTING=ON"]
+
         # If it's been enabled, also propagate sccache to the CMake build.  We have to manually set the default CC/CXX
         # compilers here, because otherwise the way we wrap sccache will conflict with the CMake wrappers
         sccache_path = os.getenv("DD_SCCACHE_PATH")
@@ -567,7 +572,13 @@ def get_exts_for(name):
         "ddtrace.appsec": ["rules.json"],
         "ddtrace.appsec._ddwaf": ["libddwaf/*/lib/libddwaf.*"],
         "ddtrace.appsec._iast._taint_tracking": ["CMakeLists.txt"],
-        "ddtrace.internal.datadog.profiling": ["libdd_wrapper*.*"],
+        # libdd_wrapper is always packaged; the native test artifacts are appended only when they are built.  The
+        # conditional must be parenthesized: `a + b if c else d` parses as `(a + b) if c else d`, which would drop
+        # libdd_wrapper from every build that does not set DD_PROFILING_NATIVE_TESTS.
+        # NOTE(review): package_data globs are resolved relative to the package directory -- confirm the test path.
+        "ddtrace.internal.datadog.profiling": ["libdd_wrapper*.*"] + (
+            ["ddtrace/internal/datadog/profiling/test/*"] if BUILD_PROFILING_NATIVE_TESTS else []
+        ),
         "ddtrace.internal.datadog.profiling.crashtracker": ["crashtracker_exe*"],
     },
     zip_safe=False,
diff --git a/tests/ci_visibility/api/test_internal_test_visibility_api.py b/tests/ci_visibility/api/test_internal_test_visibility_api.py
index 71679fd19a4..1347a964b31 100644
--- a/tests/ci_visibility/api/test_internal_test_visibility_api.py
+++ b/tests/ci_visibility/api/test_internal_test_visibility_api.py
@@ -1,3 +1,5 @@
+import pytest
+
 import ddtrace.ext.test_visibility.api as ext_api
 from ddtrace.internal.ci_visibility import CIVisibility
 from ddtrace.internal.test_visibility import api
@@ -13,6 +15,22 @@ class TestCIITRMixin:
     Note: these tests do not bother discovering a session as the ITR functionality currently does not rely on sessions.
     """
 
+    @pytest.fixture(scope="function", autouse=True)
+    def _disable_ci_visibility(self):
+        """Best-effort guard: switch CIVisibility off before, and again after, every test it applies to."""
+
+        def _disable_if_enabled():
+            try:
+                if CIVisibility.enabled:
+                    CIVisibility.disable()
+            except Exception:  # noqa: E722
+                # no-dd-sa:python-best-practices/no-silent-exception
+                pass
+
+        _disable_if_enabled()
+        yield
+        _disable_if_enabled()
+
     def test_api_is_item_itr_skippable_test_level(self):
         with set_up_mock_civisibility(
             itr_enabled=True,
diff --git a/tests/ci_visibility/api_client/_util.py b/tests/ci_visibility/api_client/_util.py
index 8d891a6f7f2..8a260fbf3e6 100644
--- a/tests/ci_visibility/api_client/_util.py
+++ b/tests/ci_visibility/api_client/_util.py
@@ -143,6 +143,22 @@ class TestTestVisibilityAPIClientBase:
     - good/bad/incorrect API responses
     """
 
+    @pytest.fixture(scope="function", autouse=True)
+    def _disable_ci_visibility(self):
+        """Best-effort guard: switch CIVisibility off before, and again after, every test it applies to."""
+
+        def _disable_if_enabled():
+            try:
+                if CIVisibility.enabled:
+                    CIVisibility.disable()
+            except Exception:  # noqa: E722
+                # no-dd-sa:python-best-practices/no-silent-exception
+                pass
+
+        _disable_if_enabled()
+        yield
+        _disable_if_enabled()
+
     default_git_data = GitData("my_repo_url", "some_branch", "mycommitshaaaaaaalalala")
 
     default_configurations = {
diff --git a/tests/ci_visibility/test_ci_visibility.py b/tests/ci_visibility/test_ci_visibility.py
index f4ef545c5bd..b3de47a82b5 100644
--- a/tests/ci_visibility/test_ci_visibility.py
+++ b/tests/ci_visibility/test_ci_visibility.py
@@ -48,6 +48,23 @@
 TEST_SHA_2 = "b3672ea5cbc584124728c48a443825d2940e0eee"
 
 
+@pytest.fixture(scope="function", autouse=True)
+def _disable_ci_visibility():
+    """Best-effort guard: switch CIVisibility off before, and again after, every test in this module."""
+
+    def _disable_if_enabled():
+        try:
+            if CIVisibility.enabled:
+                CIVisibility.disable()
+        except Exception:  # noqa: E722
+            # no-dd-sa:python-best-practices/no-silent-exception
+            pass
+
+    _disable_if_enabled()
+    yield
+    _disable_if_enabled()
+
+
 @contextlib.contextmanager
 def _dummy_noop_git_client():
     with mock.patch.multiple(
@@ -611,7 +628,7 @@ def test_civisibilitywriter_coverage_agentless_with_intake_url_param(self):
             ):
                 _get_connection.return_value.getresponse.return_value.status = 200
                 dummy_writer._put("", {}, cov_client, no_trace=True)
-                _get_connection.assert_called_once_with("https://citestcov-intake.datadoghq.com", 2.0)
+                _get_connection.assert_any_call("https://citestcov-intake.datadoghq.com", 2.0)
 
     def test_civisibilitywriter_coverage_evp_proxy_url(self):
         with _ci_override_env(
@@ -631,7 +648,7 @@ def test_civisibilitywriter_coverage_evp_proxy_url(self):
             with mock.patch("ddtrace.internal.writer.writer.get_connection") as _get_connection:
                 _get_connection.return_value.getresponse.return_value.status = 200
                 dummy_writer._put("", {}, cov_client, no_trace=True)
-                _get_connection.assert_called_once_with("http://arandomhost:9126", 2.0)
+                _get_connection.assert_any_call("http://arandomhost:9126", 2.0)
 
 
 def test_civisibilitywriter_agentless_url_envvar():
diff --git a/tests/llmobs/_utils.py b/tests/llmobs/_utils.py
index afafdaf4aab..fb48d15431b 100644
--- a/tests/llmobs/_utils.py
+++ b/tests/llmobs/_utils.py
@@ -12,6 +12,7 @@
 from ddtrace._trace.span import Span
 from ddtrace.ext import SpanTypes
 from ddtrace.llmobs._utils import _get_span_name
+from ddtrace.llmobs._writer import LLMObsEvaluationMetricEvent
 
 
 if vcr:
@@ -508,6 +509,21 @@ def run_and_submit_evaluation(self, span):
         )
 
 
+def _dummy_evaluator_eval_metric_event(span_id, trace_id):
+    """Return the evaluation metric event tests expect DummyEvaluator to produce for the given span/trace ids."""
+    expected_tags = ["ddtrace.version:{}".format(ddtrace.__version__), "ml_app:unnamed-ml-app"]
+    return LLMObsEvaluationMetricEvent(
+        label=DummyEvaluator.LABEL,
+        metric_type="score",
+        score_value=1.0,
+        span_id=span_id,
+        trace_id=trace_id,
+        ml_app="unnamed-ml-app",
+        timestamp_ms=mock.ANY,
+        tags=expected_tags,
+    )
+
+
 def _expected_ragas_spans(ragas_inputs=None):
     if not ragas_inputs:
         ragas_inputs = default_ragas_inputs
diff --git a/tests/llmobs/llmobs_cassettes/tests.llmobs.test_llmobs_ragas_faithfulness_evaluator.test_ragas_faithfulness_submits_evaluation_on_span_with_custom_keys.yaml b/tests/llmobs/llmobs_cassettes/tests.llmobs.test_llmobs_ragas_faithfulness_evaluator.test_ragas_faithfulness_submits_evaluation_on_span_with_custom_keys.yaml
new file mode 100644
index 00000000000..1301513e8aa
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/tests.llmobs.test_llmobs_ragas_faithfulness_evaluator.test_ragas_faithfulness_submits_evaluation_on_span_with_custom_keys.yaml
@@ -0,0 +1,279 @@
+interactions:
+- request:
+    body: '{"messages": [{"content": "Given a question, an answer, and sentences from
+      the answer analyze the complexity of each sentence given under ''sentences''
+      and break down each sentence into one or more fully understandable statements
+      while also ensuring no pronouns are used in each statement. Format the outputs
+      in JSON.\n\nThe output should be a well-formatted JSON instance that conforms
+      to the JSON schema below.\n\nAs an example, for the schema {\"properties\":
+      {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\":
+      \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\nthe
+      object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema.
+      The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n\nHere
+      is the output JSON schema:\n```\n{\"type\": \"array\", \"items\": {\"$ref\":
+      \"#/definitions/Statements\"}, \"definitions\": {\"Statements\": {\"title\":
+      \"Statements\", \"type\": \"object\", \"properties\": {\"sentence_index\": {\"title\":
+      \"Sentence Index\", \"description\": \"Index of the sentence from the statement
+      list\", \"type\": \"integer\"}, \"simpler_statements\": {\"title\": \"Simpler
+      Statements\", \"description\": \"the simpler statements\", \"type\": \"array\",
+      \"items\": {\"type\": \"string\"}}}, \"required\": [\"sentence_index\", \"simpler_statements\"]}}}\n```\n\nDo
+      not return any preamble or explanations, return only a pure JSON string surrounded
+      by triple backticks (```).\n\nExamples:\n\nquestion: \"Who was Albert Einstein
+      and what is he best known for?\"\nanswer: \"He was a German-born theoretical
+      physicist, widely acknowledged to be one of the greatest and most influential
+      physicists of all time. He was best known for developing the theory of relativity,
+      he also made important contributions to the development of the theory of quantum
+      mechanics.\"\nsentences: \"\\n        0:He was a German-born theoretical physicist,
+      widely acknowledged to be one of the greatest and most influential physicists
+      of all time. \\n        1:He was best known for developing the theory of relativity,
+      he also made important contributions to the development of the theory of quantum
+      mechanics.\\n        \"\nanalysis: ```[{\"sentence_index\": 0, \"simpler_statements\":
+      [\"Albert Einstein was a German-born theoretical physicist.\", \"Albert Einstein
+      is recognized as one of the greatest and most influential physicists of all
+      time.\"]}, {\"sentence_index\": 1, \"simpler_statements\": [\"Albert Einstein
+      was best known for developing the theory of relativity.\", \"Albert Einstein
+      also made important contributions to the development of the theory of quantum
+      mechanics.\"]}]```\n\nYour actual task:\n\nquestion: \"Is france part of europe?\"\nanswer:
+      \"France is indeed part of europe\"\nsentences: \"\"\nanalysis: \n", "role":
+      "user"}], "model": "gpt-4o-mini", "n": 1, "stream": false, "temperature": 1e-08}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '2915'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.52.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.52.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.10.13
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA4ySS2/bMBCE7/oVxJ6tQFb8qm89pAWCFn2k6MUyJIZayWwkkuWu0Ifh/x5QViQb
+        bYFedNiPM5pZ8hgJAbqErQB1kKxa18SvP6cPH7/+WGzow/397fe3T1/STfruU/tQvr/7DbOgsI/f
+        UPGL6kbZ1jXI2pozVh4lY3Cdr2/nSbJerZY9aG2JTZDVjuOFjVttdJwm6SJO1vF8M6gPVisk2Ipd
+        JIQQx/4bcpoSf8JWJLOXSYtEskbYjoeEAG+bMAFJpImlYZhNUFnDaProRVHsjhkQhonCvLfPen+R
+        AenQyefEkrFFwxTQLoM3XhqFQpNw0rOwlbjrvHV4k8H+tC+K4vJ3HquOZKhsuqYZ5qcxf2Nr5+0j
+        DXycV9poOuQeJVkTshJbBz09RULs+z11V9XBeds6ztk+oQmGq2TYE0zXM9F0OUC2LJsL1Qiu/PIS
+        WeqGLjYNSqoDlpN0uhbZldpegOii9Z9p/uZ9bq5N/T/2E1AKHWOZO4+lVteNp2Mew+v917Fxy31g
+        oF/E2OaVNjV65/X57VQur5av5Ga+SNcVRKfoGQAA//8DAN2uIFBJAwAA
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 8defb8f19b1c3992-IAD
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 07 Nov 2024 19:27:45 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=GMAym2MnyZWZw_ORQOfRH7m0vuREaeNFf6gzJiHGy1k-1731007665-1.0.1.1-nArkb5npme6zrdpZ1L0Fho.0C5Glt5LEHaKERjf0koaMgNHOFfv34RUUXMfeQlNRjorW8a21hX7CcW0rBlX22w;
+        path=/; expires=Thu, 07-Nov-24 19:57:45 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=2UlWnMHqlwf2iyqQoi_qr_fVNbYS7TQkpnENDD0iUfk-1731007665474-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - datadog-staging
+      openai-processing-ms:
+      - '458'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999324'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_a8016b4905b963ff5fd373af0e60e956
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"content": "Your task is to judge the faithfulness of a
+      series of statements based on a given context. For each statement you must return
+      verdict as 1 if the statement can be directly inferred based on the context
+      or 0 if the statement can not be directly inferred based on the context.\n\nThe
+      output should be a well-formatted JSON instance that conforms to the JSON schema
+      below.\n\nAs an example, for the schema {\"properties\": {\"foo\": {\"title\":
+      \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\":
+      {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\nthe object {\"foo\": [\"bar\",
+      \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\":
+      {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n\nHere is the output
+      JSON schema:\n```\n{\"type\": \"array\", \"items\": {\"$ref\": \"#/definitions/StatementFaithfulnessAnswer\"},
+      \"definitions\": {\"StatementFaithfulnessAnswer\": {\"title\": \"StatementFaithfulnessAnswer\",
+      \"type\": \"object\", \"properties\": {\"statement\": {\"title\": \"Statement\",
+      \"description\": \"the original statement, word-by-word\", \"type\": \"string\"},
+      \"reason\": {\"title\": \"Reason\", \"description\": \"the reason of the verdict\",
+      \"type\": \"string\"}, \"verdict\": {\"title\": \"Verdict\", \"description\":
+      \"the verdict(0/1) of the faithfulness.\", \"type\": \"integer\"}}, \"required\":
+      [\"statement\", \"reason\", \"verdict\"]}}}\n```\n\nDo not return any preamble
+      or explanations, return only a pure JSON string surrounded by triple backticks
+      (```).\n\nExamples:\n\ncontext: \"John is a student at XYZ University. He is
+      pursuing a degree in Computer Science. He is enrolled in several courses this
+      semester, including Data Structures, Algorithms, and Database Management. John
+      is a diligent student and spends a significant amount of time studying and completing
+      assignments. He often stays late in the library to work on his projects.\"\nstatements:
+      ```[\"John is majoring in Biology.\", \"John is taking a course on Artificial
+      Intelligence.\", \"John is a dedicated student.\", \"John has a part-time job.\"]```\nanswer:
+      ```[{\"statement\": \"John is majoring in Biology.\", \"reason\": \"John''s
+      major is explicitly mentioned as Computer Science. There is no information suggesting
+      he is majoring in Biology.\", \"verdict\": 0}, {\"statement\": \"John is taking
+      a course on Artificial Intelligence.\", \"reason\": \"The context mentions the
+      courses John is currently enrolled in, and Artificial Intelligence is not mentioned.
+      Therefore, it cannot be deduced that John is taking a course on AI.\", \"verdict\":
+      0}, {\"statement\": \"John is a dedicated student.\", \"reason\": \"The context
+      states that he spends a significant amount of time studying and completing assignments.
+      Additionally, it mentions that he often stays late in the library to work on
+      his projects, which implies dedication.\", \"verdict\": 1}, {\"statement\":
+      \"John has a part-time job.\", \"reason\": \"There is no information given in
+      the context about John having a part-time job.\", \"verdict\": 0}]```\n\ncontext:
+      \"Photosynthesis is a process used by plants, algae, and certain bacteria to
+      convert light energy into chemical energy.\"\nstatements: ```[\"Albert Einstein
+      was a genius.\"]```\nanswer: ```[{\"statement\": \"Albert Einstein was a genius.\",
+      \"reason\": \"The context and statement are unrelated\", \"verdict\": 0}]```\n\nYour
+      actual task:\n\ncontext: \"hello,  france is  part of europe\"\nstatements:
+      \"[\\\"France is part of Europe.\\\"]\"\nanswer: \n", "role": "user"}], "model":
+      "gpt-4o-mini", "n": 1, "stream": false, "temperature": 1e-08}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '3657'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=GMAym2MnyZWZw_ORQOfRH7m0vuREaeNFf6gzJiHGy1k-1731007665-1.0.1.1-nArkb5npme6zrdpZ1L0Fho.0C5Glt5LEHaKERjf0koaMgNHOFfv34RUUXMfeQlNRjorW8a21hX7CcW0rBlX22w;
+        _cfuvid=2UlWnMHqlwf2iyqQoi_qr_fVNbYS7TQkpnENDD0iUfk-1731007665474-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.52.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.52.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.10.13
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA4xSwW6bQBC98xWjOZsIHBJbvvmQHCJVqprm0MYRrJcBtl12V7tD68jyv1dgbGy1
+        VXvh8N68x5s3u48AUJW4ApSNYNk6Ha8/zZ8/rj+wWSc/u8UTPWXt12z9Qtn8y/MLznqF3X4jySfV
+        jbSt08TKmiMtPQmm3jVd3KZJsri/vxuI1pake1ntOM5s3Cqj4nkyz+JkEafLUd1YJSngCl4jAID9
+        8O1zmpJ2uIJkdkJaCkHUhKvzEAB6q3sERQgqsDCMs4mU1jCZIXpRFK/7DQYWTC0Z3uAKNvjohZEE
+        KoATnsFW8NB56+hmgzPYoCcRrDmOfm4IBr8dA+2cVlKxfofBMAA3guEfbj/Il0oOf04Pb0VRXEb1
+        VHVB9HWZTusRP5x317Z23m7DyJ/xShkVmvwYtN8zsHU4sIcI4G3ouLuqDZ23reOc7XcyveFiOXaM
+        02kn9vZuJNmy0BO+TE/ElV9eEgulw8WVUArZUDlJp5OKrlT2gogutv49zZ+8j5srU/+P/URISY6p
+        zJ2n/iZXG09jnvqX/7exc8tDYAzvganNK2Vq8s6r47urXJ5sRVKm86xKMTpEvwAAAP//AwA6IXqX
+        hQMAAA==
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 8defb8f6280b3992-IAD
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 07 Nov 2024 19:27:46 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - datadog-staging
+      openai-processing-ms:
+      - '821'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999152'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_397280c8d63c855801cfdc02b86052b2
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/test_llmobs_evaluator_runner.py b/tests/llmobs/test_llmobs_evaluator_runner.py
index 7f7d685cf0a..7ee7d510276 100644
--- a/tests/llmobs/test_llmobs_evaluator_runner.py
+++ b/tests/llmobs/test_llmobs_evaluator_runner.py
@@ -5,12 +5,12 @@
 import mock
 import pytest
 
-import ddtrace
 from ddtrace._trace.span import Span
 from ddtrace.llmobs._evaluators.runner import EvaluatorRunner
 from ddtrace.llmobs._evaluators.sampler import EvaluatorRunnerSampler
 from ddtrace.llmobs._evaluators.sampler import EvaluatorRunnerSamplingRule
-from ddtrace.llmobs._writer import LLMObsEvaluationMetricEvent
+from tests.llmobs._utils import DummyEvaluator
+from tests.llmobs._utils import _dummy_evaluator_eval_metric_event
 from tests.utils import override_env
 from tests.utils import override_global_config
 
@@ -18,22 +18,9 @@
 DUMMY_SPAN = Span("dummy_span")
 
 
-def _dummy_ragas_eval_metric_event(span_id, trace_id):
-    return LLMObsEvaluationMetricEvent(
-        span_id=span_id,
-        trace_id=trace_id,
-        score_value=1.0,
-        ml_app="unnamed-ml-app",
-        timestamp_ms=mock.ANY,
-        metric_type="score",
-        label="ragas_faithfulness",
-        tags=["ddtrace.version:{}".format(ddtrace.__version__), "ml_app:unnamed-ml-app"],
-    )
-
-
-def test_evaluator_runner_start(mock_evaluator_logs, mock_ragas_evaluator):
+def test_evaluator_runner_start(mock_evaluator_logs):
     evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=mock.MagicMock())
-    evaluator_runner.evaluators.append(mock_ragas_evaluator)
+    evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=mock.MagicMock()))
     evaluator_runner.start()
     mock_evaluator_logs.debug.assert_has_calls([mock.call("started %r to %r", "EvaluatorRunner")])
 
@@ -47,20 +34,20 @@ def test_evaluator_runner_buffer_limit(mock_evaluator_logs):
     )
 
 
-def test_evaluator_runner_periodic_enqueues_eval_metric(LLMObs, mock_llmobs_eval_metric_writer, mock_ragas_evaluator):
+def test_evaluator_runner_periodic_enqueues_eval_metric(LLMObs, mock_llmobs_eval_metric_writer):
     evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=LLMObs)
-    evaluator_runner.evaluators.append(mock_ragas_evaluator(llmobs_service=LLMObs))
+    evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=LLMObs))
     evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN)
     evaluator_runner.periodic()
     mock_llmobs_eval_metric_writer.enqueue.assert_called_once_with(
-        _dummy_ragas_eval_metric_event(span_id="123", trace_id="1234")
+        _dummy_evaluator_eval_metric_event(span_id="123", trace_id="1234")
     )
 
 
 @pytest.mark.vcr_logs
-def test_evaluator_runner_timed_enqueues_eval_metric(LLMObs, mock_llmobs_eval_metric_writer, mock_ragas_evaluator):
+def test_evaluator_runner_timed_enqueues_eval_metric(LLMObs, mock_llmobs_eval_metric_writer):
     evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=LLMObs)
-    evaluator_runner.evaluators.append(mock_ragas_evaluator(llmobs_service=LLMObs))
+    evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=LLMObs))
     evaluator_runner.start()
 
     evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN)
@@ -68,7 +55,7 @@ def test_evaluator_runner_timed_enqueues_eval_metric(LLMObs, mock_llmobs_eval_me
     time.sleep(0.1)
 
     mock_llmobs_eval_metric_writer.enqueue.assert_called_once_with(
-        _dummy_ragas_eval_metric_event(span_id="123", trace_id="1234")
+        _dummy_evaluator_eval_metric_event(span_id="123", trace_id="1234")
     )
 
 
diff --git a/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py b/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py
index 51da6aed3cf..1f78b538f24 100644
--- a/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py
+++ b/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py
@@ -5,11 +5,10 @@
 
 from ddtrace.llmobs._evaluators.ragas.faithfulness import RagasFaithfulnessEvaluator
 from ddtrace.span import Span
-
-from ._utils import _expected_llmobs_llm_span_event
-from ._utils import _expected_ragas_spans
-from ._utils import _llm_span_with_expected_ragas_inputs_in_messages
-from ._utils import _llm_span_with_expected_ragas_inputs_in_prompt
+from tests.llmobs._utils import _expected_llmobs_llm_span_event
+from tests.llmobs._utils import _expected_ragas_spans
+from tests.llmobs._utils import _llm_span_with_expected_ragas_inputs_in_messages
+from tests.llmobs._utils import _llm_span_with_expected_ragas_inputs_in_prompt
 
 
 def _llm_span_without_io():
@@ -30,7 +29,8 @@ def test_ragas_faithfulness_throws_if_dependencies_not_present(LLMObs, mock_raga
 
 def test_ragas_faithfulness_returns_none_if_inputs_extraction_fails(ragas, mock_llmobs_submit_evaluation, LLMObs):
     rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
-    assert rf_evaluator.evaluate(_llm_span_without_io()) == "fail_extract_faithfulness_inputs"
+    failure_msg, _ = rf_evaluator.evaluate(_llm_span_without_io())
+    assert failure_msg == "fail_extract_faithfulness_inputs"
     assert rf_evaluator.llmobs_service.submit_evaluation.call_count == 0
 
 
@@ -89,6 +89,11 @@ def test_ragas_faithfulness_submits_evaluation(ragas, LLMObs, mock_llmobs_submit
                 label=RagasFaithfulnessEvaluator.LABEL,
                 metric_type=RagasFaithfulnessEvaluator.METRIC_TYPE,
                 value=1.0,
+                metadata={
+                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
+                    "_dd.faithfulness_disagreements": mock.ANY,
+                    "_dd.evaluation_kind": "faithfulness",
+                },
             )
         ]
     )
@@ -112,6 +117,50 @@ def test_ragas_faithfulness_submits_evaluation_on_span_with_question_in_messages
                 label=RagasFaithfulnessEvaluator.LABEL,
                 metric_type=RagasFaithfulnessEvaluator.METRIC_TYPE,
                 value=1.0,
+                metadata={
+                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
+                    "_dd.faithfulness_disagreements": mock.ANY,
+                    "_dd.evaluation_kind": "faithfulness",
+                },
+            )
+        ]
+    )
+
+
+@pytest.mark.vcr_logs
+def test_ragas_faithfulness_submits_evaluation_on_span_with_custom_keys(ragas, LLMObs, mock_llmobs_submit_evaluation):
+    """Test that evaluation is submitted for a valid llm span whose prompt declares custom context and query keys"""
+    rf_evaluator = RagasFaithfulnessEvaluator(LLMObs)
+    llm_span = _expected_llmobs_llm_span_event(
+        Span("dummy"),
+        prompt={
+            "variables": {
+                "user_input": "Is france part of europe?",
+                "context_1": "hello, ",
+                "context_2": "france is ",
+                "context_3": "part of europe",
+            },
+            "_dd_context_variable_keys": ["context_1", "context_2", "context_3"],
+            "_dd_query_variable_keys": ["user_input"],
+        },
+        output_messages=[{"content": "France is indeed part of europe"}],
+    )
+    rf_evaluator.run_and_submit_evaluation(llm_span)
+    rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls(
+        [
+            mock.call(
+                span_context={
+                    "span_id": llm_span.get("span_id"),
+                    "trace_id": llm_span.get("trace_id"),
+                },
+                label=RagasFaithfulnessEvaluator.LABEL,
+                metric_type=RagasFaithfulnessEvaluator.METRIC_TYPE,
+                value=1.0,
+                metadata={
+                    "_dd.evaluation_span": {"span_id": mock.ANY, "trace_id": mock.ANY},
+                    "_dd.faithfulness_disagreements": mock.ANY,
+                    "_dd.evaluation_kind": "faithfulness",
+                },
             )
         ]
     )
diff --git a/tests/profiling_v2/native_tests b/tests/profiling_v2/native_tests
new file mode 120000
index 00000000000..9173c0212ba
--- /dev/null
+++ b/tests/profiling_v2/native_tests
@@ -0,0 +1 @@
+../../ddtrace/internal/datadog/profiling/test
\ No newline at end of file