diff --git a/.buildkite/auditbeat/auditbeat-pipeline.yml b/.buildkite/auditbeat/auditbeat-pipeline.yml index 801768c271ec..ed19c7d91644 100644 --- a/.buildkite/auditbeat/auditbeat-pipeline.yml +++ b/.buildkite/auditbeat/auditbeat-pipeline.yml @@ -32,6 +32,9 @@ steps: command: | cd auditbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -47,6 +50,9 @@ steps: command: | cd auditbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_RHEL9}" @@ -62,6 +68,9 @@ steps: command: | Set-Location -Path auditbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -79,6 +88,9 @@ steps: command: | Set-Location -Path auditbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -97,6 +109,9 @@ steps: make -C auditbeat crosscompile env: GOX_FLAGS: "-arch amd64" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -115,6 +130,9 @@ steps: set -euo pipefail cd auditbeat mage unitTest + retry: + automatic: + - limit: 3 agents: provider: "aws" imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}" @@ -133,6 +151,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd auditbeat mage unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -147,6 +168,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd auditbeat mage unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_ARM}" @@ -164,6 +188,9 @@ steps: command: | Set-Location -Path auditbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -180,6 +207,9 @@ steps: command: | Set-Location -Path auditbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -196,6 +226,9 @@ steps: command: | Set-Location -Path auditbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" diff --git a/.buildkite/filebeat/filebeat-pipeline.yml b/.buildkite/filebeat/filebeat-pipeline.yml index 7eedd9d76fba..053e8dbec419 100644 --- a/.buildkite/filebeat/filebeat-pipeline.yml +++ b/.buildkite/filebeat/filebeat-pipeline.yml @@ -30,6 +30,9 @@ steps: command: | cd filebeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -45,6 +48,9 @@ steps: command: | cd filebeat mage goIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -60,6 +66,9 @@ steps: command: | cd filebeat mage pythonIntegTest + retry: + automatic: + - limit: 3 agents: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" @@ -76,6 +85,9 @@ steps: command: | Set-Location -Path filebeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -94,6 +106,9 @@ steps: command: | Set-Location -Path filebeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -118,6 +133,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd filebeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -136,6 +154,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd filebeat mage build unitTest + retry: + automatic: + - limit: 
3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_ARM}" @@ -152,6 +173,9 @@ steps: command: | cd filebeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "aws" imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}" @@ -172,6 +196,9 @@ steps: command: | Set-Location -Path filebeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -190,6 +217,9 @@ steps: command: | Set-Location -Path filebeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -208,6 +238,9 @@ steps: command: | Set-Location -Path filebeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" diff --git a/.buildkite/heartbeat/heartbeat-pipeline.yml b/.buildkite/heartbeat/heartbeat-pipeline.yml index 8091b2eead17..cadbcec1eca2 100644 --- a/.buildkite/heartbeat/heartbeat-pipeline.yml +++ b/.buildkite/heartbeat/heartbeat-pipeline.yml @@ -30,6 +30,9 @@ steps: command: | cd heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -45,6 +48,9 @@ steps: command: | cd heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_RHEL9}" @@ -61,6 +67,9 @@ steps: command: | Set-Location -Path heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -78,6 +87,9 @@ steps: command: | Set-Location -Path heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -94,6 +106,9 @@ steps: command: | cd heartbeat mage goIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -109,6 +124,9 @@ steps: command: | cd heartbeat mage pythonIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -130,6 +148,9 @@ steps: command: | cd heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "aws" imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}" @@ -151,6 +172,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -168,6 +192,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_ARM}" @@ -188,6 +215,9 @@ steps: command: | Set-Location -Path heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -205,6 +235,9 @@ steps: command: | Set-Location -Path heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -222,6 +255,9 @@ steps: command: | Set-Location -Path heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" diff --git a/.buildkite/libbeat/pipeline.libbeat.yml b/.buildkite/libbeat/pipeline.libbeat.yml index 040ad9b1d669..bc77712c330b 100644 --- a/.buildkite/libbeat/pipeline.libbeat.yml +++ b/.buildkite/libbeat/pipeline.libbeat.yml @@ -21,6 +21,9 @@ steps: set -euo pipefail cd libbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -38,6 +41,9 @@ steps: set -euo pipefail cd libbeat mage goIntegTest + retry: + automatic: + - limit: 
3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -55,6 +61,9 @@ steps: set -euo pipefail cd libbeat mage pythonIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -72,6 +81,9 @@ steps: set -euo pipefail cd libbeat make crosscompile + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -89,6 +101,9 @@ steps: set -euo pipefail cd libbeat make STRESS_TEST_OPTIONS='-timeout=20m -race -v -parallel 1' GOTEST_OUTPUT_OPTIONS=' | go-junit-report > libbeat-stress-test.xml' stress-tests + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -108,6 +123,9 @@ steps: set -euo pipefail cd libbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "aws" imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}" diff --git a/.buildkite/metricbeat/pipeline.yml b/.buildkite/metricbeat/pipeline.yml index 1fb6bfcc2370..d15212d2ef32 100644 --- a/.buildkite/metricbeat/pipeline.yml +++ b/.buildkite/metricbeat/pipeline.yml @@ -32,6 +32,9 @@ steps: - label: ":linux: Ubuntu Unit Tests" key: "mandatory-linux-unit-test" command: "cd metricbeat && mage build unitTest" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -62,6 +65,9 @@ steps: echo "~~~ Running tests" export KUBECONFIG="$$PWD/kubecfg" cd metricbeat && mage goIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -92,6 +98,9 @@ steps: echo "~~~ Running tests" export KUBECONFIG="$$PWD/kubecfg" cd metricbeat && mage pythonIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -106,6 +115,9 @@ steps: - label: ":negative_squared_cross_mark: Cross compile" key: "mandatory-cross-compile" command: "make -C metricbeat crosscompile" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -122,6 +134,9 @@ steps: Set-Location -Path metricbeat mage build unitTest key: "mandatory-win-2016-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -140,6 +155,9 @@ steps: Set-Location -Path metricbeat mage build unitTest key: "mandatory-win-2022-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -162,6 +180,9 @@ steps: Set-Location -Path metricbeat mage build unitTest key: "extended-win-10-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -180,6 +201,9 @@ steps: Set-Location -Path metricbeat mage build unitTest key: "extended-win-11-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -198,6 +222,9 @@ steps: Set-Location -Path metricbeat mage build unitTest key: "extended-win-2019-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -221,6 +248,9 @@ steps: set -euo pipefail source .buildkite/scripts/install_macos_tools.sh cd metricbeat && mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -238,6 +268,9 @@ steps: set -euo pipefail source .buildkite/scripts/install_macos_tools.sh cd metricbeat && mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_ARM}" diff --git a/.buildkite/packaging.pipeline.yml b/.buildkite/packaging.pipeline.yml index a7fdabb2268d..5fd559f458d3 100644 --- 
a/.buildkite/packaging.pipeline.yml +++ b/.buildkite/packaging.pipeline.yml @@ -12,17 +12,42 @@ env: PLATFORMS_ARM: "linux/arm64" steps: + # we use concurrency gates (https://buildkite.com/blog/concurrency-gates) + # to implement two FIFO queues for DRA-snapshot and DRA-staging + # this prevents parallel builds and possibility of publishing out of order DRA artifacts if the first job takes longer than the second + + - name: Start of concurrency group for DRA Snapshot + if: build.branch =~ /^\d+\.\d+$$/ || build.branch == 'main' || build.env('RUN_SNAPSHOT') == "true" + command: echo "--> Start of concurrency gate dra-snapshot" + concurrency_group: "dra-gate-snapshot-$BUILDKITE_BRANCH" + concurrency: 1 + key: start-gate-snapshot + + - name: Start of concurrency group for DRA Staging + if: build.branch =~ /^\d+\.\d+$$/ + command: echo "--> Start of concurrency gate dra-staging" + concurrency_group: "dra-gate-staging-$BUILDKITE_BRANCH" + concurrency: 1 + key: start-gate-staging + + - wait + - group: Beats dashboards key: dashboards steps: - label: Snapshot dashboards if: build.branch =~ /^\d+\.\d+$$/ || build.branch == 'main' || build.env('RUN_SNAPSHOT') == "true" + depends_on: start-gate-snapshot key: dashboards-snapshot # TODO: container with go and make agents: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 commands: - make build/distributions/dependencies.csv - make beats-dashboards @@ -34,12 +59,17 @@ steps: - label: Staging dashboards if: build.branch =~ /^\d+\.\d+$$/ + depends_on: start-gate-staging key: dashboards-staging # TODO: container with go and make agents: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 commands: - make build/distributions/dependencies.csv - make beats-dashboards @@ -52,6 +82,7 @@ steps: - group: Packaging snapshot if: build.branch =~ /^\d+\.\d+$$/ || build.branch == 'main' || build.env('RUN_SNAPSHOT') == "true" key: packaging-snapshot + depends_on: start-gate-snapshot steps: - label: "SNAPSHOT: {{matrix}}" env: @@ -63,6 +94,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* matrix: @@ -93,6 +128,10 @@ steps: provider: "aws" imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}" instanceType: "${AWS_ARM_INSTANCE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* matrix: @@ -119,12 +158,16 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "c2-standard-16" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* - group: Packaging Staging - key: packaging-staging + depends_on: start-gate-staging ## Only for release if: build.branch =~ /^\d+\.\d+$$/ steps: @@ -138,6 +181,10 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* matrix: @@ -168,8 +215,12 @@ steps: provider: "aws" imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}" instanceType: "${AWS_ARM_INSTANCE_TYPE}" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - - build/distributions/** + - build/distributions/**/* matrix: - auditbeat - filebeat @@ -194,6 +245,10 @@ steps: provider: gcp 
image: "${IMAGE_UBUNTU_X86_64}" machineType: "c2-standard-16" + timeout_in_minutes: 40 + retry: + automatic: + - limit: 1 artifact_paths: - build/distributions/**/* @@ -207,6 +262,7 @@ steps: env: DRA_WORKFLOW: snapshot depends_on: + - start-gate-snapshot - packaging-snapshot - dashboards-snapshot command: | @@ -225,6 +281,7 @@ steps: env: DRA_WORKFLOW: staging depends_on: + - start-gate-staging - packaging-staging - dashboards-staging command: | @@ -235,3 +292,17 @@ steps: provider: gcp image: "${IMAGE_UBUNTU_X86_64}" machineType: "${GCP_DEFAULT_MACHINE_TYPE}" + + - wait + + - command: echo "End of concurrency gate dra-snapshot <--" + if: build.branch =~ /^\d+\.\d+$$/ || build.branch == 'main' || build.env('RUN_SNAPSHOT') == "true" + concurrency_group: "dra-gate-snapshot-$BUILDKITE_BRANCH" + concurrency: 1 + key: end-gate-snapshot + + - command: echo "End of concurrency gate dra-staging <--" + if: build.branch =~ /^\d+\.\d+$$/ + concurrency_group: "dra-gate-staging-$BUILDKITE_BRANCH" + concurrency: 1 + key: end-gate-staging diff --git a/.buildkite/packetbeat/pipeline.packetbeat.yml b/.buildkite/packetbeat/pipeline.packetbeat.yml index c0f5c1e1a735..d510107a89c2 100644 --- a/.buildkite/packetbeat/pipeline.packetbeat.yml +++ b/.buildkite/packetbeat/pipeline.packetbeat.yml @@ -28,6 +28,9 @@ steps: command: | cd packetbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -43,6 +46,9 @@ steps: command: | cd packetbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_RHEL9_X86_64}" @@ -58,6 +64,9 @@ steps: command: | Set-Location -Path packetbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -75,6 +84,9 @@ steps: command: | Set-Location -Path packetbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -96,6 +108,9 @@ steps: command: | Set-Location -Path packetbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -114,6 +129,9 @@ steps: Set-Location -Path packetbeat mage build unitTest key: "extended-win-11-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -132,6 +150,9 @@ steps: Set-Location -Path packetbeat mage build unitTest key: "extended-win-2019-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -156,6 +177,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd packetbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -174,6 +198,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd packetbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_ARM}" @@ -188,6 +215,9 @@ steps: key: "linux-arm64-unit-tests-extended" command: "cd packetbeat && mage build unitTest" if: build.env("BUILDKITE_PULL_REQUEST") == "false" || build.env("GITHUB_PR_LABELS") =~ /.*arm.*/ + retry: + automatic: + - limit: 3 agents: provider: "aws" imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}" diff --git a/.buildkite/pipeline-scheduler.yml b/.buildkite/pipeline-scheduler.yml new file mode 100644 index 000000000000..3f9b628bc63a --- /dev/null +++ b/.buildkite/pipeline-scheduler.yml @@ -0,0 +1,17 @@ +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/buildkite/pipeline-schema/main/schema.json
+
+# this intermediate pipeline is required because we can't specify a custom agent (k8s image) yet
+# in catalog-info: https://github.com/elastic/ci/blob/71e83d340e3b93ab43fcf16a7a70ac33bdeec6e9/terrazzo/terrazzo/constructs/buildkite/pipelines.py#L787-L842
+
+steps:
+  - label: ":pipeline: Generate trigger steps for $PIPELINES_TO_TRIGGER"
+    command: |
+      set -eo pipefail
+      .buildkite/pipeline-scheduler.py >steps.yml
+      echo "~~~ Printing pipeline steps"
+      yq . steps.yml
+      echo "~~~ Uploading steps"
+      buildkite-agent pipeline upload steps.yml
+    agents:
+      image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-beats-ci-with-hooks:0.1"
+      useCustomGlobalHooks: true
diff --git a/.buildkite/scripts/dra.sh b/.buildkite/scripts/dra.sh
index ec9d523bf3ce..5ce6e5884b99 100755
--- a/.buildkite/scripts/dra.sh
+++ b/.buildkite/scripts/dra.sh
@@ -70,11 +70,13 @@ docker run --rm \
   --artifact-set "main" \
   ${DRY_RUN} | tee rm-output.txt

-# extract the summary URL from a release manager output line like:
-# Report summary-18.22.0.html can be found at https://artifacts-staging.elastic.co/beats/18.22.0-ABCDEFGH/summary-18.22.0.html
-SUMMARY_URL=$(grep -E '^Report summary-.* can be found at ' rm-output.txt | grep -oP 'https://\S+' | awk '{print $1}')
-rm rm-output.txt
+if [[ "$DRY_RUN" != "--dry-run" ]]; then
+  # extract the summary URL from a release manager output line like:
+  # Report summary-18.22.0.html can be found at https://artifacts-staging.elastic.co/beats/18.22.0-ABCDEFGH/summary-18.22.0.html
+  SUMMARY_URL=$(grep -E '^Report summary-.* can be found at ' rm-output.txt | grep -oP 'https://\S+' | awk '{print $1}')
+  rm rm-output.txt

-# and make it easily clickable as a Builkite annotation
-printf "**Summary link:** [${SUMMARY_URL}](${SUMMARY_URL})\n" | buildkite-agent annotate --style=success
+  # and make it easily clickable as a Buildkite annotation
+  printf "**${DRA_WORKFLOW} summary link:** [${SUMMARY_URL}](${SUMMARY_URL})\n" | buildkite-agent annotate --style=success --append
+fi
diff --git a/.buildkite/winlogbeat/pipeline.winlogbeat.yml b/.buildkite/winlogbeat/pipeline.winlogbeat.yml
index c71858b45b0a..ff3327913492 100644
--- a/.buildkite/winlogbeat/pipeline.winlogbeat.yml
+++ b/.buildkite/winlogbeat/pipeline.winlogbeat.yml
@@ -24,6 +24,9 @@ steps:
   - label: ":ubuntu: Winlogbeat Crossccompile"
     key: "mandatory-cross-compile"
     command: "make -C winlogbeat crosscompile"
+    retry:
+      automatic:
+        - limit: 3
     agents:
       provider: "gcp"
       image: "${IMAGE_UBUNTU_X86_64}"
@@ -40,6 +43,9 @@ steps:
       Set-Location -Path winlogbeat
       mage build unitTest
     key: "mandatory-win-2016-unit-tests"
+    retry:
+      automatic:
+        - limit: 3
     agents:
       provider: "gcp"
       image: "${IMAGE_WIN_2016}"
@@ -58,6 +64,9 @@ steps:
       Set-Location -Path winlogbeat
       mage build unitTest
     key: "mandatory-win-2019-unit-tests"
+    retry:
+      automatic:
+        - limit: 3
     agents:
       provider: "gcp"
       image: "${IMAGE_WIN_2019}"
@@ -76,6 +85,9 @@ steps:
       Set-Location -Path winlogbeat
       mage build unitTest
     key: "mandatory-win-2022-unit-tests"
+    retry:
+      automatic:
+        - limit: 3
     agents:
       provider: "gcp"
       image: "${IMAGE_WIN_2022}"
@@ -99,6 +111,9 @@ steps:
       Set-Location -Path winlogbeat
       mage build unitTest
     key: "extended-win-10-unit-tests"
+    retry:
+      automatic:
+        - limit: 3
     agents:
       provider: "gcp"
       image: "${IMAGE_WIN_10}"
@@ -117,6 +132,9 @@ steps:
       Set-Location -Path winlogbeat
       mage build unitTest
     key: "extended-win-11-unit-tests"
+    retry:
+      automatic:
+        - limit: 3
     agents:
       provider: "gcp"
image: "${IMAGE_WIN_11}" diff --git a/.buildkite/x-pack/pipeline.xpack.auditbeat.yml b/.buildkite/x-pack/pipeline.xpack.auditbeat.yml index 36fcb9bebd99..80c298c725df 100644 --- a/.buildkite/x-pack/pipeline.xpack.auditbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.auditbeat.yml @@ -36,6 +36,9 @@ steps: echo "~~~ Will run tests with env var MODULE=$$MODULE" cd x-pack/auditbeat mage update build test + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -52,6 +55,9 @@ steps: command: | cd x-pack/auditbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_RHEL9_X86_64}" @@ -68,6 +74,9 @@ steps: Set-Location -Path x-pack/auditbeat mage build unitTest key: "mandatory-win-2022-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -86,6 +95,9 @@ steps: Set-Location -Path x-pack/auditbeat mage build unitTest key: "mandatory-win-2016-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -108,6 +120,9 @@ steps: Set-Location -Path x-pack/auditbeat mage build unitTest key: "extended-win-2019-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -126,6 +141,9 @@ steps: Set-Location -Path x-pack/auditbeat mage build unitTest key: "extended-win-10-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -144,6 +162,9 @@ steps: Set-Location -Path x-pack/auditbeat mage build unitTest key: "extended-win-11-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -167,6 +188,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd x-pack/auditbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -183,6 +207,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd x-pack/auditbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_ARM}" @@ -201,6 +228,9 @@ steps: command: | cd x-pack/auditbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "aws" imagePrefix: "${IMAGE_UBUNTU_ARM_64}" diff --git a/.buildkite/x-pack/pipeline.xpack.dockerlogbeat.yml b/.buildkite/x-pack/pipeline.xpack.dockerlogbeat.yml index 05aee81e4d80..a64f7851913b 100644 --- a/.buildkite/x-pack/pipeline.xpack.dockerlogbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.dockerlogbeat.yml @@ -28,6 +28,9 @@ steps: - label: ":ubuntu: Xpack/Dockerlogbeat Ubuntu Unit Tests" key: "mandatory-linux-unit-test" command: "cd x-pack/dockerlogbeat && mage build unitTest" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -44,6 +47,9 @@ steps: command: "cd x-pack/dockerlogbeat && mage goIntegTest" env: MODULE: $MODULE + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" diff --git a/.buildkite/x-pack/pipeline.xpack.filebeat.yml b/.buildkite/x-pack/pipeline.xpack.filebeat.yml index 795302bc2d99..b7e71e3c3c0a 100644 --- a/.buildkite/x-pack/pipeline.xpack.filebeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.filebeat.yml @@ -30,6 +30,9 @@ steps: command: | cd x-pack/filebeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -50,6 +53,9 @@ steps: defineModuleFromTheChangeSet x-pack/filebeat echo "~~~ Will run tests with env var MODULE=$$MODULE" cd 
x-pack/filebeat && mage goIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -70,6 +76,9 @@ steps: defineModuleFromTheChangeSet x-pack/filebeat echo "~~~ Running tests with env var MODULE=$$MODULE" cd x-pack/filebeat && mage pythonIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -86,6 +95,9 @@ steps: Set-Location -Path x-pack/filebeat mage build unitTest key: "x-pack-filebeat-mandatory-win-2022-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -104,6 +116,9 @@ steps: Set-Location -Path x-pack/filebeat mage build unitTest key: "x-pack-filebeat-mandatory-win-2016-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -122,6 +137,9 @@ steps: command: | cd x-pack/filebeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "aws" imagePrefix: "${IMAGE_UBUNTU_ARM_64}" @@ -142,6 +160,9 @@ steps: Set-Location -Path x-pack/filebeat mage build unitTest key: "x-pack-filebeat-extended-win-2019-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -160,6 +181,9 @@ steps: Set-Location -Path x-pack/filebeat mage build unitTest key: "x-pack-filebeat-extended-win-10-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -178,6 +202,9 @@ steps: Set-Location -Path x-pack/filebeat mage build unitTest key: "x-pack-filebeat-extended-win-11-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -200,6 +227,9 @@ steps: set -euo pipefail source .buildkite/scripts/install_macos_tools.sh cd x-pack/filebeat && mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -217,6 +247,9 @@ steps: set -euo pipefail source .buildkite/scripts/install_macos_tools.sh cd x-pack/filebeat && mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_ARM}" diff --git a/.buildkite/x-pack/pipeline.xpack.heartbeat.yml b/.buildkite/x-pack/pipeline.xpack.heartbeat.yml index 107dfa65f1b2..136706e698cc 100644 --- a/.buildkite/x-pack/pipeline.xpack.heartbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.heartbeat.yml @@ -39,6 +39,9 @@ steps: echo "~~~ Running tests" cd x-pack/heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -59,6 +62,9 @@ steps: echo "~~~ Running tests" cd x-pack/heartbeat mage goIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -76,6 +82,9 @@ steps: command: | Set-Location -Path x-pack/heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -95,6 +104,9 @@ steps: command: | Set-Location -Path x-pack/heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -118,6 +130,9 @@ steps: Set-Location -Path x-pack/heartbeat mage build test key: "extended-win-10-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -136,6 +151,9 @@ steps: Set-Location -Path x-pack/heartbeat mage build test key: "extended-win-11-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -153,6 +171,9 @@ steps: command: | Set-Location -Path x-pack/heartbeat 
mage build test + retry: + automatic: + - limit: 3 key: "extended-win-2019-unit-tests" agents: provider: "gcp" @@ -166,7 +187,7 @@ steps: notify: - github_commit_status: context: "x-pack/heartbeat: Windows 2019 Unit Tests" - + - group: "x-pack/heartbeat MacOS Extended Tests" key: "x-pack-heartbeat-extended-tests-macos" if: build.env("BUILDKITE_PULL_REQUEST") == "false" || build.env("GITHUB_PR_LABELS") =~ /.*macOS.*/ @@ -179,6 +200,9 @@ steps: installNodeJsDependencies cd x-pack/heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -197,6 +221,9 @@ steps: installNodeJsDependencies cd x-pack/heartbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_ARM}" diff --git a/.buildkite/x-pack/pipeline.xpack.libbeat.yml b/.buildkite/x-pack/pipeline.xpack.libbeat.yml index 14316a3ecd70..6bf456f6d83d 100644 --- a/.buildkite/x-pack/pipeline.xpack.libbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.libbeat.yml @@ -26,6 +26,9 @@ steps: command: | cd x-pack/libbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -42,6 +45,9 @@ steps: command: | cd x-pack/libbeat mage goIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -58,6 +64,9 @@ steps: command: | cd x-pack/libbeat mage pythonIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -74,6 +83,9 @@ steps: Set-Location -Path x-pack/libbeat mage -w reader\etw build goUnitTest key: "mandatory-win-2016-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -92,6 +104,9 @@ steps: Set-Location -Path x-pack/libbeat mage -w reader\etw build goUnitTest key: "mandatory-win-2022-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -114,6 +129,9 @@ steps: Set-Location -Path x-pack/libbeat mage -w reader\etw build goUnitTest key: "extended-win-10-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -132,6 +150,9 @@ steps: Set-Location -Path x-pack/libbeat mage -w reader\etw build goUnitTest key: "extended-win-11-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -150,6 +171,9 @@ steps: Set-Location -Path x-pack/libbeat mage -w reader\etw build goUnitTest key: "extended-win-2019-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -172,6 +196,9 @@ steps: command: | cd x-pack/libbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "aws" imagePrefix: "${IMAGE_UBUNTU_ARM_64}" diff --git a/.buildkite/x-pack/pipeline.xpack.metricbeat.yml b/.buildkite/x-pack/pipeline.xpack.metricbeat.yml index 317b9069c556..4c1c31521f92 100644 --- a/.buildkite/x-pack/pipeline.xpack.metricbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.metricbeat.yml @@ -30,6 +30,9 @@ steps: command: | cd x-pack/metricbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -50,6 +53,9 @@ steps: defineModuleFromTheChangeSet x-pack/metricbeat echo "~~~ Will run tests with env var MODULE=$$MODULE" cd x-pack/metricbeat && mage goIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -70,6 +76,9 @@ steps: defineModuleFromTheChangeSet x-pack/metricbeat 
echo "~~~ Running tests with env var MODULE=$$MODULE" cd x-pack/metricbeat && mage pythonIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -86,6 +95,9 @@ steps: Set-Location -Path x-pack/metricbeat mage build unitTest key: "mandatory-win-2016-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -104,6 +116,9 @@ steps: Set-Location -Path x-pack/metricbeat mage build unitTest key: "mandatory-win-2022-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -126,6 +141,9 @@ steps: Set-Location -Path x-pack/metricbeat mage build unitTest key: "extended-win-10-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -144,6 +162,9 @@ steps: Set-Location -Path x-pack/metricbeat mage build unitTest key: "extended-win-11-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -162,6 +183,9 @@ steps: Set-Location -Path x-pack/metricbeat mage build unitTest key: "extended-win-2019-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -185,6 +209,9 @@ steps: set -euo pipefail source .buildkite/scripts/install_macos_tools.sh cd x-pack/metricbeat && mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -202,6 +229,9 @@ steps: set -euo pipefail source .buildkite/scripts/install_macos_tools.sh cd x-pack/metricbeat && mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_ARM}" diff --git a/.buildkite/x-pack/pipeline.xpack.osquerybeat.yml b/.buildkite/x-pack/pipeline.xpack.osquerybeat.yml index 8c9137cb423e..c8ecac79735b 100644 --- a/.buildkite/x-pack/pipeline.xpack.osquerybeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.osquerybeat.yml @@ -30,6 +30,9 @@ steps: command: | cd x-pack/osquerybeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -46,6 +49,9 @@ steps: command: | cd x-pack/osquerybeat mage goIntegTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -62,6 +68,9 @@ steps: Set-Location -Path x-pack/osquerybeat mage build unitTest key: "mandatory-win-2016-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -80,6 +89,9 @@ steps: Set-Location -Path x-pack/osquerybeat mage build unitTest key: "mandatory-win-2022-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -102,6 +114,9 @@ steps: Set-Location -Path x-pack/osquerybeat mage build unitTest key: "extended-win-10-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -120,6 +135,9 @@ steps: Set-Location -Path x-pack/osquerybeat mage build unitTest key: "extended-win-11-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -138,6 +156,9 @@ steps: Set-Location -Path x-pack/osquerybeat mage build unitTest key: "extended-win-2019-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -160,6 +181,9 @@ steps: set -euo pipefail source .buildkite/scripts/install_macos_tools.sh cd x-pack/osquerybeat && mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -175,6 +199,9 
@@ steps: set -euo pipefail source .buildkite/scripts/install_macos_tools.sh cd x-pack/osquerybeat && mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_ARM}" diff --git a/.buildkite/x-pack/pipeline.xpack.packetbeat.yml b/.buildkite/x-pack/pipeline.xpack.packetbeat.yml index 77fdf2af8483..1ab71c30d7df 100644 --- a/.buildkite/x-pack/pipeline.xpack.packetbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.packetbeat.yml @@ -29,6 +29,9 @@ steps: command: | cd x-pack/packetbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -45,6 +48,9 @@ steps: command: | cd x-pack/packetbeat mage systemTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_UBUNTU_X86_64}" @@ -61,6 +67,9 @@ steps: command: | cd x-pack/packetbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_RHEL9_X86_64}" @@ -77,6 +86,9 @@ steps: Set-Location -Path x-pack/packetbeat mage build unitTest key: "mandatory-win-2016-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -95,6 +107,9 @@ steps: Set-Location -Path x-pack/packetbeat mage build unitTest key: "mandatory-win-2022-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -114,6 +129,9 @@ steps: command: | Set-Location -Path x-pack/packetbeat mage systemTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -136,6 +154,9 @@ steps: Set-Location -Path x-pack/packetbeat mage build unitTest key: "extended-win-10-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -154,6 +175,9 @@ steps: Set-Location -Path x-pack/packetbeat mage build unitTest key: "extended-win-11-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -172,6 +196,9 @@ steps: Set-Location -Path x-pack/packetbeat mage build unitTest key: "extended-win-2019-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -191,6 +218,9 @@ steps: command: | Set-Location -Path x-pack/packetbeat mage systemTest + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -214,6 +244,9 @@ steps: cd x-pack/packetbeat mage build unitTest if: build.env("GITHUB_PR_LABELS") =~ /.*arm.*/ + retry: + automatic: + - limit: 3 agents: provider: "aws" imagePrefix: "${IMAGE_UBUNTU_ARM_64}" @@ -236,6 +269,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd x-pack/packetbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_X86_64}" @@ -253,6 +289,9 @@ steps: source .buildkite/scripts/install_macos_tools.sh cd x-pack/packetbeat mage build unitTest + retry: + automatic: + - limit: 3 agents: provider: "orka" imagePrefix: "${IMAGE_MACOS_ARM}" diff --git a/.buildkite/x-pack/pipeline.xpack.winlogbeat.yml b/.buildkite/x-pack/pipeline.xpack.winlogbeat.yml index c07e537adf09..c6b5a6f59fe5 100644 --- a/.buildkite/x-pack/pipeline.xpack.winlogbeat.yml +++ b/.buildkite/x-pack/pipeline.xpack.winlogbeat.yml @@ -29,6 +29,9 @@ steps: mage build unitTest env: MODULE: $MODULE + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" @@ -47,6 +50,9 @@ steps: Set-Location -Path x-pack/winlogbeat mage build unitTest key: "mandatory-win-2016-unit-tests" + retry: + automatic: + - limit: 3 
agents: provider: "gcp" image: "${IMAGE_WIN_2016}" @@ -65,6 +71,9 @@ steps: Set-Location -Path x-pack/winlogbeat mage build unitTest key: "mandatory-win-2022-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2022}" @@ -88,6 +97,9 @@ steps: Set-Location -Path x-pack/winlogbeat mage build unitTest key: "extended-win-10-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_10}" @@ -106,6 +118,9 @@ steps: Set-Location -Path x-pack/winlogbeat mage build unitTest key: "extended-win-11-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_11}" @@ -124,6 +139,9 @@ steps: Set-Location -Path x-pack/winlogbeat mage build unitTest key: "extended-win-2019-unit-tests" + retry: + automatic: + - limit: 3 agents: provider: "gcp" image: "${IMAGE_WIN_2019}" diff --git a/.ci/jobs/packaging.yml b/.ci/jobs/packaging.yml index 6d4b136a5573..50cec32edd84 100644 --- a/.ci/jobs/packaging.yml +++ b/.ci/jobs/packaging.yml @@ -14,7 +14,7 @@ discover-pr-forks-trust: 'permission' discover-pr-origin: 'merge-current' discover-tags: true - head-filter-regex: '(7\.1[6789]|8\.\d+|PR-.*|v\d+\.\d+\.\d+)' + head-filter-regex: '(7\.1[6789]|8\.13|PR-.*|v8\.13\.\d+)' disable-pr-notifications: true notification-context: 'beats-packaging' repo: 'beats' @@ -28,11 +28,11 @@ ignore-tags-older-than: -1 ignore-tags-newer-than: 30 - named-branches: - - regex-name: - regex: '7\.1[6789]' + - exact-name: + name: '8.13' case-sensitive: true - regex-name: - regex: '8\.\d+' + regex: '7\.1[6789]' case-sensitive: true - change-request: ignore-target-only-changes: true diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 7fcaca8ac9e1..bbd4255fd870 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -34,6 +34,7 @@ updates: - dependency-name: github.com/elastic/go-perf - dependency-name: github.com/elastic/go-seccomp-bpf - dependency-name: github.com/elastic/toutoumomoma + - dependency-name: github.com/elastic/ebpfevents ignore: # Skip github.com/elastic/mito because it requires documentation updates. - dependency-name: github.com/elastic/mito diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index a1f6bf4aadd1..e9e5ec09236c 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -94,8 +94,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] *Auditbeat* - Set field types to correctly match ECS in sessionmd processor {issue}38955[38955] {pull}38994[38994] -- Keep process info on exited processes, to avoid failing to enrich events in sessionmd processor {pull}39173[39173] - +- Fix failing to enrich process events in sessionmd processor {issue}38955[38955] {pull}39173[39173] {pull}39243[39243] - Prevent scenario of losing children-related file events in a directory for recursive fsnotify backend of auditbeat file integrity module {pull}39133[39133] - Allow extra syscalls by auditbeat required in FIM with kprobes back-end {pull}39361[39361] @@ -143,6 +142,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Updated Websocket input title to align with existing inputs {pull}39006[39006] - Restore netflow input on Windows {pull}39024[39024] - Upgrade azure-event-hubs-go and azure-storage-blob-go dependencies. {pull}38861[38861] +- Fix concurrency/error handling bugs in the AWS S3 input that could drop data and prevent ingestion of large buckets. 
{pull}39131[39131] *Heartbeat* @@ -160,6 +160,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Fix fields not being parsed correctly in postgresql/database {issue}25301[25301] {pull}37720[37720] - rabbitmq/queue - Change the mapping type of `rabbitmq.queue.consumers.utilisation.pct` to `scaled_float` from `long` because the values fall within the range of `[0.0, 1.0]`. Previously, conversion to integer resulted in reporting either `0` or `1`. - Fix timeout caused by the retrival of which indices are hidden {pull}39165[39165] +- Fix Azure Monitor support for multiple aggregation types {issue}39192[39192] {pull}39204[39204] *Osquerybeat* diff --git a/NOTICE.txt b/NOTICE.txt index f060baf40980..951b7e7785c1 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -12525,11 +12525,11 @@ various licenses: -------------------------------------------------------------------------------- Dependency : github.com/elastic/elastic-agent-autodiscover -Version: v0.6.13 +Version: v0.6.14 Licence type (autodetected): Apache-2.0 -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/elastic/elastic-agent-autodiscover@v0.6.13/LICENSE: +Contents of probable licence file $GOMODCACHE/github.com/elastic/elastic-agent-autodiscover@v0.6.14/LICENSE: Apache License Version 2.0, January 2004 @@ -25433,11 +25433,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- Dependency : golang.org/x/net -Version: v0.21.0 +Version: v0.23.0 Licence type (autodetected): BSD-3-Clause -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/golang.org/x/net@v0.21.0/LICENSE: +Contents of probable licence file $GOMODCACHE/golang.org/x/net@v0.23.0/LICENSE: Copyright (c) 2009 The Go Authors. All rights reserved. 
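
Stepping back from the individual diffs: the pipeline changes above apply one recurring stanza, `retry: automatic: - limit: 3`, to nearly every test step, and the packaging pipeline brackets its work between paired concurrency steps. The following is a minimal sketch of both patterns on a hypothetical pipeline; the step labels, keys, and commands here are illustrative and not taken from the diff:

```yaml
steps:
  # FIFO gate: with concurrency 1, builds on the same branch queue here
  # one at a time, which prevents out-of-order artifact publishing.
  - label: "Start of concurrency gate"
    command: echo "--> start gate"
    concurrency_group: "example-gate-$BUILDKITE_BRANCH"
    concurrency: 1
    key: start-gate

  - wait

  # The retry stanza added throughout this change set: when the step
  # fails, Buildkite retries it automatically, up to 3 times.
  - label: "Unit tests"
    command: mage build unitTest
    depends_on: start-gate
    retry:
      automatic:
        - limit: 3

  - wait

  # Closing step in the same concurrency group releases the next queued build.
  - label: "End of concurrency gate"
    command: echo "end gate <--"
    concurrency_group: "example-gate-$BUILDKITE_BRANCH"
    concurrency: 1
    key: end-gate
```

Because the start and end steps share one `concurrency_group` with `concurrency: 1`, a second build cannot pass the start gate until the previous build has run its end gate; that is what serializes the DRA snapshot and staging publishing per branch, as the comment in packaging.pipeline.yml describes.
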
diff --git a/catalog-info.yaml b/catalog-info.yaml index 116e50246634..34d9e397ca3e 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -61,7 +61,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -108,7 +108,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -155,7 +155,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -202,7 +202,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -249,7 +249,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -296,7 +296,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -343,7 +343,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -390,7 +390,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -436,7 +436,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -483,7 +483,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -530,7 +530,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -577,7 +577,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -624,7 +624,7 @@ spec: ingest-fp: access_level: 
MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -671,7 +671,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -706,7 +706,7 @@ spec: release-eng: access_level: BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json apiVersion: backstage.io/v1alpha1 @@ -788,7 +788,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -835,7 +835,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -882,7 +882,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -929,7 +929,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -976,7 +976,7 @@ spec: ingest-fp: access_level: MANAGE_BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -1015,7 +1015,7 @@ spec: release-eng: access_level: BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json @@ -1040,24 +1040,33 @@ spec: spec: repository: elastic/beats pipeline_file: ".buildkite/packaging.pipeline.yml" - branch_configuration: "main" + branch_configuration: "main 8.14" # TODO enable after packaging backports for release branches # branch_configuration: "main 8.* 7.17" cancel_intermediate_builds: false skip_intermediate_builds: false + maximum_timeout_in_minutes: 90 provider_settings: + build_branches: true + build_pull_request_forks: false + build_pull_requests: false + build_tags: false + filter_condition: >- + build.branch =~ /^[0-9]+\.[0-9]+$$/ || build.branch == "main" + filter_enabled: true trigger_mode: code env: ELASTIC_SLACK_NOTIFICATIONS_ENABLED: 'true' SLACK_NOTIFICATIONS_CHANNEL: '#ingest-notifications' SLACK_NOTIFICATIONS_ON_SUCCESS: 'false' + SLACK_NOTIFICATIONS_SKIP_FOR_RETRIES: 'true' teams: ingest-fp: access_level: MANAGE_BUILD_AND_READ release-eng: access_level: BUILD_AND_READ everyone: - access_level: READ_ONLY + access_level: BUILD_AND_READ --- # yaml-language-server: 
$schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json
@@ -1087,29 +1096,56 @@ spec:
       skip_intermediate_builds: false
       provider_settings:
         trigger_mode: none
-      # TODO uncomment out after https://github.com/elastic/ingest-dev/issues/3235
-      # schedules:
-      #   # TODO to be replaced with a generic scheduler similar to https://github.com/elastic/logstash/pull/15705
-      #   Daily run of ironbank validation / main:
-      #     branch: main
-      #     cronline: 30 02 * * *
-      #     message: Daily trigger of IronBank validation on main
-      #   Daily run of ironbank validation / 8.14:
-      #     branch: 8.14
-      #     cronline: 30 02 * * *
-      #     message: Daily trigger of IronBank validation on 8.14
-      #   Daily run of ironbank validation / 8.13:
-      #     branch: 8.13
-      #     cronline: 30 02 * * *
-      #     message: Daily trigger of IronBank validation on 8.13
-      #   Daily run of ironbank validation / 7.17:
-      #     branch: 7.17
-      #     cronline: 30 02 * * *
-      #     message: Daily trigger of IronBank validation on 7.17
       teams:
         ingest-fp:
           access_level: MANAGE_BUILD_AND_READ
         release-eng:
           access_level: BUILD_AND_READ
         everyone:
-          access_level: READ_ONLY
+          access_level: BUILD_AND_READ
+
+---
+# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json
+apiVersion: backstage.io/v1alpha1
+kind: Resource
+metadata:
+  name: beats-pipeline-scheduler
+  description: 'Scheduled runs of various Beats pipelines per release branch'
+  links:
+    - title: 'Scheduled runs of Beats pipelines per release branch'
+      url: https://buildkite.com/elastic/logstash-pipeline-scheduler
+spec:
+  type: buildkite-pipeline
+  owner: group:ingest-fp
+  system: buildkite
+  implementation:
+    apiVersion: buildkite.elastic.dev/v1
+    kind: Pipeline
+    metadata:
+      name: beats-pipeline-scheduler
+      description: ':alarm_clock: Scheduled runs of various Beats pipelines per release branch'
+    spec:
+      repository: elastic/beats
+      pipeline_file: ".buildkite/pipeline-scheduler.yml"
+      maximum_timeout_in_minutes: 240
+      schedules:
+        Daily run of Iron Bank validation:
+          branch: main
+          cronline: 30 02 * * *
+          message: Daily trigger of Iron Bank validation Pipeline per branch
+          env:
+            PIPELINES_TO_TRIGGER: 'beats-ironbank-validation'
+      skip_intermediate_builds: true
+      provider_settings:
+        trigger_mode: none
+      env:
+        ELASTIC_SLACK_NOTIFICATIONS_ENABLED: 'true'
+        SLACK_NOTIFICATIONS_CHANNEL: '#ingest-notifications'
+        SLACK_NOTIFICATIONS_ON_SUCCESS: 'false'
+      teams:
+        ingest-fp:
+          access_level: MANAGE_BUILD_AND_READ
+        release-eng:
+          access_level: BUILD_AND_READ
+        everyone:
+          access_level: BUILD_AND_READ
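
The new `beats-pipeline-scheduler` resource above wires a cron schedule to the intermediate `.buildkite/pipeline-scheduler.yml` pipeline, which runs `pipeline-scheduler.py` and uploads whatever steps the script prints via `buildkite-agent pipeline upload`. The script's output is not part of this diff, but Buildkite `trigger` steps are the standard way such a scheduler fans out to other pipelines, so a plausible generated `steps.yml` might look like the sketch below; the pipeline slug and branch list are assumptions based on `PIPELINES_TO_TRIGGER` and the release branches named in the removed TODO block:

```yaml
# Hypothetical output of pipeline-scheduler.py: one trigger step per
# (pipeline, release branch) pair listed in $PIPELINES_TO_TRIGGER.
steps:
  - trigger: beats-ironbank-validation
    label: "Trigger beats-ironbank-validation / main"
    build:
      branch: "main"
  - trigger: beats-ironbank-validation
    label: "Trigger beats-ironbank-validation / 8.14"
    build:
      branch: "8.14"
  - trigger: beats-ironbank-validation
    label: "Trigger beats-ironbank-validation / 7.17"
    build:
      branch: "7.17"
```
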
diff --git a/dev-tools/mage/kubernetes/kuberemote.go b/dev-tools/mage/kubernetes/kuberemote.go
index 8e9d9897d44f..e3062f00d1ad 100644
--- a/dev-tools/mage/kubernetes/kuberemote.go
+++ b/dev-tools/mage/kubernetes/kuberemote.go
@@ -250,7 +250,7 @@ func (r *KubeRemote) waitForPod(wait time.Duration, condition watchtools.Conditi
 	return nil, err
 }

-// portFoward runs the port forwarding so SSH rsync can be ran into the pod.
+// portForward runs the port forwarding so SSH rsync can be run into the pod.
 func (r *KubeRemote) portForward(ports []string, stopChannel, readyChannel chan struct{}, stdout, stderr io.Writer) (*portforward.PortForwarder, error) {
 	roundTripper, upgrader, err := spdy.RoundTripperFor(r.cfg)
 	if err != nil {
diff --git a/filebeat/docs/inputs/input-filestream-file-options.asciidoc b/filebeat/docs/inputs/input-filestream-file-options.asciidoc
index 47a8c819d9ea..5436d3863dc2 100644
--- a/filebeat/docs/inputs/input-filestream-file-options.asciidoc
+++ b/filebeat/docs/inputs/input-filestream-file-options.asciidoc
@@ -517,6 +517,30 @@ less than or equal to `prospector.scanner.check_interval`
 If `backoff.max` needs to be higher, it is recommended to close the file
 handler instead and let {beatname_uc} pick up the file again.

+[float]
+[id="{beatname_lc}-input-{type}-harvester-limit"]
+===== `harvester_limit`
+
+The `harvester_limit` option limits the number of harvesters that are started in
+parallel for one input. This directly relates to the maximum number of file
+handlers that are opened. The default for `harvester_limit` is 0, which means
+there is no limit. This configuration is useful if the number of files to be
+harvested exceeds the open file handler limit of the operating system.
+
+Setting a limit on the number of harvesters means that potentially not all files
+are opened in parallel. Therefore we recommend that you use this option in
+combination with the `close.on_state_change.*` options to make sure
+harvesters are stopped more often so that new files can be picked up.
+
+Currently, if a new harvester can be started again, the harvester is picked
+randomly. This means it's possible that the harvester for a file that was just
+closed and then updated again might be started instead of the harvester for a
+file that hasn't been harvested for a longer period of time.
+
+This configuration option applies per input. You can use this option to
+indirectly set higher priorities on certain inputs by assigning a higher
+limit of harvesters.
+
 [float]
 ===== `file_identity`
diff --git a/filebeat/docs/inputs/input-filestream.asciidoc b/filebeat/docs/inputs/input-filestream.asciidoc
index 47d1b24a8e85..54283d6cce79 100644
--- a/filebeat/docs/inputs/input-filestream.asciidoc
+++ b/filebeat/docs/inputs/input-filestream.asciidoc
@@ -11,8 +11,9 @@ Use the `filestream` input to read lines from active log files. It is the new,
 improved alternative to the `log` input. It comes with various improvements
 to the existing input:

-1. Checking of `close_*` options happens out of band. Thus, if an output is blocked,
-{beatname_uc} can close the reader and avoid keeping too many files open.
+1. Checking of `close.on_state_change.*` options happens out of
+band. Thus, if an output is blocked, {beatname_uc} can close the
+reader and avoid keeping too many files open.

 2. Detailed metrics are available for all files that match the `paths`
 configuration regardless of the `harvester_limit`. This way, you can keep
 track of all files,
diff --git a/filebeat/input/filestream/environment_test.go b/filebeat/input/filestream/environment_test.go
index 7c3c8ccd4d3b..88163258938a 100644
--- a/filebeat/input/filestream/environment_test.go
+++ b/filebeat/input/filestream/environment_test.go
@@ -448,7 +448,7 @@ func (e *inputTestingEnvironment) waitUntilHarvesterIsDone() {
 	}
 }

-// requireEventReceived requires that the list of messages has made it into the output.
+// requireEventsReceived requires that the list of messages has made it into the output.
func (e *inputTestingEnvironment) requireEventsReceived(events []string) { foundEvents := make([]bool, len(events)) checkedEventCount := 0 diff --git a/go.mod b/go.mod index 0805e9200c8d..ad13afabd8da 100644 --- a/go.mod +++ b/go.mod @@ -154,7 +154,7 @@ require ( golang.org/x/crypto v0.21.0 golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 golang.org/x/mod v0.14.0 - golang.org/x/net v0.21.0 + golang.org/x/net v0.23.0 golang.org/x/oauth2 v0.10.0 golang.org/x/sync v0.5.0 golang.org/x/sys v0.18.0 @@ -203,7 +203,7 @@ require ( github.com/awslabs/kinesis-aggregation/go/v2 v2.0.0-20220623125934-28468a6701b5 github.com/elastic/bayeux v1.0.5 github.com/elastic/ebpfevents v0.6.0 - github.com/elastic/elastic-agent-autodiscover v0.6.13 + github.com/elastic/elastic-agent-autodiscover v0.6.14 github.com/elastic/elastic-agent-libs v0.7.5 github.com/elastic/elastic-agent-shipper-client v0.5.1-0.20230228231646-f04347b666f3 github.com/elastic/elastic-agent-system-metrics v0.9.2 diff --git a/go.sum b/go.sum index 57711b7a9feb..5c45bdee7488 100644 --- a/go.sum +++ b/go.sum @@ -551,8 +551,8 @@ github.com/elastic/dhcp v0.0.0-20200227161230-57ec251c7eb3 h1:lnDkqiRFKm0rxdljqr github.com/elastic/dhcp v0.0.0-20200227161230-57ec251c7eb3/go.mod h1:aPqzac6AYkipvp4hufTyMj5PDIphF3+At8zr7r51xjY= github.com/elastic/ebpfevents v0.6.0 h1:BrL3m7JFK7U6h2jkbk3xAWWs//IZnugCHEDds5u2v68= github.com/elastic/ebpfevents v0.6.0/go.mod h1:ESG9gw7N+n5yCCMgdg1IIJENKWSmX7+X0Fi9GUs9nvU= -github.com/elastic/elastic-agent-autodiscover v0.6.13 h1:zBeTxV+o2efEKntY+o6iMMNJ1AVjDXUqY3o6uzIkKaw= -github.com/elastic/elastic-agent-autodiscover v0.6.13/go.mod h1:7P6YVKxuBT0qE/VxuA87obwZUAEU0O44mCN3r4/6x8w= +github.com/elastic/elastic-agent-autodiscover v0.6.14 h1:0zJYNyv9GKTOiNqCHqEVboP+WioV73ia17Et+UlFbz8= +github.com/elastic/elastic-agent-autodiscover v0.6.14/go.mod h1:39/fHHlnyTK6oUNZfAhxJwBTVahO9tNasEIjzsxGMu8= github.com/elastic/elastic-agent-client/v7 v7.8.1 h1:J9wZc/0mUvSEok0X5iR5+n60Jgb+AWooKddb3XgPWqM= github.com/elastic/elastic-agent-client/v7 v7.8.1/go.mod h1:axl1nkdqc84YRFkeJGD9jExKNPUrOrzf3DFo2m653nY= github.com/elastic/elastic-agent-libs v0.7.5 h1:4UMqB3BREvhwecYTs/L23oQp1hs/XUkcunPlmTZn5yg= @@ -1960,8 +1960,8 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4= -golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190130055435-99b60b757ec1/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= diff --git a/libbeat/monitoring/report/log/log.go b/libbeat/monitoring/report/log/log.go index 886e207593a3..e11e8228cf70 100644 --- a/libbeat/monitoring/report/log/log.go +++ b/libbeat/monitoring/report/log/log.go @@ -37,34 +37,36 @@ import ( // TODO: Replace this with a proper solution that uses the metric type from // where it is defined. 
See: https://github.com/elastic/beats/issues/5433 var gauges = map[string]bool{ - "libbeat.output.events.active": true, - "libbeat.pipeline.events.active": true, - "libbeat.pipeline.clients": true, - "libbeat.config.module.running": true, - "registrar.states.current": true, - "filebeat.events.active": true, - "filebeat.harvester.running": true, - "filebeat.harvester.open_files": true, - "beat.memstats.memory_total": true, - "beat.memstats.memory_alloc": true, - "beat.memstats.rss": true, - "beat.memstats.gc_next": true, - "beat.info.uptime.ms": true, - "beat.cgroup.memory.mem.usage.bytes": true, - "beat.cpu.user.ticks": true, - "beat.cpu.system.ticks": true, - "beat.cpu.total.value": true, - "beat.cpu.total.ticks": true, - "beat.handles.open": true, - "beat.handles.limit.hard": true, - "beat.handles.limit.soft": true, - "beat.runtime.goroutines": true, - "system.load.1": true, - "system.load.5": true, - "system.load.15": true, - "system.load.norm.1": true, - "system.load.norm.5": true, - "system.load.norm.15": true, + "libbeat.output.events.active": true, + "libbeat.pipeline.events.active": true, + "libbeat.pipeline.clients": true, + "libbeat.pipeline.queue.max_events": true, + "libbeat.pipeline.queue.filled.pct.events": true, + "libbeat.config.module.running": true, + "registrar.states.current": true, + "filebeat.events.active": true, + "filebeat.harvester.running": true, + "filebeat.harvester.open_files": true, + "beat.memstats.memory_total": true, + "beat.memstats.memory_alloc": true, + "beat.memstats.rss": true, + "beat.memstats.gc_next": true, + "beat.info.uptime.ms": true, + "beat.cgroup.memory.mem.usage.bytes": true, + "beat.cpu.user.ticks": true, + "beat.cpu.system.ticks": true, + "beat.cpu.total.value": true, + "beat.cpu.total.ticks": true, + "beat.handles.open": true, + "beat.handles.limit.hard": true, + "beat.handles.limit.soft": true, + "beat.runtime.goroutines": true, + "system.load.1": true, + "system.load.5": true, + "system.load.15": true, + "system.load.norm.1": true, + "system.load.norm.5": true, + "system.load.norm.15": true, } // isGauge returns true when the given metric key name represents a gauge value. @@ -249,16 +251,16 @@ func toKeyValuePairs(snaps map[string]monitoring.FlatSnapshot) []interface{} { for name, snap := range snaps { data := make(mapstr.M, snapshotLen(snap)) for k, v := range snap.Bools { - data.Put(k, v) //nolint:errcheck // All keys within the flat snapshot are unique and are for scalar values. + data.Put(k, v) } for k, v := range snap.Floats { - data.Put(k, v) //nolint:errcheck // All keys within the flat snapshot are unique and are for scalar values. + data.Put(k, v) } for k, v := range snap.Ints { - data.Put(k, v) //nolint:errcheck // All keys within the flat snapshot are unique and are for scalar values. + data.Put(k, v) } for k, v := range snap.Strings { - data.Put(k, v) //nolint:errcheck // All keys within the flat snapshot are unique and are for scalar values. 
+			data.Put(k, v)
 		}
 		if len(data) > 0 {
 			args = append(args, logp.Reflect(name, data))
diff --git a/libbeat/processors/add_docker_metadata/docs/add_docker_metadata.asciidoc b/libbeat/processors/add_docker_metadata/docs/add_docker_metadata.asciidoc
index 53292667f13b..616582101733 100644
--- a/libbeat/processors/add_docker_metadata/docs/add_docker_metadata.asciidoc
+++ b/libbeat/processors/add_docker_metadata/docs/add_docker_metadata.asciidoc
@@ -5,6 +5,11 @@
 add_docker_metadata
 ++++
 
+ifeval::["{beatname_lc}"=="packetbeat"]
+{beatname_uc} currently has extremely limited capability to monitor and coexist with containers, for example Docker, Podman, or Kubernetes. Using the `add_docker_metadata` processor with {beatname_lc} is neither recommended nor supported.
+endif::[]
+
+ifeval::["{beatname_lc}"!="packetbeat"]
 The `add_docker_metadata` processor annotates each event with relevant metadata
 from Docker containers. At startup it detects a docker environment and caches the metadata.
 The events are annotated with Docker metadata, only if a valid configuration
@@ -88,3 +93,4 @@ forget metadata for a container, 60s by default.
 
 `labels.dedot`:: (Optional) Default to be false. If set to true, replace dots in
 labels with `_`.
+endif::[]
\ No newline at end of file
diff --git a/libbeat/publisher/pipeline/monitoring.go b/libbeat/publisher/pipeline/monitoring.go
index 69a21c2c71ca..cda329e0963a 100644
--- a/libbeat/publisher/pipeline/monitoring.go
+++ b/libbeat/publisher/pipeline/monitoring.go
@@ -17,7 +17,11 @@
 
 package pipeline
 
-import "github.com/elastic/elastic-agent-libs/monitoring"
+import (
+	"math"
+
+	"github.com/elastic/elastic-agent-libs/monitoring"
+)
 
 type observer interface {
 	pipelineObserver
@@ -67,8 +71,9 @@ type metricsObserverVars struct {
 	activeEvents *monitoring.Uint
 
 	// queue metrics
-	queueACKed     *monitoring.Uint
-	queueMaxEvents *monitoring.Uint
+	queueACKed       *monitoring.Uint
+	queueMaxEvents   *monitoring.Uint
+	percentQueueFull *monitoring.Float
 }
 
 func newMetricsObserver(metrics *monitoring.Registry) *metricsObserver {
@@ -92,7 +97,8 @@ func newMetricsObserver(metrics *monitoring.Registry) *metricsObserver {
 			queueACKed:     monitoring.NewUint(reg, "queue.acked"),
 			queueMaxEvents: monitoring.NewUint(reg, "queue.max_events"),
 
-			activeEvents: monitoring.NewUint(reg, "events.active"), // Gauge
+			activeEvents:     monitoring.NewUint(reg, "events.active"), // Gauge
+			percentQueueFull: monitoring.NewFloat(reg, "queue.filled.pct.events"),
 		},
 	}
 }
@@ -121,12 +127,24 @@ func (o *metricsObserver) clientClosed() { o.vars.clients.Dec() }
 func (o *metricsObserver) newEvent() {
 	o.vars.events.Inc()
 	o.vars.activeEvents.Inc()
+	o.setPercentageFull()
+}
+
+// setPercentageFull is used internally to set the `queue.filled.pct.events` metric
+func (o *metricsObserver) setPercentageFull() {
+	maxEvt := o.vars.queueMaxEvents.Get()
+	if maxEvt != 0 {
+		pct := float64(o.vars.activeEvents.Get()) / float64(maxEvt)
+		// Round to the nearest 0.0005 (0.05%).
+		pctRound := math.Round(pct/0.0005) * 0.0005
+		o.vars.percentQueueFull.Set(pctRound)
+	}
 }
 
 // (client) event is filtered out (on purpose or failed)
 func (o *metricsObserver) filteredEvent() {
 	o.vars.filtered.Inc()
 	o.vars.activeEvents.Dec()
+	o.setPercentageFull()
 }
 
 // (client) managed to push an event into the publisher pipeline
@@ -138,6 +156,7 @@ func (o *metricsObserver) publishedEvent() {
 func (o *metricsObserver) failedPublishEvent() {
 	o.vars.failed.Inc()
 	o.vars.activeEvents.Dec()
+	o.setPercentageFull()
 }
 
 //
@@ -148,11 +167,13 @@ func (o *metricsObserver) failedPublishEvent() {
 func (o *metricsObserver)
queueACKed(n int) { o.vars.queueACKed.Add(uint64(n)) o.vars.activeEvents.Sub(uint64(n)) + o.setPercentageFull() } // (queue) maximum queue event capacity func (o *metricsObserver) queueMaxEvents(n int) { o.vars.queueMaxEvents.Set(uint64(n)) + o.setPercentageFull() } // diff --git a/metricbeat/scripts/mage/package.go b/metricbeat/scripts/mage/package.go index e206881dd3ca..43e12652f4a5 100644 --- a/metricbeat/scripts/mage/package.go +++ b/metricbeat/scripts/mage/package.go @@ -40,7 +40,7 @@ const ( // not supported. You must declare a dependency on either // PrepareModulePackagingOSS or PrepareModulePackagingXPack. func CustomizePackaging() { - mg.Deps(customizeLightModulesPackaging) + mg.Deps(CustomizeLightModulesPackaging) var ( modulesDTarget = "modules.d" @@ -104,7 +104,7 @@ func CustomizePackaging() { // PrepareModulePackagingOSS generates build/package/modules and // build/package/modules.d directories for use in packaging. func PrepareModulePackagingOSS() error { - err := prepareLightModulesPackaging("module") + err := PrepareLightModulesPackaging("module") if err != nil { return err } @@ -116,7 +116,7 @@ func PrepareModulePackagingOSS() error { // PrepareModulePackagingXPack generates build/package/modules and // build/package/modules.d directories for use in packaging. func PrepareModulePackagingXPack() error { - err := prepareLightModulesPackaging("module", devtools.OSSBeatDir("module")) + err := PrepareLightModulesPackaging("module", devtools.OSSBeatDir("module")) if err != nil { return err } @@ -201,8 +201,8 @@ func GenerateDirModulesD() error { return nil } -// customizeLightModulesPackaging customizes packaging to add light modules -func customizeLightModulesPackaging() error { +// CustomizeLightModulesPackaging customizes packaging to add light modules +func CustomizeLightModulesPackaging() error { var ( moduleTarget = "module" module = devtools.PackageFile{ @@ -225,8 +225,8 @@ func customizeLightModulesPackaging() error { return nil } -// prepareLightModulesPackaging generates light modules -func prepareLightModulesPackaging(paths ...string) error { +// PrepareLightModulesPackaging generates light modules +func PrepareLightModulesPackaging(paths ...string) error { err := devtools.Clean([]string{dirModulesGenerated}) if err != nil { return err diff --git a/testing/environments/snapshot.yml b/testing/environments/snapshot.yml index a031c2184e5a..30002f9a255c 100644 --- a/testing/environments/snapshot.yml +++ b/testing/environments/snapshot.yml @@ -3,7 +3,7 @@ version: '2.3' services: elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.15.0-053650c4-SNAPSHOT + image: docker.elastic.co/elasticsearch/elasticsearch:8.15.0-aa640648-SNAPSHOT # When extend is used it merges healthcheck.tests, see: # https://github.com/docker/compose/issues/8962 # healthcheck: @@ -31,7 +31,7 @@ services: - "./docker/elasticsearch/users_roles:/usr/share/elasticsearch/config/users_roles" logstash: - image: docker.elastic.co/logstash/logstash:8.15.0-053650c4-SNAPSHOT + image: docker.elastic.co/logstash/logstash:8.15.0-aa640648-SNAPSHOT healthcheck: test: ["CMD", "curl", "-f", "http://localhost:9600/_node/stats"] retries: 600 @@ -44,7 +44,7 @@ services: - 5055:5055 kibana: - image: docker.elastic.co/kibana/kibana:8.15.0-053650c4-SNAPSHOT + image: docker.elastic.co/kibana/kibana:8.15.0-aa640648-SNAPSHOT environment: - "ELASTICSEARCH_USERNAME=kibana_system_user" - "ELASTICSEARCH_PASSWORD=testing" diff --git a/x-pack/agentbeat/magefile.go b/x-pack/agentbeat/magefile.go index 
874c79bf7a30..c7e6c561830d 100644 --- a/x-pack/agentbeat/magefile.go +++ b/x-pack/agentbeat/magefile.go @@ -20,6 +20,7 @@ import ( devtools "github.com/elastic/beats/v7/dev-tools/mage" "github.com/elastic/beats/v7/dev-tools/mage/target/build" + metricbeat "github.com/elastic/beats/v7/metricbeat/scripts/mage" packetbeat "github.com/elastic/beats/v7/packetbeat/scripts/mage" osquerybeat "github.com/elastic/beats/v7/x-pack/osquerybeat/scripts/mage" @@ -112,11 +113,19 @@ func CrossBuildDeps() error { return callForBeat("crossBuildExt", "osquerybeat") } +// PrepareLightModules prepares the module packaging. +func PrepareLightModules() error { + return metricbeat.PrepareLightModulesPackaging( + filepath.Join("..", "metricbeat", "module"), // x-pack/metricbeat + filepath.Join("..", "..", "metricbeat", "module"), // metricbeat (oss) + ) +} + // Package packages the Beat for distribution. // Use SNAPSHOT=true to build snapshots. // Use PLATFORMS to control the target platforms. // Use VERSION_QUALIFIER to control the version qualifier. -func Package() { +func Package() error { start := time.Now() defer func() { fmt.Println("package ran for", time.Since(start)) }() @@ -126,7 +135,14 @@ func Package() { // Add osquery distro binaries, required for the osquerybeat subcommand. osquerybeat.CustomizePackaging() - mg.SerialDeps(Update, osquerybeat.FetchOsqueryDistros, CrossBuildDeps, CrossBuild, devtools.Package, TestPackages) + // Add metricbeat lightweight modules. + if err := metricbeat.CustomizeLightModulesPackaging(); err != nil { + return err + } + + mg.SerialDeps(Update, PrepareLightModules, osquerybeat.FetchOsqueryDistros, CrossBuildDeps, CrossBuild, devtools.Package, TestPackages) + + return nil } // TestPackages tests the generated packages (i.e. file modes, owners, groups). 
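The `queue.filled.pct.events` gauge introduced in libbeat/publisher/pipeline/monitoring.go above is the active/max ratio rounded to 0.0005 (0.05%) increments. A small standalone sketch of that computation, useful for sanity-checking reported values (assumed helper name, not part of the patch):

package main

import (
	"fmt"
	"math"
)

// percentFull mirrors setPercentageFull: the active/max ratio is rounded
// to the nearest 0.0005 (0.05%). A max of 0 leaves the gauge unset.
func percentFull(active, max uint64) (float64, bool) {
	if max == 0 {
		return 0, false
	}
	pct := float64(active) / float64(max)
	return math.Round(pct/0.0005) * 0.0005, true
}

func main() {
	if pct, ok := percentFull(3123, 4096); ok {
		fmt.Println(pct) // 0.7625
	}
}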
diff --git a/x-pack/auditbeat/processors/sessionmd/add_session_metadata.go b/x-pack/auditbeat/processors/sessionmd/add_session_metadata.go index ff9fa54e556a..766e9623b9ea 100644 --- a/x-pack/auditbeat/processors/sessionmd/add_session_metadata.go +++ b/x-pack/auditbeat/processors/sessionmd/add_session_metadata.go @@ -96,13 +96,24 @@ func New(cfg *cfg.C) (beat.Processor, error) { } func (p *addSessionMetadata) Run(ev *beat.Event) (*beat.Event, error) { - _, err := ev.GetValue(p.config.PIDField) + pi, err := ev.GetValue(p.config.PIDField) if err != nil { // Do not attempt to enrich events without PID; it's not a supported event return ev, nil //nolint:nilerr // Running on events without PID is expected } - err = p.provider.UpdateDB(ev) + // Do not enrich failed syscalls, as there was no actual process change related to it + v, err := ev.GetValue("auditd.result") + if err == nil && v == "fail" { + return ev, nil + } + + pid, err := pidToUInt32(pi) + if err != nil { + return ev, nil //nolint:nilerr // Running on events with a different PID type is not a processor error + } + + err = p.provider.UpdateDB(ev, pid) if err != nil { return ev, err } @@ -136,7 +147,9 @@ func (p *addSessionMetadata) enrich(ev *beat.Event) (*beat.Event, error) { fullProcess, err := p.db.GetProcess(pid) if err != nil { - return nil, fmt.Errorf("pid %v not found in db: %w", pid, err) + e := fmt.Errorf("pid %v not found in db: %w", pid, err) + p.logger.Errorf("%v", e) + return nil, e } processMap := fullProcess.ToMap() diff --git a/x-pack/auditbeat/processors/sessionmd/processdb/db.go b/x-pack/auditbeat/processors/sessionmd/processdb/db.go index 2c7c228e2c1c..b8c624abe00a 100644 --- a/x-pack/auditbeat/processors/sessionmd/processdb/db.go +++ b/x-pack/auditbeat/processors/sessionmd/processdb/db.go @@ -238,7 +238,6 @@ func (db *DB) InsertFork(fork types.ProcessForkEvent) { pid := fork.ChildPIDs.Tgid ppid := fork.ParentPIDs.Tgid - db.scrapeAncestors(db.processes[pid]) if entry, ok := db.processes[ppid]; ok { entry.PIDs = pidInfoFromProto(fork.ChildPIDs) @@ -282,7 +281,6 @@ func (db *DB) InsertExec(exec types.ProcessExecEvent) { } db.processes[exec.PIDs.Tgid] = proc - db.scrapeAncestors(proc) entryLeaderPID := db.evaluateEntryLeader(proc) if entryLeaderPID != nil { db.entryLeaderRelationships[exec.PIDs.Tgid] = *entryLeaderPID @@ -568,6 +566,14 @@ func setSameAsProcess(process *types.Process) { } } +func (db *DB) HasProcess(pid uint32) bool { + db.mutex.RLock() + defer db.mutex.RUnlock() + + _, ok := db.processes[pid] + return ok +} + func (db *DB) GetProcess(pid uint32) (types.Process, error) { db.mutex.RLock() defer db.mutex.RUnlock() @@ -585,8 +591,6 @@ func (db *DB) GetProcess(pid uint32) (types.Process, error) { fillParent(&ret, parent) break } - db.logger.Debugf("failed to find %d in DB (parent of %d), attempting to scrape", process.PIDs.Ppid, pid) - db.scrapeAncestors(process) } } @@ -596,8 +600,6 @@ func (db *DB) GetProcess(pid uint32) (types.Process, error) { fillGroupLeader(&ret, groupLeader) break } - db.logger.Debugf("failed to find %d in DB (group leader of %d), attempting to scrape", process.PIDs.Pgid, pid) - db.scrapeAncestors(process) } } @@ -607,8 +609,6 @@ func (db *DB) GetProcess(pid uint32) (types.Process, error) { fillSessionLeader(&ret, sessionLeader) break } - db.logger.Debugf("failed to find %d in DB (session leader of %d), attempting to scrape", process.PIDs.Sid, pid) - db.scrapeAncestors(process) } } @@ -712,29 +712,6 @@ func getTTYType(major uint16, minor uint16) TTYType { return TTYUnknown } -func 
(db *DB) scrapeAncestors(proc Process) {
-	for _, pid := range []uint32{proc.PIDs.Pgid, proc.PIDs.Ppid, proc.PIDs.Sid} {
-		if _, exists := db.processes[pid]; pid == 0 || exists {
-			continue
-		}
-		procInfo, err := db.procfs.GetProcess(pid)
-		if err != nil {
-			db.logger.Debugf("couldn't get %v from procfs: %w", pid, err)
-			continue
-		}
-		p := Process{
-			PIDs:     pidInfoFromProto(procInfo.PIDs),
-			Creds:    credInfoFromProto(procInfo.Creds),
-			CTTY:     ttyDevFromProto(procInfo.CTTY),
-			Argv:     procInfo.Argv,
-			Cwd:      procInfo.Cwd,
-			Env:      procInfo.Env,
-			Filename: procInfo.Filename,
-		}
-		db.insertProcess(p)
-	}
-}
-
 func (db *DB) Close() {
 	close(db.stopChan)
 }
diff --git a/x-pack/auditbeat/processors/sessionmd/provider/ebpf_provider/ebpf_provider.go b/x-pack/auditbeat/processors/sessionmd/provider/ebpf_provider/ebpf_provider.go
index 2b9b540e037c..f1b8bae0b671 100644
--- a/x-pack/auditbeat/processors/sessionmd/provider/ebpf_provider/ebpf_provider.go
+++ b/x-pack/auditbeat/processors/sessionmd/provider/ebpf_provider/ebpf_provider.go
@@ -9,6 +9,7 @@ package ebpf_provider
 import (
 	"context"
 	"fmt"
+	"time"
 
 	"github.com/elastic/beats/v7/libbeat/beat"
 	"github.com/elastic/beats/v7/libbeat/ebpf"
@@ -151,7 +152,80 @@ func NewProvider(ctx context.Context, logger *logp.Logger, db *processdb.DB) (pr
 	return &p, nil
 }
 
-func (s prvdr) UpdateDB(ev *beat.Event) error {
-	// no-op for ebpf, DB is updated from pushed ebpf events
-	return nil
+const (
+	maxWaitLimit      = 200 * time.Millisecond // Maximum time UpdateDB will wait for process
+	combinedWaitLimit = 2 * time.Second        // Multiple UpdateDB calls will wait up to this amount within resetDuration
+	backoffDuration   = 10 * time.Second       // UpdateDB will stop waiting for processes for this time
+	resetDuration     = 5 * time.Second        // After this amount of time with no backoffs, the combinedWait will be reset
+)
+
+var (
+	combinedWait   = 0 * time.Millisecond
+	inBackoff      = false
+	backoffStart   = time.Now()
+	since          = time.Now()
+	backoffSkipped = 0
+)
+
+// With ebpf, process events are pushed to the DB by the above goroutine, so this doesn't actually update the DB.
+// It does try to sync the processor and ebpf events, so that the process is in the process db before continuing.
+//
+// It's possible that the event to enrich arrives before the process is inserted into the DB. In that case, this
+// will block continuing the enrichment until the process is seen (or the timeout is reached).
+//
+// If for some reason a lot of time has been spent waiting for missing processes, this also has a backoff timer during
+// which it will continue without waiting for missing events to arrive, so the processor doesn't become overly backed-up
+// waiting for these processes, at the cost of possibly not enriching some processes.
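The implementation that follows realizes this wait as a polling loop whose sleep doubles each round and whose final sleep is clamped so the total never exceeds maxWaitLimit. A simplified standalone sketch of just that loop (hypothetical helper, minus the logging and shared backoff bookkeeping):

package main

import "time"

// waitFor polls found with a doubling sleep, clamping the final interval
// so the total wait never exceeds maxWait.
func waitFor(found func() bool, maxWait time.Duration) bool {
	start := time.Now()
	next := 5 * time.Millisecond
	for {
		if found() {
			return true
		}
		waited := time.Since(start)
		if waited >= maxWait {
			return false
		}
		if next+waited > maxWait {
			next = maxWait - waited
		}
		time.Sleep(next)
		next *= 2
	}
}

func main() {
	deadline := time.Now().Add(50 * time.Millisecond)
	_ = waitFor(func() bool { return time.Now().After(deadline) }, 200*time.Millisecond)
}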
+func (s prvdr) UpdateDB(ev *beat.Event, pid uint32) error {
+	if s.db.HasProcess(pid) {
+		return nil
+	}
+
+	now := time.Now()
+	if inBackoff {
+		if now.Sub(backoffStart) > backoffDuration {
+			s.logger.Warnf("ended backoff, skipped %d processes", backoffSkipped)
+			inBackoff = false
+			combinedWait = 0 * time.Millisecond
+		} else {
+			backoffSkipped += 1
+			return nil
+		}
+	} else {
+		if combinedWait > combinedWaitLimit {
+			s.logger.Warn("starting backoff")
+			inBackoff = true
+			backoffStart = now
+			backoffSkipped = 0
+			return nil
+		}
+		// maintain a moving window of time for the delays we track
+		if now.Sub(since) > resetDuration {
+			since = now
+			combinedWait = 0 * time.Millisecond
+		}
+	}
+
+	start := now
+	nextWait := 5 * time.Millisecond
+	for {
+		waited := time.Since(start)
+		if s.db.HasProcess(pid) {
+			s.logger.Debugf("got process that was missing after %v", waited)
+			combinedWait = combinedWait + waited
+			return nil
+		}
+		if waited >= maxWaitLimit {
+			e := fmt.Errorf("process %v was not seen after %v", pid, waited)
+			s.logger.Warnf("%v", e)
+			combinedWait = combinedWait + waited
+			return e
+		}
+		time.Sleep(nextWait)
+		if nextWait*2+waited > maxWaitLimit {
+			nextWait = maxWaitLimit - waited
+		} else {
+			nextWait = nextWait * 2
+		}
+	}
 }
diff --git a/x-pack/auditbeat/processors/sessionmd/provider/procfs_provider/procfs_provider.go b/x-pack/auditbeat/processors/sessionmd/provider/procfs_provider/procfs_provider.go
index 2f99dd72b1fb..6525b860b6d2 100644
--- a/x-pack/auditbeat/processors/sessionmd/provider/procfs_provider/procfs_provider.go
+++ b/x-pack/auditbeat/processors/sessionmd/provider/procfs_provider/procfs_provider.go
@@ -41,16 +41,7 @@ func NewProvider(ctx context.Context, logger *logp.Logger, db *processdb.DB, rea
 }
 
 // UpdateDB will update the process DB with process info from procfs or the event itself
-func (s prvdr) UpdateDB(ev *beat.Event) error {
-	pi, err := ev.Fields.GetValue(s.pidField)
-	if err != nil {
-		return fmt.Errorf("event not supported, no pid")
-	}
-	pid, ok := pi.(int)
-	if !ok {
-		return fmt.Errorf("pid field not int")
-	}
-
+func (s prvdr) UpdateDB(ev *beat.Event, pid uint32) error {
 	syscall, err := ev.GetValue(syscallField)
 	if err != nil {
 		return fmt.Errorf("event not supported, no syscall data")
@@ -59,7 +50,7 @@ func (s prvdr) UpdateDB(ev *beat.Event) error {
 	switch syscall {
 	case "execveat", "execve":
 		pe := types.ProcessExecEvent{}
-		proc_info, err := s.reader.GetProcess(uint32(pid))
+		proc_info, err := s.reader.GetProcess(pid)
 		if err == nil {
 			pe.PIDs = proc_info.PIDs
 			pe.Creds = proc_info.Creds
@@ -72,7 +63,7 @@ func (s prvdr) UpdateDB(ev *beat.Event) error {
 			s.logger.Warnf("couldn't get process info from proc for pid %v: %w", pid, err)
 			// If process info couldn't be taken from procfs, populate with as much info as
 			// possible from the event
-			pe.PIDs.Tgid = uint32(pid)
+			pe.PIDs.Tgid = pid
 			var intr interface{}
 			var i int
 			var ok bool
@@ -106,7 +97,7 @@ func (s prvdr) UpdateDB(ev *beat.Event) error {
 	case "exit_group":
 		pe := types.ProcessExitEvent{
 			PIDs: types.PIDInfo{
-				Tgid: uint32(pid),
+				Tgid: pid,
 			},
 		}
 		s.db.InsertExit(pe)
@@ -122,8 +113,8 @@ func (s prvdr) UpdateDB(ev *beat.Event) error {
 		if result == "success" {
 			setsid_ev := types.ProcessSetsidEvent{
 				PIDs: types.PIDInfo{
-					Tgid: uint32(pid),
-					Sid:  uint32(pid),
+					Tgid: pid,
+					Sid:  pid,
 				},
 			}
 			s.db.InsertSetsid(setsid_ev)
diff --git a/x-pack/auditbeat/processors/sessionmd/provider/procfs_provider/procfs_provider_test.go
b/x-pack/auditbeat/processors/sessionmd/provider/procfs_provider/procfs_provider_test.go index 6fd333c47119..c438efcfe1ae 100644 --- a/x-pack/auditbeat/processors/sessionmd/provider/procfs_provider/procfs_provider_test.go +++ b/x-pack/auditbeat/processors/sessionmd/provider/procfs_provider/procfs_provider_test.go @@ -124,7 +124,7 @@ func TestExecveEvent(t *testing.T) { provider, err := NewProvider(context.TODO(), &logger, db, reader, "process.pid") require.Nil(t, err, "error creating provider") - err = provider.UpdateDB(&event) + err = provider.UpdateDB(&event, expected.PIDs.Tgid) require.Nil(t, err) actual, err := db.GetProcess(pid) @@ -234,7 +234,7 @@ func TestExecveatEvent(t *testing.T) { provider, err := NewProvider(context.TODO(), &logger, db, reader, "process.pid") require.Nil(t, err, "error creating provider") - err = provider.UpdateDB(&event) + err = provider.UpdateDB(&event, expected.PIDs.Tgid) require.Nil(t, err) actual, err := db.GetProcess(pid) @@ -317,7 +317,7 @@ func TestSetSidEvent(t *testing.T) { provider, err := NewProvider(context.TODO(), &logger, db, reader, "process.pid") require.Nil(t, err, "error creating provider") - err = provider.UpdateDB(&event) + err = provider.UpdateDB(&event, expected.PIDs.Tgid) require.Nil(t, err) actual, err := db.GetProcess(pid) @@ -399,7 +399,7 @@ func TestSetSidEventFailed(t *testing.T) { provider, err := NewProvider(context.TODO(), &logger, db, reader, "process.pid") require.Nil(t, err, "error creating provider") - err = provider.UpdateDB(&event) + err = provider.UpdateDB(&event, expected.PIDs.Tgid) require.Nil(t, err) actual, err := db.GetProcess(pid) @@ -470,7 +470,7 @@ func TestSetSidSessionLeaderNotScraped(t *testing.T) { provider, err := NewProvider(context.TODO(), &logger, db, reader, "process.pid") require.Nil(t, err, "error creating provider") - err = provider.UpdateDB(&event) + err = provider.UpdateDB(&event, expected.PIDs.Tgid) require.Nil(t, err) actual, err := db.GetProcess(pid) diff --git a/x-pack/auditbeat/processors/sessionmd/provider/provider.go b/x-pack/auditbeat/processors/sessionmd/provider/provider.go index e3fa1547806c..6452eb9e2bf7 100644 --- a/x-pack/auditbeat/processors/sessionmd/provider/provider.go +++ b/x-pack/auditbeat/processors/sessionmd/provider/provider.go @@ -11,5 +11,5 @@ import ( ) type Provider interface { - UpdateDB(*beat.Event) error + UpdateDB(*beat.Event, uint32) error } diff --git a/x-pack/filebeat/input/awss3/input.go b/x-pack/filebeat/input/awss3/input.go index 733de949f298..51e8c9808edb 100644 --- a/x-pack/filebeat/input/awss3/input.go +++ b/x-pack/filebeat/input/awss3/input.go @@ -13,6 +13,7 @@ import ( "time" awssdk "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/aws/retry" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/sqs" "github.com/aws/smithy-go" @@ -21,7 +22,6 @@ import ( v2 "github.com/elastic/beats/v7/filebeat/input/v2" "github.com/elastic/beats/v7/libbeat/beat" "github.com/elastic/beats/v7/libbeat/feature" - "github.com/elastic/beats/v7/libbeat/statestore" awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" conf "github.com/elastic/elastic-agent-libs/config" "github.com/elastic/go-concert/unison" @@ -99,78 +99,88 @@ func (in *s3Input) Test(ctx v2.TestContext) error { } func (in *s3Input) Run(inputContext v2.Context, pipeline beat.Pipeline) error { - var err error + ctx := v2.GoContextFromCanceler(inputContext.Cancelation) - persistentStore, err := in.store.Access() - if err != nil { - return fmt.Errorf("can not access 
persistent store: %w", err) + if in.config.QueueURL != "" { + return in.runQueueReader(ctx, inputContext, pipeline) } - defer persistentStore.Close() + if in.config.BucketARN != "" || in.config.NonAWSBucketName != "" { + return in.runS3Poller(ctx, inputContext, pipeline) + } - states := newStates(inputContext) - err = states.readStatesFrom(persistentStore) - if err != nil { - return fmt.Errorf("can not start persistent store: %w", err) + return nil +} + +func (in *s3Input) runQueueReader( + ctx context.Context, + inputContext v2.Context, + pipeline beat.Pipeline, +) error { + configRegion := in.config.RegionName + urlRegion, err := getRegionFromQueueURL(in.config.QueueURL, in.config.AWSConfig.Endpoint) + if err != nil && configRegion == "" { + // Only report an error if we don't have a configured region + // to fall back on. + return fmt.Errorf("failed to get AWS region from queue_url: %w", err) + } else if configRegion != "" && configRegion != urlRegion { + inputContext.Logger.Warnf("configured region disagrees with queue_url region (%q != %q): using %q", configRegion, urlRegion, urlRegion) } - ctx := v2.GoContextFromCanceler(inputContext.Cancelation) + in.awsConfig.Region = urlRegion - if in.config.QueueURL != "" { - regionName, err := getRegionFromQueueURL(in.config.QueueURL, in.config.AWSConfig.Endpoint, in.config.RegionName) - if err != nil && in.config.RegionName == "" { - return fmt.Errorf("failed to get AWS region from queue_url: %w", err) - } - var warn regionMismatchError - if errors.As(err, &warn) { - // Warn of mismatch, but go ahead with configured region name. - inputContext.Logger.Warnf("%v: using %q", err, regionName) - } - in.awsConfig.Region = regionName + // Create SQS receiver and S3 notification processor. + receiver, err := in.createSQSReceiver(inputContext, pipeline) + if err != nil { + return fmt.Errorf("failed to initialize sqs receiver: %w", err) + } + defer receiver.metrics.Close() - // Create SQS receiver and S3 notification processor. - receiver, err := in.createSQSReceiver(inputContext, pipeline) - if err != nil { - return fmt.Errorf("failed to initialize sqs receiver: %w", err) - } - defer receiver.metrics.Close() + // Poll metrics periodically in the background + go pollSqsWaitingMetric(ctx, receiver) - // Poll metrics periodically in the background - go pollSqsWaitingMetric(ctx, receiver) + return receiver.Receive(ctx) +} - if err := receiver.Receive(ctx); err != nil { - return err - } +func (in *s3Input) runS3Poller( + ctx context.Context, + inputContext v2.Context, + pipeline beat.Pipeline, +) error { + // Create client for publishing events and receive notification of their ACKs. + client, err := pipeline.ConnectWith(beat.ClientConfig{ + EventListener: awscommon.NewEventACKHandler(), + Processing: beat.ProcessingConfig{ + // This input only produces events with basic types so normalization + // is not required. + EventNormalization: boolPtr(false), + }, + }) + if err != nil { + return fmt.Errorf("failed to create pipeline client: %w", err) } + defer client.Close() - if in.config.BucketARN != "" || in.config.NonAWSBucketName != "" { - // Create client for publishing events and receive notification of their ACKs. - client, err := pipeline.ConnectWith(beat.ClientConfig{ - EventListener: awscommon.NewEventACKHandler(), - Processing: beat.ProcessingConfig{ - // This input only produces events with basic types so normalization - // is not required. 
- EventNormalization: boolPtr(false), - }, - }) - if err != nil { - return fmt.Errorf("failed to create pipeline client: %w", err) - } - defer client.Close() + // Connect to the registry and create our states lookup + persistentStore, err := in.store.Access() + if err != nil { + return fmt.Errorf("can not access persistent store: %w", err) + } + defer persistentStore.Close() - // Create S3 receiver and S3 notification processor. - poller, err := in.createS3Lister(inputContext, ctx, client, persistentStore, states) - if err != nil { - return fmt.Errorf("failed to initialize s3 poller: %w", err) - } - defer poller.metrics.Close() + states, err := newStates(inputContext, persistentStore) + if err != nil { + return fmt.Errorf("can not start persistent store: %w", err) + } - if err := poller.Poll(ctx); err != nil { - return err - } + // Create S3 receiver and S3 notification processor. + poller, err := in.createS3Poller(inputContext, ctx, client, states) + if err != nil { + return fmt.Errorf("failed to initialize s3 poller: %w", err) } + defer poller.metrics.Close() - return nil + return poller.Poll(ctx) } func (in *s3Input) createSQSReceiver(ctx v2.Context, pipeline beat.Pipeline) (*sqsReader, error) { @@ -215,8 +225,11 @@ func (in *s3Input) createSQSReceiver(ctx v2.Context, pipeline beat.Pipeline) (*s return nil, err } in.metrics = newInputMetrics(ctx.ID, nil, in.config.MaxNumberOfMessages) + s3EventHandlerFactory := newS3ObjectProcessorFactory(log.Named("s3"), in.metrics, s3API, fileSelectors, in.config.BackupConfig, in.config.MaxNumberOfMessages) + sqsMessageHandler := newSQSS3EventProcessor(log.Named("sqs_s3_event"), in.metrics, sqsAPI, script, in.config.VisibilityTimeout, in.config.SQSMaxReceiveCount, pipeline, s3EventHandlerFactory, in.config.MaxNumberOfMessages) + sqsReader := newSQSReader(log.Named("sqs"), in.metrics, sqsAPI, in.config.MaxNumberOfMessages, sqsMessageHandler) return sqsReader, nil @@ -230,7 +243,7 @@ func (n nonAWSBucketResolver) ResolveEndpoint(region string, options s3.Endpoint return awssdk.Endpoint{URL: n.endpoint, SigningRegion: region, HostnameImmutable: true, Source: awssdk.EndpointSourceCustom}, nil } -func (in *s3Input) createS3Lister(ctx v2.Context, cancelCtx context.Context, client beat.Client, persistentStore *statestore.Store, states *states) (*s3Poller, error) { +func (in *s3Input) createS3Poller(ctx v2.Context, cancelCtx context.Context, client beat.Client, states *states) (*s3Poller, error) { var bucketName string var bucketID string if in.config.NonAWSBucketName != "" { @@ -250,6 +263,12 @@ func (in *s3Input) createS3Lister(ctx v2.Context, cancelCtx context.Context, cli o.EndpointOptions.UseFIPSEndpoint = awssdk.FIPSEndpointStateEnabled } o.UsePathStyle = in.config.PathStyle + + o.Retryer = retry.NewStandard(func(so *retry.StandardOptions) { + so.MaxAttempts = 5 + // Recover quickly when requests start working again + so.NoRetryIncrement = 100 + }) }) regionName, err := getRegionForBucket(cancelCtx, s3Client, bucketName) if err != nil { @@ -295,7 +314,6 @@ func (in *s3Input) createS3Lister(ctx v2.Context, cancelCtx context.Context, cli client, s3EventHandlerFactory, states, - persistentStore, bucketID, in.config.BucketListPrefix, in.awsConfig.Region, @@ -308,7 +326,7 @@ func (in *s3Input) createS3Lister(ctx v2.Context, cancelCtx context.Context, cli var errBadQueueURL = errors.New("QueueURL is not in format: https://sqs.{REGION_ENDPOINT}.{ENDPOINT}/{ACCOUNT_NUMBER}/{QUEUE_NAME} or 
https://{VPC_ENDPOINT}.sqs.{REGION_ENDPOINT}.vpce.{ENDPOINT}/{ACCOUNT_NUMBER}/{QUEUE_NAME}") -func getRegionFromQueueURL(queueURL string, endpoint, defaultRegion string) (region string, err error) { +func getRegionFromQueueURL(queueURL, endpoint string) (string, error) { // get region from queueURL // Example for sqs queue: https://sqs.us-east-1.amazonaws.com/12345678912/test-s3-logs // Example for vpce: https://vpce-test.sqs.us-east-1.vpce.amazonaws.com/12345678912/sqs-queue @@ -321,11 +339,7 @@ func getRegionFromQueueURL(queueURL string, endpoint, defaultRegion string) (reg // check for sqs queue url if len(queueHostSplit) == 3 && queueHostSplit[0] == "sqs" { if queueHostSplit[2] == endpoint || (endpoint == "" && strings.HasPrefix(queueHostSplit[2], "amazonaws.")) { - region = queueHostSplit[1] - if defaultRegion != "" && region != defaultRegion { - return defaultRegion, regionMismatchError{queueURLRegion: region, defaultRegion: defaultRegion} - } - return region, nil + return queueHostSplit[1], nil } } @@ -333,30 +347,13 @@ func getRegionFromQueueURL(queueURL string, endpoint, defaultRegion string) (reg queueHostSplitVPC := strings.SplitN(u.Host, ".", 5) if len(queueHostSplitVPC) == 5 && queueHostSplitVPC[1] == "sqs" { if queueHostSplitVPC[4] == endpoint || (endpoint == "" && strings.HasPrefix(queueHostSplitVPC[4], "amazonaws.")) { - region = queueHostSplitVPC[2] - if defaultRegion != "" && region != defaultRegion { - return defaultRegion, regionMismatchError{queueURLRegion: region, defaultRegion: defaultRegion} - } - return region, nil + return queueHostSplitVPC[2], nil } } - - if defaultRegion != "" { - return defaultRegion, nil - } } return "", errBadQueueURL } -type regionMismatchError struct { - queueURLRegion string - defaultRegion string -} - -func (e regionMismatchError) Error() string { - return fmt.Sprintf("configured region disagrees with queue_url region: %q != %q", e.queueURLRegion, e.defaultRegion) -} - func getRegionForBucket(ctx context.Context, s3Client *s3.Client, bucketName string) (string, error) { getBucketLocationOutput, err := s3Client.GetBucketLocation(ctx, &s3.GetBucketLocationInput{ Bucket: awssdk.String(bucketName), diff --git a/x-pack/filebeat/input/awss3/input_benchmark_test.go b/x-pack/filebeat/input/awss3/input_benchmark_test.go index e05e5b461ca6..5d22d1411687 100644 --- a/x-pack/filebeat/input/awss3/input_benchmark_test.go +++ b/x-pack/filebeat/input/awss3/input_benchmark_test.go @@ -8,7 +8,6 @@ import ( "context" "errors" "fmt" - "io/ioutil" "os" "path/filepath" "runtime" @@ -16,6 +15,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" + "github.com/elastic/beats/v7/libbeat/statestore" "github.com/elastic/beats/v7/libbeat/statestore/storetest" @@ -132,7 +133,7 @@ type constantS3 struct { var _ s3API = (*constantS3)(nil) func newConstantS3(t testing.TB) *constantS3 { - data, err := ioutil.ReadFile(cloudtrailTestFile) + data, err := os.ReadFile(cloudtrailTestFile) if err != nil { t.Fatal(err) } @@ -342,14 +343,11 @@ func benchmarkInputS3(t *testing.T, numberOfWorkers int) testing.BenchmarkResult return } - err = store.Set(awsS3WriteCommitPrefix+"bucket"+listPrefix, &commitWriteState{time.Time{}}) - if err != nil { - errChan <- err - return - } + states, err := newStates(inputCtx, store) + assert.NoError(t, err, "states creation should succeed") s3EventHandlerFactory := newS3ObjectProcessorFactory(log.Named("s3"), metrics, s3API, config.FileSelectors, backupConfig{}, numberOfWorkers) - s3Poller := newS3Poller(logp.NewLogger(inputName), 
metrics, s3API, client, s3EventHandlerFactory, newStates(inputCtx), store, "bucket", listPrefix, "region", "provider", numberOfWorkers, time.Second) + s3Poller := newS3Poller(logp.NewLogger(inputName), metrics, s3API, client, s3EventHandlerFactory, states, "bucket", listPrefix, "region", "provider", numberOfWorkers, time.Second) if err := s3Poller.Poll(ctx); err != nil { if !errors.Is(err, context.DeadlineExceeded) { diff --git a/x-pack/filebeat/input/awss3/input_test.go b/x-pack/filebeat/input/awss3/input_test.go index abc9f5c9a6a6..0a3053f7f1b9 100644 --- a/x-pack/filebeat/input/awss3/input_test.go +++ b/x-pack/filebeat/input/awss3/input_test.go @@ -54,7 +54,6 @@ func TestGetRegionFromQueueURL(t *testing.T) { name string queueURL string endpoint string - deflt string want string wantErr error }{ @@ -77,7 +76,6 @@ func TestGetRegionFromQueueURL(t *testing.T) { { name: "vpce_endpoint", queueURL: "https://vpce-test.sqs.us-east-2.vpce.amazonaws.com/12345678912/sqs-queue", - deflt: "", want: "us-east-2", }, { @@ -90,7 +88,7 @@ func TestGetRegionFromQueueURL(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - got, err := getRegionFromQueueURL(test.queueURL, test.endpoint, test.deflt) + got, err := getRegionFromQueueURL(test.queueURL, test.endpoint) if !sameError(err, test.wantErr) { t.Errorf("unexpected error: got:%v want:%v", err, test.wantErr) } diff --git a/x-pack/filebeat/input/awss3/s3.go b/x-pack/filebeat/input/awss3/s3.go index 5aa8d31e95de..8909f78bb39d 100644 --- a/x-pack/filebeat/input/awss3/s3.go +++ b/x-pack/filebeat/input/awss3/s3.go @@ -11,34 +11,22 @@ import ( "sync" "time" - "github.com/gofrs/uuid" - "go.uber.org/multierr" + "github.com/aws/aws-sdk-go-v2/aws/ratelimit" "github.com/elastic/beats/v7/libbeat/beat" - "github.com/elastic/beats/v7/libbeat/statestore" + "github.com/elastic/beats/v7/libbeat/common/backoff" awscommon "github.com/elastic/beats/v7/x-pack/libbeat/common/aws" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/go-concert/timed" ) -const maxCircuitBreaker = 5 - -type commitWriteState struct { - time.Time -} - -type s3ObjectInfo struct { - name string - key string - etag string - lastModified time.Time - listingID string -} +// var instead of const so it can be reduced during unit tests (instead of waiting +// through 10 minutes of retry backoff) +var readerLoopMaxCircuitBreaker = 10 type s3ObjectPayload struct { s3ObjectHandler s3ObjectHandler - s3ObjectInfo s3ObjectInfo - s3ObjectEvent s3EventV2 + objectState state } type s3Poller struct { @@ -48,15 +36,12 @@ type s3Poller struct { region string provider string bucketPollInterval time.Duration - workerSem *awscommon.Sem s3 s3API log *logp.Logger metrics *inputMetrics client beat.Client s3ObjectHandler s3ObjectHandlerFactory states *states - store *statestore.Store - workersListingMap *sync.Map workersProcessingMap *sync.Map } @@ -66,7 +51,6 @@ func newS3Poller(log *logp.Logger, client beat.Client, s3ObjectHandler s3ObjectHandlerFactory, states *states, - store *statestore.Store, bucket string, listPrefix string, awsRegion string, @@ -85,41 +69,17 @@ func newS3Poller(log *logp.Logger, region: awsRegion, provider: provider, bucketPollInterval: bucketPollInterval, - workerSem: awscommon.NewSem(numberOfWorkers), s3: s3, log: log, metrics: metrics, client: client, s3ObjectHandler: s3ObjectHandler, states: states, - store: store, - workersListingMap: new(sync.Map), workersProcessingMap: new(sync.Map), } } -func (p *s3Poller) handlePurgingLock(info s3ObjectInfo, 
isStored bool) { - id := stateID(info.name, info.key, info.etag, info.lastModified) - previousState := p.states.FindPreviousByID(id) - if !previousState.IsEmpty() { - if isStored { - previousState.MarkAsStored() - } else { - previousState.MarkAsError() - } - - p.states.Update(previousState, info.listingID) - } - - // Manage locks for purging. - if p.states.IsListingFullyStored(info.listingID) { - // locked on processing we unlock when all the object were ACKed - lock, _ := p.workersListingMap.Load(info.listingID) - lock.(*sync.Mutex).Unlock() - } -} - -func (p *s3Poller) createS3ObjectProcessor(ctx context.Context, state state) (s3ObjectHandler, s3EventV2) { +func (p *s3Poller) createS3ObjectProcessor(ctx context.Context, state state) s3ObjectHandler { event := s3EventV2{} event.AWSRegion = p.region event.Provider = p.provider @@ -129,275 +89,126 @@ func (p *s3Poller) createS3ObjectProcessor(ctx context.Context, state state) (s3 acker := awscommon.NewEventACKTracker(ctx) - return p.s3ObjectHandler.Create(ctx, p.log, p.client, acker, event), event + return p.s3ObjectHandler.Create(ctx, p.log, p.client, acker, event) } -func (p *s3Poller) ProcessObject(s3ObjectPayloadChan <-chan *s3ObjectPayload) error { - var errs []error +func (p *s3Poller) workerLoop(ctx context.Context, s3ObjectPayloadChan <-chan *s3ObjectPayload) { + rateLimitWaiter := backoff.NewEqualJitterBackoff(ctx.Done(), 1, 120) for s3ObjectPayload := range s3ObjectPayloadChan { - // Process S3 object (download, parse, create events). - err := s3ObjectPayload.s3ObjectHandler.ProcessS3Object() + objHandler := s3ObjectPayload.s3ObjectHandler + state := s3ObjectPayload.objectState - // Wait for all events to be ACKed before proceeding. - s3ObjectPayload.s3ObjectHandler.Wait() + // Process S3 object (download, parse, create events). + err := objHandler.ProcessS3Object() + if errors.Is(err, errS3DownloadFailed) { + // Download errors are ephemeral. Add a backoff delay, then skip to the + // next iteration so we don't mark the object as permanently failed. + rateLimitWaiter.Wait() + continue + } + // Reset the rate limit delay on results that aren't download errors. + rateLimitWaiter.Reset() - info := s3ObjectPayload.s3ObjectInfo + // Wait for downloaded objects to be ACKed. + objHandler.Wait() if err != nil { - event := s3ObjectPayload.s3ObjectEvent - errs = append(errs, - fmt.Errorf( - fmt.Sprintf("failed processing S3 event for object key %q in bucket %q: %%w", - event.S3.Object.Key, event.S3.Bucket.Name), - err)) - - p.handlePurgingLock(info, false) - continue + p.log.Errorf("failed processing S3 event for object key %q in bucket %q: %v", + state.Key, state.Bucket, err.Error()) + + // Non-retryable error. + state.Failed = true + } else { + state.Stored = true } - p.handlePurgingLock(info, true) + // Persist the result + p.states.AddState(state) // Metrics p.metrics.s3ObjectsAckedTotal.Inc() } - - return multierr.Combine(errs...) 
 }
 
-func (p *s3Poller) GetS3Objects(ctx context.Context, s3ObjectPayloadChan chan<- *s3ObjectPayload) {
+func (p *s3Poller) readerLoop(ctx context.Context, s3ObjectPayloadChan chan<- *s3ObjectPayload) {
 	defer close(s3ObjectPayloadChan)
 
 	bucketName := getBucketNameFromARN(p.bucket)
 
+	errorBackoff := backoff.NewEqualJitterBackoff(ctx.Done(), 1, 120)
 	circuitBreaker := 0
 	paginator := p.s3.ListObjectsPaginator(bucketName, p.listPrefix)
 	for paginator.HasMorePages() {
 		page, err := paginator.NextPage(ctx)
 
-		if err != nil {
-			if !paginator.HasMorePages() {
-				break
-			}
+		if err != nil {
 			p.log.Warnw("Error when paginating listing.", "error", err)
-			circuitBreaker++
-			if circuitBreaker >= maxCircuitBreaker {
-				p.log.Warnw(fmt.Sprintf("%d consecutive error when paginating listing, breaking the circuit.", circuitBreaker), "error", err)
-				break
+			// QuotaExceededError is client-side rate limiting in the AWS sdk,
+			// don't include it in the circuit breaker count
+			if !errors.As(err, &ratelimit.QuotaExceededError{}) {
+				circuitBreaker++
+				if circuitBreaker >= readerLoopMaxCircuitBreaker {
+					p.log.Warnw(fmt.Sprintf("%d consecutive errors when paginating listing, breaking the circuit.", circuitBreaker), "error", err)
+					break
+				}
 			}
+			// add a backoff delay and try again
+			errorBackoff.Wait()
 			continue
 		}
+		// Reset the circuit breaker and the error backoff if a read is successful
+		circuitBreaker = 0
+		errorBackoff.Reset()
 
-		listingID, err := uuid.NewV4()
-		if err != nil {
-			p.log.Warnw("Error generating UUID for listing page.", "error", err)
-			continue
-		}
-
-		// lock for the listing page and state in workersListingMap
-		// this map is shared with the storedOp and will be unlocked there
-		lock := new(sync.Mutex)
-		lock.Lock()
-		p.workersListingMap.Store(listingID.String(), lock)
-
-		totProcessableObjects := 0
 		totListedObjects := len(page.Contents)
-		s3ObjectPayloadChanByPage := make(chan *s3ObjectPayload, totListedObjects)
 
 		// Metrics
 		p.metrics.s3ObjectsListedTotal.Add(uint64(totListedObjects))
 		for _, object := range page.Contents {
-			state := newState(bucketName, *object.Key, *object.ETag, p.listPrefix, *object.LastModified)
-			if p.states.MustSkip(state, p.store) {
+			state := newState(bucketName, *object.Key, *object.ETag, *object.LastModified)
+			if p.states.IsProcessed(state) {
 				p.log.Debugw("skipping state.", "state", state)
 				continue
 			}
 
-			// we have no previous state or the previous state
-			// is not stored: refresh the state
-			previousState := p.states.FindPrevious(state)
-			if previousState.IsEmpty() || !previousState.IsProcessed() {
-				p.states.Update(state, "")
-			}
-
-			s3Processor, event := p.createS3ObjectProcessor(ctx, state)
+			s3Processor := p.createS3ObjectProcessor(ctx, state)
 			if s3Processor == nil {
 				p.log.Debugw("empty s3 processor.", "state", state)
 				continue
 			}
 
-			totProcessableObjects++
-
-			s3ObjectPayloadChanByPage <- &s3ObjectPayload{
+			s3ObjectPayloadChan <- &s3ObjectPayload{
 				s3ObjectHandler: s3Processor,
-				s3ObjectInfo: s3ObjectInfo{
-					name:         bucketName,
-					key:          *object.Key,
-					etag:         *object.ETag,
-					lastModified: *object.LastModified,
-					listingID:    listingID.String(),
-				},
-				s3ObjectEvent: event,
-			}
-		}
-
-		if totProcessableObjects == 0 {
-			p.log.Debugw("0 processable objects on bucket pagination.", "bucket", p.bucket, "listPrefix", p.listPrefix, "listingID", listingID)
-			// nothing to be ACKed, unlock here
-			p.states.DeleteListing(listingID.String())
-			lock.Unlock()
-		} else {
-			listingInfo := &listingInfo{totObjects: totProcessableObjects}
-			p.states.AddListing(listingID.String(), listingInfo)
-
-			//
Metrics - p.metrics.s3ObjectsProcessedTotal.Add(uint64(totProcessableObjects)) - } - - close(s3ObjectPayloadChanByPage) - for s3ObjectPayload := range s3ObjectPayloadChanByPage { - s3ObjectPayloadChan <- s3ObjectPayload - } - } -} - -func (p *s3Poller) Purge(ctx context.Context) { - listingIDs := p.states.GetListingIDs() - p.log.Debugw("purging listing.", "listingIDs", listingIDs) - for _, listingID := range listingIDs { - // we lock here in order to process the purge only after - // full listing page is ACKed by all the workers - lock, loaded := p.workersListingMap.Load(listingID) - if !loaded { - // purge calls can overlap, GetListingIDs can return - // an outdated snapshot with listing already purged - p.states.DeleteListing(listingID) - p.log.Debugw("deleting already purged listing from states.", "listingID", listingID) - continue - } - - lock.(*sync.Mutex).Lock() - - states := map[string]*state{} - latestStoredTimeByBucketAndListPrefix := make(map[string]time.Time, 0) - - listingStates := p.states.GetStatesByListingID(listingID) - for i, state := range listingStates { - // it is not stored, keep - if !state.IsProcessed() { - p.log.Debugw("state not stored or with error, skip purge", "state", state) - continue + objectState: state, } - var latestStoredTime time.Time - states[state.ID] = &listingStates[i] - latestStoredTime, ok := latestStoredTimeByBucketAndListPrefix[state.Bucket+state.ListPrefix] - if !ok { - var commitWriteState commitWriteState - err := p.store.Get(awsS3WriteCommitPrefix+state.Bucket+state.ListPrefix, &commitWriteState) - if err == nil { - // we have no entry in the map, and we have no entry in the store - // set zero time - latestStoredTime = time.Time{} - p.log.Debugw("last stored time is zero time", "bucket", state.Bucket, "listPrefix", state.ListPrefix) - } else { - latestStoredTime = commitWriteState.Time - p.log.Debugw("last stored time is commitWriteState", "commitWriteState", commitWriteState, "bucket", state.Bucket, "listPrefix", state.ListPrefix) - } - } else { - p.log.Debugw("last stored time from memory", "latestStoredTime", latestStoredTime, "bucket", state.Bucket, "listPrefix", state.ListPrefix) - } - - if state.LastModified.After(latestStoredTime) { - p.log.Debugw("last stored time updated", "state.LastModified", state.LastModified, "bucket", state.Bucket, "listPrefix", state.ListPrefix) - latestStoredTimeByBucketAndListPrefix[state.Bucket+state.ListPrefix] = state.LastModified - } - } - - for key := range states { - p.states.Delete(key) - } - - if err := p.states.writeStates(p.store); err != nil { - p.log.Errorw("Failed to write states to the registry", "error", err) - } - - for bucketAndListPrefix, latestStoredTime := range latestStoredTimeByBucketAndListPrefix { - if err := p.store.Set(awsS3WriteCommitPrefix+bucketAndListPrefix, commitWriteState{latestStoredTime}); err != nil { - p.log.Errorw("Failed to write commit time to the registry", "error", err) - } - } - - // purge is done, we can unlock and clean - lock.(*sync.Mutex).Unlock() - p.workersListingMap.Delete(listingID) - p.states.DeleteListing(listingID) - - // Listing is removed from all states, we can finalize now - for _, state := range states { - processor, _ := p.createS3ObjectProcessor(ctx, *state) - if err := processor.FinalizeS3Object(); err != nil { - p.log.Errorw("Failed to finalize S3 object", "key", state.Key, "error", err) - } + p.metrics.s3ObjectsProcessedTotal.Inc() } } } func (p *s3Poller) Poll(ctx context.Context) error { - // This loop tries to keep the workers busy as much 
as possible while
-	// honoring the number in config opposed to a simpler loop that does one
-	// listing, sequentially processes every object and then does another listing
-	workerWg := new(sync.WaitGroup)
 	for ctx.Err() == nil {
-		// Determine how many S3 workers are available.
-		workers, err := p.workerSem.AcquireContext(p.numberOfWorkers, ctx)
-		if err != nil {
-			break
-		}
-
-		if workers == 0 {
-			continue
-		}
+		var workerWg sync.WaitGroup
+		workChan := make(chan *s3ObjectPayload)
 
-		s3ObjectPayloadChan := make(chan *s3ObjectPayload)
-
-		workerWg.Add(1)
-		go func() {
-			defer func() {
-				workerWg.Done()
-			}()
-
-			p.GetS3Objects(ctx, s3ObjectPayloadChan)
-			p.Purge(ctx)
-		}()
-
-		workerWg.Add(workers)
-		for i := 0; i < workers; i++ {
+		// Start the worker goroutines to listen on the work channel
+		for i := 0; i < p.numberOfWorkers; i++ {
+			workerWg.Add(1)
 			go func() {
-				defer func() {
-					workerWg.Done()
-					p.workerSem.Release(1)
-				}()
-				if err := p.ProcessObject(s3ObjectPayloadChan); err != nil {
-					p.log.Warnw("Failed processing S3 listing.", "error", err)
-				}
+				defer workerWg.Done()
+				p.workerLoop(ctx, workChan)
 			}()
 		}
 
-		err = timed.Wait(ctx, p.bucketPollInterval)
-		if err != nil {
-			if errors.Is(err, context.Canceled) {
-				// A canceled context is a normal shutdown.
-				return nil
-			}
+		// Start reading data and wait for its processing to be done
+		p.readerLoop(ctx, workChan)
+		workerWg.Wait()
 
-			return err
-		}
+		_ = timed.Wait(ctx, p.bucketPollInterval)
 	}
 
-	// Wait for all workers to finish.
-	workerWg.Wait()
-
 	if errors.Is(ctx.Err(), context.Canceled) {
 		// A canceled context is a normal shutdown.
 		return nil
diff --git a/x-pack/filebeat/input/awss3/s3_objects.go b/x-pack/filebeat/input/awss3/s3_objects.go
index 32911778336b..21dfa2243e7b 100644
--- a/x-pack/filebeat/input/awss3/s3_objects.go
+++ b/x-pack/filebeat/input/awss3/s3_objects.go
@@ -43,6 +43,11 @@ type s3ObjectProcessorFactory struct {
 	backupConfig backupConfig
 }
 
+// errS3DownloadFailed reports problems downloading an S3 object. Download errors
+// should never be treated as permanent; they are just an indication to apply a
+// retry backoff until the connection is healthy again.
+var errS3DownloadFailed = errors.New("S3 download failure")
+
 func newS3ObjectProcessorFactory(log *logp.Logger, metrics *inputMetrics, s3 s3API, sel []fileSelectorConfig, backupConfig backupConfig, maxWorkers int) *s3ObjectProcessorFactory {
 	if metrics == nil {
 		// Metrics are optional. Initialize a stub.
@@ -135,8 +140,9 @@ func (p *s3ObjectProcessor) ProcessS3Object() error {
 	// Request object (download).
 	contentType, meta, body, err := p.download()
 	if err != nil {
-		return fmt.Errorf("failed to get s3 object (elapsed_time_ns=%d): %w",
-			time.Since(start).Nanoseconds(), err)
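workerLoop (above) detects these wrapped download failures with errors.Is. Go 1.20's support for multiple %w verbs keeps both the sentinel and the underlying cause inspectable; a standalone illustration (assumed cause error, not part of the patch):

package main

import (
	"errors"
	"fmt"
)

var errS3DownloadFailed = errors.New("S3 download failure")

func main() {
	cause := errors.New("connection reset") // stand-in for a real SDK error
	// Same double-wrap shape as the return statement below.
	err := fmt.Errorf("%w: %w", errS3DownloadFailed, cause)
	fmt.Println(errors.Is(err, errS3DownloadFailed)) // true
	fmt.Println(errors.Is(err, cause))               // true
}

+		// Wrap errS3DownloadFailed into the result so the caller knows it's not a
+		// permanent failure.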
+ return fmt.Errorf("%w: %w", errS3DownloadFailed, err) } defer body.Close() p.s3Metadata = meta @@ -434,10 +440,7 @@ func (p *s3ObjectProcessor) FinalizeS3Object() error { if bucketName == "" { return nil } - backupKey := p.s3Obj.S3.Object.Key - if p.backupConfig.BackupToBucketPrefix != "" { - backupKey = fmt.Sprintf("%s%s", p.backupConfig.BackupToBucketPrefix, backupKey) - } + backupKey := p.backupConfig.BackupToBucketPrefix + p.s3Obj.S3.Object.Key _, err := p.s3.CopyObject(p.ctx, p.s3Obj.S3.Bucket.Name, bucketName, p.s3Obj.S3.Object.Key, backupKey) if err != nil { return fmt.Errorf("failed to copy object to backup bucket: %w", err) diff --git a/x-pack/filebeat/input/awss3/s3_objects_test.go b/x-pack/filebeat/input/awss3/s3_objects_test.go index 6732c12e0579..28e8f4f42a52 100644 --- a/x-pack/filebeat/input/awss3/s3_objects_test.go +++ b/x-pack/filebeat/input/awss3/s3_objects_test.go @@ -8,7 +8,8 @@ import ( "bytes" "context" "errors" - "io/ioutil" + "io" + "os" "path/filepath" "strings" "testing" @@ -27,7 +28,7 @@ import ( ) func newS3Object(t testing.TB, filename, contentType string) (s3EventV2, *s3.GetObjectOutput) { - data, err := ioutil.ReadFile(filename) + data, err := os.ReadFile(filename) if err != nil { t.Fatal(err) } @@ -39,7 +40,7 @@ func newS3GetObjectResponse(filename string, data []byte, contentType string) *s r := bytes.NewReader(data) getObjectOutput := s3.GetObjectOutput{} getObjectOutput.ContentLength = int64(r.Len()) - getObjectOutput.Body = ioutil.NopCloser(r) + getObjectOutput.Body = io.NopCloser(r) if contentType != "" { getObjectOutput.ContentType = &contentType } @@ -157,7 +158,7 @@ func TestS3ObjectProcessor(t *testing.T) { ack := awscommon.NewEventACKTracker(ctx) err := s3ObjProc.Create(ctx, logp.NewLogger(inputName), mockPublisher, ack, s3Event).ProcessS3Object() require.Error(t, err) - assert.True(t, errors.Is(err, errFakeConnectivityFailure), "expected errFakeConnectivityFailure error") + assert.True(t, errors.Is(err, errS3DownloadFailed), "expected errS3DownloadFailed") }) t.Run("no error empty result in download", func(t *testing.T) { diff --git a/x-pack/filebeat/input/awss3/s3_test.go b/x-pack/filebeat/input/awss3/s3_test.go index b94ba7cfb09b..be1d65b796eb 100644 --- a/x-pack/filebeat/input/awss3/s3_test.go +++ b/x-pack/filebeat/input/awss3/s3_test.go @@ -13,7 +13,6 @@ import ( "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3/types" "github.com/golang/mock/gomock" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/elastic/beats/v7/libbeat/statestore" @@ -134,12 +133,16 @@ func TestS3Poller(t *testing.T) { Return(nil, errFakeConnectivityFailure) s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockAPI, nil, backupConfig{}, numberOfWorkers) - receiver := newS3Poller(logp.NewLogger(inputName), nil, mockAPI, mockPublisher, s3ObjProc, newStates(inputCtx), store, bucket, "key", "region", "provider", numberOfWorkers, pollInterval) + states, err := newStates(inputCtx, store) + require.NoError(t, err, "states creation must succeed") + receiver := newS3Poller(logp.NewLogger(inputName), nil, mockAPI, mockPublisher, s3ObjProc, states, bucket, "key", "region", "provider", numberOfWorkers, pollInterval) require.Error(t, context.DeadlineExceeded, receiver.Poll(ctx)) - assert.Equal(t, numberOfWorkers, receiver.workerSem.Available()) }) - t.Run("retry after Poll error", func(t *testing.T) { + t.Run("restart bucket scan after paging errors", func(t *testing.T) { + // Change the 
diff --git a/x-pack/filebeat/input/awss3/s3_objects_test.go b/x-pack/filebeat/input/awss3/s3_objects_test.go
index 6732c12e0579..28e8f4f42a52 100644
--- a/x-pack/filebeat/input/awss3/s3_objects_test.go
+++ b/x-pack/filebeat/input/awss3/s3_objects_test.go
@@ -8,7 +8,8 @@ import (
 	"bytes"
 	"context"
 	"errors"
-	"io/ioutil"
+	"io"
+	"os"
 	"path/filepath"
 	"strings"
 	"testing"
@@ -27,7 +28,7 @@ import (
 )
 
 func newS3Object(t testing.TB, filename, contentType string) (s3EventV2, *s3.GetObjectOutput) {
-	data, err := ioutil.ReadFile(filename)
+	data, err := os.ReadFile(filename)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -39,7 +40,7 @@ func newS3GetObjectResponse(filename string, data []byte, contentType string) *s
 	r := bytes.NewReader(data)
 	getObjectOutput := s3.GetObjectOutput{}
 	getObjectOutput.ContentLength = int64(r.Len())
-	getObjectOutput.Body = ioutil.NopCloser(r)
+	getObjectOutput.Body = io.NopCloser(r)
 	if contentType != "" {
 		getObjectOutput.ContentType = &contentType
 	}
@@ -157,7 +158,7 @@ func TestS3ObjectProcessor(t *testing.T) {
 		ack := awscommon.NewEventACKTracker(ctx)
 		err := s3ObjProc.Create(ctx, logp.NewLogger(inputName), mockPublisher, ack, s3Event).ProcessS3Object()
 		require.Error(t, err)
-		assert.True(t, errors.Is(err, errFakeConnectivityFailure), "expected errFakeConnectivityFailure error")
+		assert.True(t, errors.Is(err, errS3DownloadFailed), "expected errS3DownloadFailed")
 	})
 
 	t.Run("no error empty result in download", func(t *testing.T) {
diff --git a/x-pack/filebeat/input/awss3/s3_test.go b/x-pack/filebeat/input/awss3/s3_test.go
index b94ba7cfb09b..be1d65b796eb 100644
--- a/x-pack/filebeat/input/awss3/s3_test.go
+++ b/x-pack/filebeat/input/awss3/s3_test.go
@@ -13,7 +13,6 @@ import (
 	"github.com/aws/aws-sdk-go-v2/service/s3"
 	"github.com/aws/aws-sdk-go-v2/service/s3/types"
 	"github.com/golang/mock/gomock"
-	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
 	"github.com/elastic/beats/v7/libbeat/statestore"
@@ -134,12 +133,16 @@ func TestS3Poller(t *testing.T) {
 			Return(nil, errFakeConnectivityFailure)
 
 		s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockAPI, nil, backupConfig{}, numberOfWorkers)
-		receiver := newS3Poller(logp.NewLogger(inputName), nil, mockAPI, mockPublisher, s3ObjProc, newStates(inputCtx), store, bucket, "key", "region", "provider", numberOfWorkers, pollInterval)
+		states, err := newStates(inputCtx, store)
+		require.NoError(t, err, "states creation must succeed")
+		receiver := newS3Poller(logp.NewLogger(inputName), nil, mockAPI, mockPublisher, s3ObjProc, states, bucket, "key", "region", "provider", numberOfWorkers, pollInterval)
 		require.Error(t, context.DeadlineExceeded, receiver.Poll(ctx))
-		assert.Equal(t, numberOfWorkers, receiver.workerSem.Available())
 	})
 
-	t.Run("retry after Poll error", func(t *testing.T) {
+	t.Run("restart bucket scan after paging errors", func(t *testing.T) {
+		// Change the restart limit to 2 consecutive errors, so the test doesn't
+		// take too long to run
+		readerLoopMaxCircuitBreaker = 2
 		storeReg := statestore.NewRegistry(storetest.NewMemoryStoreBackend())
 		store, err := storeReg.Get("test")
 		if err != nil {
@@ -176,13 +179,13 @@ func TestS3Poller(t *testing.T) {
 		// Initial Next gets an error.
 		mockPagerFirst.EXPECT().
 			HasMorePages().
-			Times(10).
+			Times(2).
 			DoAndReturn(func() bool {
 				return true
 			})
 		mockPagerFirst.EXPECT().
 			NextPage(gomock.Any()).
-			Times(5).
+			Times(2).
 			DoAndReturn(func(_ context.Context, optFns ...func(*s3.Options)) (*s3.ListObjectsV2Output, error) {
 				return nil, errFakeConnectivityFailure
 			})
@@ -257,8 +260,9 @@ func TestS3Poller(t *testing.T) {
 			Return(nil, errFakeConnectivityFailure)
 
 		s3ObjProc := newS3ObjectProcessorFactory(logp.NewLogger(inputName), nil, mockAPI, nil, backupConfig{}, numberOfWorkers)
-		receiver := newS3Poller(logp.NewLogger(inputName), nil, mockAPI, mockPublisher, s3ObjProc, newStates(inputCtx), store, bucket, "key", "region", "provider", numberOfWorkers, pollInterval)
+		states, err := newStates(inputCtx, store)
+		require.NoError(t, err, "states creation must succeed")
+		receiver := newS3Poller(logp.NewLogger(inputName), nil, mockAPI, mockPublisher, s3ObjProc, states, bucket, "key", "region", "provider", numberOfWorkers, pollInterval)
 		require.Error(t, context.DeadlineExceeded, receiver.Poll(ctx))
-		assert.Equal(t, numberOfWorkers, receiver.workerSem.Available())
 	})
 }
diff --git a/x-pack/filebeat/input/awss3/state.go b/x-pack/filebeat/input/awss3/state.go
index 97fb8d538cd6..4b7e09f9e7fa 100644
--- a/x-pack/filebeat/input/awss3/state.go
+++ b/x-pack/filebeat/input/awss3/state.go
@@ -5,84 +5,52 @@
 package awss3
 
 import (
-	"fmt"
 	"time"
 )
 
 // state is used to communicate the publishing state of a s3 object
 type state struct {
-	// ID is used to identify the state in the store, and it is composed by
-	// Bucket + Key + Etag + LastModified.String(): changing this value or how it is
-	// composed will break backward compatibilities with entries already in the store.
-	ID string `json:"id" struct:"id"`
 	Bucket       string    `json:"bucket" struct:"bucket"`
 	Key          string    `json:"key" struct:"key"`
 	Etag         string    `json:"etag" struct:"etag"`
 	LastModified time.Time `json:"last_modified" struct:"last_modified"`
 
-	// ListPrefix is used for unique of the key in the store for awsS3WriteCommitPrefix
-	ListPrefix string `json:"list_prefix" struct:"list_prefix"`
-
 	// A state has Stored = true when all events are ACKed.
 	Stored bool `json:"stored" struct:"stored"`
-	// A state has Error = true when ProcessS3Object returned an error
-	Error bool `json:"error" struct:"error"`
+
+	// Failed is true when ProcessS3Object returned an error other than
+	// errS3DownloadFailed.
+	// Before 8.14, this field was called "error". However, that field was
+	// set for many ephemeral reasons including client-side rate limiting
+	// (see https://github.com/elastic/beats/issues/39114). Now that we
+	// don't treat download errors as permanent, the field name was changed
+	// so that users upgrading from old versions aren't prevented from
+	// retrying old download failures.
+	Failed bool `json:"failed" struct:"failed"`
 }
 
+// ID is used to identify the state in the store, and it is composed of
+// Bucket + Key + Etag + LastModified.String(): changing this value or how it is
+// composed will break backward compatibility with entries already in the store.
 func stateID(bucket, key, etag string, lastModified time.Time) string {
 	return bucket + key + etag + lastModified.String()
 }
 
 // newState creates a new s3 object state
-func newState(bucket, key, etag, listPrefix string, lastModified time.Time) state {
-	s := state{
+func newState(bucket, key, etag string, lastModified time.Time) state {
+	return state{
 		Bucket:       bucket,
 		Key:          key,
 		LastModified: lastModified,
 		Etag:         etag,
-		ListPrefix:   listPrefix,
-		Stored:       false,
-		Error:        false,
 	}
-
-	s.ID = stateID(s.Bucket, s.Key, s.Etag, s.LastModified)
-
-	return s
 }
 
-// MarkAsStored set the stored flag to true
-func (s *state) MarkAsStored() {
-	s.Stored = true
-}
-
-// MarkAsError set the error flag to true
-func (s *state) MarkAsError() {
-	s.Error = true
-}
-
-// IsProcessed checks if the state is either Stored or Error
-func (s *state) IsProcessed() bool {
-	return s.Stored || s.Error
+func (s *state) ID() string {
+	return stateID(s.Bucket, s.Key, s.Etag, s.LastModified)
 }
 
 // IsEqual checks if the two states point to the same s3 object.
 func (s *state) IsEqual(c *state) bool {
 	return s.Bucket == c.Bucket && s.Key == c.Key && s.Etag == c.Etag && s.LastModified.Equal(c.LastModified)
 }
-
-// IsEmpty checks if the state is empty
-func (s *state) IsEmpty() bool {
-	c := state{}
-	return s.Bucket == c.Bucket && s.Key == c.Key && s.Etag == c.Etag && s.LastModified.Equal(c.LastModified)
-}
-
-// String returns string representation of the struct
-func (s *state) String() string {
-	return fmt.Sprintf(
-		"{ID: %v, Bucket: %v, Key: %v, Etag: %v, LastModified: %v}",
-		s.ID,
-		s.Bucket,
-		s.Key,
-		s.Etag,
-		s.LastModified)
-}
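Because ID() is derived purely from the four identifying fields, equal states always map to the same registry key, and the key format is part of the on-disk contract. A quick illustration of what a stored key looks like, using the stateID function above (bucket, key, and etag values are made up):

```go
package main

import (
	"fmt"
	"time"
)

// Copied from the diff above; the concatenation has no separators.
func stateID(bucket, key, etag string, lastModified time.Time) string {
	return bucket + key + etag + lastModified.String()
}

func main() {
	ts := time.Date(2022, time.June, 30, 14, 13, 0, 0, time.UTC)
	id := stateID("my-bucket", "logs/app.ndjson", "etag123", ts)
	// Prints:
	// filebeat::aws-s3::state::my-bucketlogs/app.ndjsonetag1232022-06-30 14:13:00 +0000 UTC
	fmt.Println("filebeat::aws-s3::state::" + id)
}
```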
diff --git a/x-pack/filebeat/input/awss3/state_test.go b/x-pack/filebeat/input/awss3/state_test.go
index 24a5e9d81b4e..375a44ce79e2 100644
--- a/x-pack/filebeat/input/awss3/state_test.go
+++ b/x-pack/filebeat/input/awss3/state_test.go
@@ -61,7 +61,7 @@ func TestStateIsEqual(t *testing.T) {
 				Key:          "/key/to/this/file/1",
 				Etag:         "etag",
 				LastModified: lastModifed,
-				Error:        true,
+				Failed:       true,
 			},
 			{
 				Bucket: "bucket a",
diff --git a/x-pack/filebeat/input/awss3/states.go b/x-pack/filebeat/input/awss3/states.go
index 449219a867f5..edbbcc73793e 100644
--- a/x-pack/filebeat/input/awss3/states.go
+++ b/x-pack/filebeat/input/awss3/states.go
@@ -15,278 +15,64 @@ import (
 	"github.com/elastic/beats/v7/libbeat/statestore"
 )
 
-const (
-	awsS3ObjectStatePrefix = "filebeat::aws-s3::state::"
-	awsS3WriteCommitPrefix = "filebeat::aws-s3::writeCommit::"
-)
-
-type listingInfo struct {
-	totObjects int
-
-	mu            sync.Mutex
-	storedObjects int
-	errorObjects  int
-	finalCheck    bool
-}
+const awsS3ObjectStatePrefix = "filebeat::aws-s3::state::"
 
 // states handles list of s3 object state. One must use newStates to instantiate a
 // file states registry. Using the zero-value is not safe.
 type states struct {
-	sync.RWMutex
 	log *logp.Logger
 
-	// states store
-	states []state
-
-	// idx maps state IDs to state indexes for fast lookup and modifications.
-	idx map[string]int
+	// Completed S3 object states, indexed by state ID.
+	// statesLock must be held to access states.
+	states     map[string]state
+	statesLock sync.Mutex
 
-	listingIDs        map[string]struct{}
-	listingInfo       *sync.Map
-	statesByListingID map[string][]state
+	// The store used to persist state changes to the registry.
+	// storeLock must be held to access store.
+	store     *statestore.Store
+	storeLock sync.Mutex
 }
 
 // newStates generates a new states registry.
-func newStates(ctx v2.Context) *states {
-	return &states{
-		log:               ctx.Logger.Named("states"),
-		states:            nil,
-		idx:               map[string]int{},
-		listingInfo:       new(sync.Map),
-		listingIDs:        map[string]struct{}{},
-		statesByListingID: map[string][]state{},
-	}
-}
-
-func (s *states) MustSkip(state state, store *statestore.Store) bool {
-	if !s.IsNew(state) {
-		s.log.Debugw("not new state in must skip", "state", state)
-		return true
-	}
-
-	previousState := s.FindPrevious(state)
-
-	// status is forgotten. if there is no previous state and
-	// the state.LastModified is before the last cleanStore
-	// write commit we can remove
-	var commitWriteState commitWriteState
-	err := store.Get(awsS3WriteCommitPrefix+state.Bucket+state.ListPrefix, &commitWriteState)
-	if err == nil && previousState.IsEmpty() &&
-		(state.LastModified.Before(commitWriteState.Time) || state.LastModified.Equal(commitWriteState.Time)) {
-		s.log.Debugw("state.LastModified older than writeCommitState in must skip", "state", state, "commitWriteState", commitWriteState)
-		return true
-	}
-
-	// the previous state is stored or has error: let's skip
-	if !previousState.IsEmpty() && previousState.IsProcessed() {
-		s.log.Debugw("previous state is stored or has error", "state", state)
-		return true
-	}
-
-	return false
-}
-
-func (s *states) Delete(id string) {
-	s.Lock()
-	defer s.Unlock()
-
-	index := s.findPrevious(id)
-	if index >= 0 {
-		last := len(s.states) - 1
-		s.states[last], s.states[index] = s.states[index], s.states[last]
-		s.states = s.states[:last]
-
-		s.idx = map[string]int{}
-		for i, state := range s.states {
-			s.idx[state.ID] = i
-		}
-	}
-}
-
-// IsListingFullyStored check if listing if fully stored
-// After first time the condition is met it will always return false
-func (s *states) IsListingFullyStored(listingID string) bool {
-	info, ok := s.listingInfo.Load(listingID)
-	if !ok {
-		return false
-	}
-	listingInfo, ok := info.(*listingInfo)
-	if !ok {
-		return false
-	}
-
-	listingInfo.mu.Lock()
-	defer listingInfo.mu.Unlock()
-	if listingInfo.finalCheck {
-		return false
-	}
-
-	listingInfo.finalCheck = (listingInfo.storedObjects + listingInfo.errorObjects) == listingInfo.totObjects
-
-	if (listingInfo.storedObjects + listingInfo.errorObjects) > listingInfo.totObjects {
-		s.log.Warnf("unexepected mixmatch between storedObjects (%d), errorObjects (%d) and totObjects (%d)",
-			listingInfo.storedObjects, listingInfo.errorObjects, listingInfo.totObjects)
-	}
-
-	return listingInfo.finalCheck
-}
-
-// AddListing add listing info
-func (s *states) AddListing(listingID string, listingInfo *listingInfo) {
-	s.Lock()
-	defer s.Unlock()
-	s.listingIDs[listingID] = struct{}{}
-	s.listingInfo.Store(listingID, listingInfo)
-}
-
-// DeleteListing delete listing info
-func (s *states) DeleteListing(listingID string) {
-	s.Lock()
-	defer s.Unlock()
-	delete(s.listingIDs, listingID)
-	delete(s.statesByListingID, listingID)
-	s.listingInfo.Delete(listingID)
-}
-
-// Update updates a state. If previous state didn't exist, new one is created
-func (s *states) Update(newState state, listingID string) {
-	s.Lock()
-	defer s.Unlock()
-
-	id := newState.ID
-	index := s.findPrevious(id)
-
-	if index >= 0 {
-		s.states[index] = newState
-	} else {
-		// No existing state found, add new one
-		s.idx[id] = len(s.states)
-		s.states = append(s.states, newState)
-		s.log.Debug("New state added for ", newState.ID)
-	}
-
-	if listingID == "" || !newState.IsProcessed() {
-		return
-	}
-
-	// here we increase the number of stored object
-	info, ok := s.listingInfo.Load(listingID)
-	if !ok {
-		return
-	}
-	listingInfo, ok := info.(*listingInfo)
-	if !ok {
-		return
-	}
-
-	listingInfo.mu.Lock()
-
-	if newState.Stored {
-		listingInfo.storedObjects++
-	}
-
-	if newState.Error {
-		listingInfo.errorObjects++
-	}
-
-	listingInfo.mu.Unlock()
-
-	if _, ok := s.statesByListingID[listingID]; !ok {
-		s.statesByListingID[listingID] = make([]state, 0)
+func newStates(ctx v2.Context, store *statestore.Store) (*states, error) {
+	states := &states{
+		log:    ctx.Logger.Named("states"),
+		states: map[string]state{},
+		store:  store,
 	}
-
-	s.statesByListingID[listingID] = append(s.statesByListingID[listingID], newState)
+	return states, states.loadFromRegistry()
 }
 
-// FindPrevious lookups a registered state, that matching the new state.
-// Returns a zero-state if no match is found.
-func (s *states) FindPrevious(newState state) state {
-	s.RLock()
-	defer s.RUnlock()
-	id := newState.ID
-	i := s.findPrevious(id)
-	if i < 0 {
-		return state{}
-	}
-	return s.states[i]
+func (s *states) IsProcessed(state state) bool {
+	s.statesLock.Lock()
+	defer s.statesLock.Unlock()
+	// Our in-memory table only stores completed objects
+	_, ok := s.states[state.ID()]
+	return ok
 }
 
-// FindPreviousByID lookups a registered state, that matching the id.
-// Returns a zero-state if no match is found.
-func (s *states) FindPreviousByID(id string) state {
-	s.RLock()
-	defer s.RUnlock()
-	i := s.findPrevious(id)
-	if i < 0 {
-		return state{}
-	}
-	return s.states[i]
-}
-
-func (s *states) IsNew(state state) bool {
-	s.RLock()
-	defer s.RUnlock()
-	id := state.ID
-	i := s.findPrevious(id)
-
-	if i < 0 {
-		return true
-	}
+func (s *states) AddState(state state) {
 
-	return !s.states[i].IsEqual(&state)
-}
+	id := state.ID()
+	// Update in-memory copy
+	s.statesLock.Lock()
+	s.states[id] = state
+	s.statesLock.Unlock()
 
-// findPrevious returns the previous state for the file.
-// In case no previous state exists, index -1 is returned
-func (s *states) findPrevious(id string) int {
-	if i, exists := s.idx[id]; exists {
-		return i
+	// Persist to the registry
+	s.storeLock.Lock()
+	key := awsS3ObjectStatePrefix + id
+	if err := s.store.Set(key, state); err != nil {
+		s.log.Errorw("Failed to write states to the registry", "error", err)
 	}
-	return -1
-}
-
-// GetStates creates copy of the file states.
-func (s *states) GetStates() []state {
-	s.RLock()
-	defer s.RUnlock()
-
-	newStates := make([]state, len(s.states))
-	copy(newStates, s.states)
-
-	return newStates
-}
-
-// GetListingIDs return a of the listing IDs
-func (s *states) GetListingIDs() []string {
-	s.RLock()
-	defer s.RUnlock()
-	listingIDs := make([]string, 0, len(s.listingIDs))
-	for listingID := range s.listingIDs {
-		listingIDs = append(listingIDs, listingID)
-	}
-
-	return listingIDs
-}
-
-// GetStatesByListingID return a copy of the states by listing ID
-func (s *states) GetStatesByListingID(listingID string) []state {
-	s.RLock()
-	defer s.RUnlock()
-
-	if _, ok := s.statesByListingID[listingID]; !ok {
-		return nil
-	}
-
-	newStates := make([]state, len(s.statesByListingID[listingID]))
-	copy(newStates, s.statesByListingID[listingID])
-	return newStates
+	s.storeLock.Unlock()
 }
 
-func (s *states) readStatesFrom(store *statestore.Store) error {
-	var states []state
+func (s *states) loadFromRegistry() error {
+	states := map[string]state{}
 
-	err := store.Each(func(key string, dec statestore.ValueDecoder) (bool, error) {
+	s.storeLock.Lock()
+	err := s.store.Each(func(key string, dec statestore.ValueDecoder) (bool, error) {
 		if !strings.HasPrefix(key, awsS3ObjectStatePrefix) {
 			return true, nil
 		}
@@ -294,78 +80,30 @@ func (s *states) readStatesFrom(store *statestore.Store) error {
 		// try to decode. Ignore faulty/incompatible values.
 		var st state
 		if err := dec.Decode(&st); err != nil {
-			// XXX: Do we want to log here? In case we start to store other
-			// state types in the registry, then this operation will likely fail
-			// quite often, producing some false-positives in the logs...
-			return false, err
+			// Skip this key but continue iteration
+			s.log.Warnf("invalid S3 state loading object key %v", key)
+			//nolint:nilerr // One bad object shouldn't stop iteration
+			return true, nil
+		}
+		if !st.Stored && !st.Failed {
+			// This is from an older version where state could be stored in the
+			// registry even if the object wasn't processed, or if it encountered
+			// ephemeral download errors. We don't add these to the in-memory cache,
+			// so if we see them during a bucket scan we will still retry them.
+			return true, nil
 		}
 
-		st.ID = key[len(awsS3ObjectStatePrefix):]
-		states = append(states, st)
+		states[st.ID()] = st
 		return true, nil
 	})
+	s.storeLock.Unlock()
 	if err != nil {
 		return err
 	}
 
-	states = fixStates(states)
-
-	for _, state := range states {
-		s.Update(state, "")
-	}
-
-	return nil
-}
-
-// fixStates cleans up the registry states when updating from an older version
-// of filebeat potentially writing invalid entries.
-func fixStates(states []state) []state {
-	if len(states) == 0 {
-		return states
-	}
-
-	// we use a map of states here, so to identify and merge duplicate entries.
-	idx := map[string]*state{}
-	for i := range states {
-		state := &states[i]
-
-		old, exists := idx[state.ID]
-		if !exists {
-			idx[state.ID] = state
-		} else {
-			mergeStates(old, state) // overwrite the entry in 'old'
-		}
-	}
-
-	if len(idx) == len(states) {
-		return states
-	}
-
-	i := 0
-	newStates := make([]state, len(idx))
-	for _, state := range idx {
-		newStates[i] = *state
-		i++
-	}
-	return newStates
-}
-
-// mergeStates merges 2 states by trying to determine the 'newer' state.
-// The st state is overwritten with the updated fields.
-func mergeStates(st, other *state) {
-	// update file meta-data. As these are updated concurrently by the
-	// inputs, select the newer state based on the update timestamp.
-	if st.LastModified.Before(other.LastModified) {
-		st.LastModified = other.LastModified
-	}
-}
+	s.statesLock.Lock()
+	s.states = states
+	s.statesLock.Unlock()
 
-func (s *states) writeStates(store *statestore.Store) error {
-	for _, state := range s.GetStates() {
-		key := awsS3ObjectStatePrefix + state.ID
-		if err := store.Set(key, state); err != nil {
-			return err
-		}
-	}
 	return nil
 }
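Taken together, the new states type is a write-through cache: AddState updates the in-memory map and synchronously persists the entry, while newStates rebuilds the map from the registry on startup. A hedged sketch of how the poller is expected to use it; the wrapper function is illustrative, but newState, IsProcessed, AddState, and the Stored field are the real API from this diff:

```go
package awss3

import "time"

// markProcessed is an illustrative wrapper, not a function in this patch.
func markProcessed(states *states, bucket, key, etag string, lastModified time.Time) {
	s := newState(bucket, key, etag, lastModified)
	if states.IsProcessed(s) {
		return // already stored, or failed permanently in an earlier scan
	}

	// ... download, parse, and publish the object here ...

	s.Stored = true
	states.AddState(s) // updates the in-memory map and persists to the store
}
```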
diff --git a/x-pack/filebeat/input/awss3/states_test.go b/x-pack/filebeat/input/awss3/states_test.go
index 39dc4cf82e63..2f8bbf58fdfb 100644
--- a/x-pack/filebeat/input/awss3/states_test.go
+++ b/x-pack/filebeat/input/awss3/states_test.go
@@ -14,6 +14,7 @@ import (
 	"github.com/elastic/beats/v7/libbeat/statestore/storetest"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 
 	v2 "github.com/elastic/beats/v7/filebeat/input/v2"
 	"github.com/elastic/elastic-agent-libs/logp"
@@ -46,287 +47,92 @@ var inputCtx = v2.Context{
 	Cancelation: context.Background(),
 }
 
-func TestStatesIsNewAndMustSkip(t *testing.T) {
+func TestStatesAddStateAndIsProcessed(t *testing.T) {
 	type stateTestCase struct {
-		states            func() *states
-		state             state
-		mustBeNew         bool
-		persistentStoreKV map[string]interface{}
-		expectedMustSkip  bool
-		expectedIsNew     bool
+		// An initialization callback to invoke on the (initially empty) states.
+		statesEdit func(states *states)
+
+		// The state to call IsProcessed on and the expected result
+		state               state
+		expectedIsProcessed bool
+
+		// If true, the test will run statesEdit, then create a new states
+		// object from the same persistent store before calling IsProcessed
+		// (to test persistence between restarts).
+		shouldReload bool
 	}
 
 	lastModified := time.Date(2022, time.June, 30, 14, 13, 00, 0, time.UTC)
+	testState1 := newState("bucket", "key", "etag", lastModified)
+	testState2 := newState("bucket1", "key1", "etag1", lastModified)
 	tests := map[string]stateTestCase{
 		"with empty states": {
-			states: func() *states {
-				return newStates(inputCtx)
-			},
-			state:            newState("bucket", "key", "etag", "listPrefix", lastModified),
-			expectedMustSkip: false,
-			expectedIsNew:    true,
+			state:               testState1,
+			expectedIsProcessed: false,
 		},
 		"not existing state": {
-			states: func() *states {
-				states := newStates(inputCtx)
-				states.Update(newState("bucket", "key", "etag", "listPrefix", lastModified), "")
-				return states
+			statesEdit: func(states *states) {
+				states.AddState(testState2)
 			},
-			state:            newState("bucket1", "key1", "etag1", "listPrefix1", lastModified),
-			expectedMustSkip: false,
-			expectedIsNew:    true,
+			state:               testState1,
+			expectedIsProcessed: false,
 		},
 		"existing state": {
-			states: func() *states {
-				states := newStates(inputCtx)
-				states.Update(newState("bucket", "key", "etag", "listPrefix", lastModified), "")
-				return states
-			},
-			state:            newState("bucket", "key", "etag", "listPrefix", lastModified),
-			expectedMustSkip: true,
-			expectedIsNew:    false,
-		},
-		"with different etag": {
-			states: func() *states {
-				states := newStates(inputCtx)
-				states.Update(newState("bucket", "key", "etag1", "listPrefix", lastModified), "")
-				return states
-			},
-			state:            newState("bucket", "key", "etag2", "listPrefix", lastModified),
-			expectedMustSkip: false,
-			expectedIsNew:    true,
-		},
-		"with different lastmodified": {
-			states: func() *states {
-				states := newStates(inputCtx)
-				states.Update(newState("bucket", "key", "etag", "listPrefix", lastModified), "")
-				return states
-			},
-			state:            newState("bucket", "key", "etag", "listPrefix", lastModified.Add(1*time.Second)),
-			expectedMustSkip: false,
-			expectedIsNew:    true,
-		},
-		"with stored state": {
-			states: func() *states {
-				states := newStates(inputCtx)
-				aState := newState("bucket", "key", "etag", "listPrefix", lastModified)
-				aState.Stored = true
-				states.Update(aState, "")
-				return states
+			statesEdit: func(states *states) {
+				states.AddState(testState1)
 			},
-			state:            newState("bucket", "key", "etag", "listPrefix", lastModified),
-			mustBeNew:        true,
-			expectedMustSkip: true,
-			expectedIsNew:    true,
+			state:               testState1,
+			expectedIsProcessed: true,
 		},
-		"with error state": {
-			states: func() *states {
-				states := newStates(inputCtx)
-				aState := newState("bucket", "key", "etag", "listPrefix", lastModified)
-				aState.Error = true
-				states.Update(aState, "")
-				return states
+		"existing stored state is persisted": {
+			statesEdit: func(states *states) {
+				state := testState1
+				state.Stored = true
+				states.AddState(state)
 			},
-			state:            newState("bucket", "key", "etag", "listPrefix", lastModified),
-			mustBeNew:        true,
-			expectedMustSkip: true,
-			expectedIsNew:    true,
+			state:               testState1,
+			shouldReload:        true,
+			expectedIsProcessed: true,
 		},
-		"before commit write": {
-			states: func() *states {
-				return newStates(inputCtx)
+		"existing failed state is persisted": {
+			statesEdit: func(states *states) {
+				state := testState1
+				state.Failed = true
+				states.AddState(state)
 			},
-			persistentStoreKV: map[string]interface{}{
-				awsS3WriteCommitPrefix + "bucket" + "listPrefix": &commitWriteState{lastModified},
-			},
-			state:            newState("bucket", "key", "etag", "listPrefix", lastModified.Add(-1*time.Second)),
-			expectedMustSkip: true,
-			expectedIsNew:    true,
+			state:               testState1,
+			shouldReload:        true,
+			expectedIsProcessed: true,
 		},
-		"same commit write": {
-			states: func() *states {
-				return newStates(inputCtx)
-			},
-			persistentStoreKV: map[string]interface{}{
-				awsS3WriteCommitPrefix + "bucket" + "listPrefix": &commitWriteState{lastModified},
+		"existing unprocessed state is not persisted": {
+			statesEdit: func(states *states) {
+				states.AddState(testState1)
 			},
-			state:            newState("bucket", "key", "etag", "listPrefix", lastModified),
-			expectedMustSkip: true,
-			expectedIsNew:    true,
-		},
-		"after commit write": {
-			states: func() *states {
-				return newStates(inputCtx)
-			},
-			persistentStoreKV: map[string]interface{}{
-				awsS3WriteCommitPrefix + "bucket" + "listPrefix": &commitWriteState{lastModified},
-			},
-			state:            newState("bucket", "key", "etag", "listPrefix", lastModified.Add(time.Second)),
-			expectedMustSkip: false,
-			expectedIsNew:    true,
+			state:               testState1,
+			shouldReload:        true,
+			expectedIsProcessed: false,
 		},
 	}
 
 	for name, test := range tests {
 		test := test
 		t.Run(name, func(t *testing.T) {
-			states := test.states()
 			store := openTestStatestore()
 			persistentStore, err := store.Access()
 			if err != nil {
 				t.Fatalf("unexpected err: %v", err)
 			}
 
-			for key, value := range test.persistentStoreKV {
-				_ = persistentStore.Set(key, value)
+			states, err := newStates(inputCtx, persistentStore)
+			require.NoError(t, err, "states creation must succeed")
+			if test.statesEdit != nil {
+				test.statesEdit(states)
 			}
-
-			if test.mustBeNew {
-				test.state.LastModified = test.state.LastModified.Add(1 * time.Second)
+			if test.shouldReload {
+				states, err = newStates(inputCtx, persistentStore)
+				require.NoError(t, err, "states creation must succeed")
 			}
 
-			isNew := states.IsNew(test.state)
-			assert.Equal(t, test.expectedIsNew, isNew)
-
-			mustSkip := states.MustSkip(test.state, persistentStore)
-			assert.Equal(t, test.expectedMustSkip, mustSkip)
+			isProcessed := states.IsProcessed(test.state)
+			assert.Equal(t, test.expectedIsProcessed, isProcessed)
 		})
 	}
 }
-
-func TestStatesDelete(t *testing.T) {
-	type stateTestCase struct {
-		states   func() *states
-		deleteID string
-		expected []state
-	}
-
-	lastModified := time.Date(2021, time.July, 22, 18, 38, 00, 0, time.UTC)
-	tests := map[string]stateTestCase{
-		"delete empty states": {
-			states: func() *states {
-				return newStates(inputCtx)
-			},
-			deleteID: "an id",
-			expected: []state{},
-		},
-		"delete not existing state": {
-			states: func() *states {
-				states := newStates(inputCtx)
-				states.Update(newState("bucket", "key", "etag", "listPrefix", lastModified), "")
-				return states
-			},
-			deleteID: "an id",
-			expected: []state{
-				{
-					ID:           stateID("bucket", "key", "etag", lastModified),
-					Bucket:       "bucket",
-					Key:          "key",
-					Etag:         "etag",
-					ListPrefix:   "listPrefix",
-					LastModified: lastModified,
-				},
-			},
-		},
-		"delete only one existing": {
-			states: func() *states {
-				states := newStates(inputCtx)
-				states.Update(newState("bucket", "key", "etag", "listPrefix", lastModified), "")
-				return states
-			},
-			deleteID: stateID("bucket", "key", "etag", lastModified),
-			expected: []state{},
-		},
-		"delete first": {
-			states: func() *states {
-				states := newStates(inputCtx)
-				states.Update(newState("bucket", "key1", "etag1", "listPrefix", lastModified), "")
-				states.Update(newState("bucket", "key2", "etag2", "listPrefix", lastModified), "")
-				states.Update(newState("bucket", "key3", "etag3", "listPrefix", lastModified), "")
-				return states
-			},
-			deleteID: "bucketkey1etag1" + lastModified.String(),
-			expected: []state{
-				{
-					ID:           stateID("bucket", "key3", "etag3", lastModified),
-					Bucket:       "bucket",
-					Key:          "key3",
-					Etag:         "etag3",
-					ListPrefix:   "listPrefix",
-					LastModified: lastModified,
-				},
-				{
-					ID:           stateID("bucket", "key2", "etag2", lastModified),
-					Bucket:       "bucket",
-					Key:          "key2",
-					Etag:         "etag2",
-					ListPrefix:   "listPrefix",
-					LastModified: lastModified,
-				},
-			},
-		},
-		"delete last": {
-			states: func() *states {
-				states := newStates(inputCtx)
-				states.Update(newState("bucket", "key1", "etag1", "listPrefix", lastModified), "")
-				states.Update(newState("bucket", "key2", "etag2", "listPrefix", lastModified), "")
-				states.Update(newState("bucket", "key3", "etag3", "listPrefix", lastModified), "")
-				return states
-			},
-			deleteID: "bucketkey3etag3" + lastModified.String(),
-			expected: []state{
-				{
-					ID:           stateID("bucket", "key1", "etag1", lastModified),
-					Bucket:       "bucket",
-					Key:          "key1",
-					Etag:         "etag1",
-					ListPrefix:   "listPrefix",
-					LastModified: lastModified,
-				},
-				{
-					ID:           stateID("bucket", "key2", "etag2", lastModified),
-					Bucket:       "bucket",
-					Key:          "key2",
-					Etag:         "etag2",
-					ListPrefix:   "listPrefix",
-					LastModified: lastModified,
-				},
-			},
-		},
-		"delete any": {
-			states: func() *states {
-				states := newStates(inputCtx)
-				states.Update(newState("bucket", "key1", "etag1", "listPrefix", lastModified), "")
-				states.Update(newState("bucket", "key2", "etag2", "listPrefix", lastModified), "")
-				states.Update(newState("bucket", "key3", "etag3", "listPrefix", lastModified), "")
-				return states
-			},
-			deleteID: "bucketkey2etag2" + lastModified.String(),
-			expected: []state{
-				{
-					ID:           stateID("bucket", "key1", "etag1", lastModified),
-					Bucket:       "bucket",
-					Key:          "key1",
-					Etag:         "etag1",
-					ListPrefix:   "listPrefix",
-					LastModified: lastModified,
-				},
-				{
-					ID:           stateID("bucket", "key3", "etag3", lastModified),
-					Bucket:       "bucket",
-					Key:          "key3",
-					Etag:         "etag3",
-					ListPrefix:   "listPrefix",
-					LastModified: lastModified,
-				},
-			},
-		},
-	}
-
-	for name, test := range tests {
-		test := test
-		t.Run(name, func(t *testing.T) {
-			states := test.states()
-			states.Delete(test.deleteID)
-			assert.Equal(t, test.expected, states.GetStates())
-		})
-	}
-}
diff --git a/x-pack/metricbeat/module/azure/azure_test.go b/x-pack/metricbeat/module/azure/azure_test.go
new file mode 100644
index 000000000000..c3d67525ddb9
--- /dev/null
+++ b/x-pack/metricbeat/module/azure/azure_test.go
@@ -0,0 +1,39 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License;
+// you may not use this file except in compliance with the Elastic License.
+
+package azure
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestGroupMetricsDefinitionsByResourceId(t *testing.T) {
+
+	t.Run("Group metrics definitions by resource ID", func(t *testing.T) {
+		metrics := []Metric{
+			{
+				ResourceId: "resource-1",
+				Namespace:  "namespace-1",
+				Names:      []string{"metric-1"},
+			},
+			{
+				ResourceId: "resource-1",
+				Namespace:  "namespace-1",
+				Names:      []string{"metric-2"},
+			},
+			{
+				ResourceId: "resource-1",
+				Namespace:  "namespace-1",
+				Names:      []string{"metric-3"},
+			},
+		}
+
+		metricsByResourceId := groupMetricsDefinitionsByResourceId(metrics)
+
+		assert.Equal(t, 1, len(metricsByResourceId))
+		assert.Equal(t, 3, len(metricsByResourceId["resource-1"]))
+	})
+}
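The helper under test is not itself shown in this diff; based on the assertions (one key, three grouped definitions), it presumably reduces to something like the following hypothetical reconstruction:

```go
// Hypothetical reconstruction, inferred only from the test above.
func groupMetricsDefinitionsByResourceId(metrics []Metric) map[string][]Metric {
	byResourceId := make(map[string][]Metric)
	for _, metric := range metrics {
		byResourceId[metric.ResourceId] = append(byResourceId[metric.ResourceId], metric)
	}
	return byResourceId
}
```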
diff --git a/x-pack/metricbeat/module/azure/client_test.go b/x-pack/metricbeat/module/azure/client_test.go
index 79b1742ded0f..c23326ac82b7 100644
--- a/x-pack/metricbeat/module/azure/client_test.go
+++ b/x-pack/metricbeat/module/azure/client_test.go
@@ -9,10 +9,12 @@ import (
 	"testing"
 	"time"
 
+	"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
 	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/monitor/armmonitor"
 	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/mock"
+	"github.com/stretchr/testify/require"
 )
 
 var (
@@ -35,6 +37,7 @@ var (
 		},
 	}}},
 	}
+	countUnit = armmonitor.MetricUnit("Count")
 )
 
 func mockMapResourceMetrics(client *Client, resources []*armresources.GenericResourceExpanded, resourceConfig ResourceConfig) ([]Metric, error) {
@@ -112,4 +115,157 @@ func TestGetMetricValues(t *testing.T) {
 		assert.Equal(t, len(client.ResourceConfigurations.Metrics[0].Values), 0)
 		m.AssertExpectations(t)
 	})
+
+	t.Run("multiple aggregation types", func(t *testing.T) {
+		client := NewMockClient()
+		referenceTime := time.Now().UTC()
+		client.ResourceConfigurations = ResourceConfiguration{
+			Metrics: []Metric{
+				{
+					Namespace:    "Microsoft.EventHub/Namespaces",
+					Names:        []string{"ActiveConnections"},
+					Aggregations: "Maximum,Minimum,Average",
+					TimeGrain:    "PT1M",
+				},
+			},
+		}
+
+		m := &MockService{}
+		m.On(
+			"GetMetricValues",
+			mock.Anything,
+			mock.Anything,
+			mock.Anything,
+			mock.Anything,
+			mock.Anything,
+			mock.Anything,
+			mock.Anything,
+		).Return(
+			[]armmonitor.Metric{{
+				ID: to.Ptr("test"),
+				Name: &armmonitor.LocalizableString{
+					Value:          to.Ptr("ActiveConnections"),
+					LocalizedValue: to.Ptr("ActiveConnections"),
+				},
+				Timeseries: []*armmonitor.TimeSeriesElement{{
+					Data: []*armmonitor.MetricValue{{
+						Average:   to.Ptr(1.0),
+						Maximum:   to.Ptr(2.0),
+						Minimum:   to.Ptr(3.0),
+						TimeStamp: to.Ptr(time.Now()),
+					}},
+				}},
+				Type:               to.Ptr("Microsoft.Insights/metrics"),
+				Unit:               &countUnit,
+				DisplayDescription: to.Ptr("Total Active Connections for Microsoft.EventHub."),
+				ErrorCode:          to.Ptr("Success"),
+			}},
+			"PT1M",
+			nil,
+		)
+
+		client.AzureMonitorService = m
+		mr := MockReporterV2{}
+
+		metricValues := client.GetMetricValues(referenceTime, client.ResourceConfigurations.Metrics, &mr)
+
+		require.Equal(t, len(metricValues), 1)
+		require.Equal(t, len(metricValues[0].Values), 1)
+
+		assert.Equal(t, *metricValues[0].Values[0].avg, 1.0)
+		assert.Equal(t, *metricValues[0].Values[0].max, 2.0)
+		assert.Equal(t, *metricValues[0].Values[0].min, 3.0)
+
+		require.Equal(t, len(client.ResourceConfigurations.Metrics[0].Values), 1)
+
+		m.AssertExpectations(t)
+	})
+
+	t.Run("single aggregation types", func(t *testing.T) {
+		client := NewMockClient()
+		referenceTime := time.Now().UTC()
+		timestamp := time.Now().UTC()
+		client.ResourceConfigurations = ResourceConfiguration{
+			Metrics: []Metric{
+				{
+					Namespace:    "Microsoft.EventHub/Namespaces",
+					Names:        []string{"ActiveConnections"},
+					Aggregations: "Maximum",
+					TimeGrain:    "PT1M",
+				}, {
+					Namespace:    "Microsoft.EventHub/Namespaces",
+					Names:        []string{"ActiveConnections"},
+					Aggregations: "Minimum",
+					TimeGrain:    "PT1M",
+				}, {
+					Namespace:    "Microsoft.EventHub/Namespaces",
+					Names:        []string{"ActiveConnections"},
+					Aggregations: "Average",
+					TimeGrain:    "PT1M",
+				},
+			},
+		}
+
+		m := &MockService{}
+
+		x := []struct {
+			aggregation string
+			data        []*armmonitor.MetricValue
+		}{
+			{aggregation: "Maximum", data: []*armmonitor.MetricValue{{Maximum: to.Ptr(3.0), TimeStamp: to.Ptr(timestamp)}}},
+			{aggregation: "Minimum", data: []*armmonitor.MetricValue{{Minimum: to.Ptr(1.0), TimeStamp: to.Ptr(timestamp)}}},
+			{aggregation: "Average", data: []*armmonitor.MetricValue{{Average: to.Ptr(2.0), TimeStamp: to.Ptr(timestamp)}}},
+		}
+
+		for _, v := range x {
+			m.On(
+				"GetMetricValues",
+				mock.Anything,
+				mock.Anything,
+				mock.Anything,
+				mock.Anything,
+				mock.Anything,
+				v.aggregation,
+				mock.Anything,
+			).Return(
+				[]armmonitor.Metric{{
+					ID: to.Ptr("test"),
+					Name: &armmonitor.LocalizableString{
+						Value:          to.Ptr("ActiveConnections"),
+						LocalizedValue: to.Ptr("ActiveConnections"),
+					},
+					Timeseries: []*armmonitor.TimeSeriesElement{{
+						Data: v.data,
+					}},
+					Type:               to.Ptr("Microsoft.Insights/metrics"),
+					Unit:               &countUnit,
+					DisplayDescription: to.Ptr("Total Active Connections for Microsoft.EventHub."),
+					ErrorCode:          to.Ptr("Success"),
+				}},
+				"PT1M",
+				nil,
+			).Once()
+		}
+
+		client.AzureMonitorService = m
+		mr := MockReporterV2{}
+
+		metricValues := client.GetMetricValues(referenceTime, client.ResourceConfigurations.Metrics, &mr)
+
+		require.Equal(t, 3, len(metricValues))
+
+		require.Equal(t, 1, len(metricValues[0].Values))
+		require.Equal(t, 1, len(metricValues[1].Values))
+		require.Equal(t, 1, len(metricValues[2].Values))
+
+		require.NotNil(t, metricValues[0].Values[0].max, "max value is nil")
+		require.NotNil(t, metricValues[1].Values[0].min, "min value is nil")
+		require.NotNil(t, metricValues[2].Values[0].avg, "avg value is nil")
+
+		assert.Equal(t, *metricValues[0].Values[0].max, 3.0)
+		assert.Equal(t, *metricValues[1].Values[0].min, 1.0)
+		assert.Equal(t, *metricValues[2].Values[0].avg, 2.0)
+
+		m.AssertExpectations(t)
+	})
 }
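These tests lean on the stricter mock wired up in mock_service.go further down: once Called receives all seven arguments, each .On(...) can pin the aggregation argument and be consumed with .Once(), so the same method returns different data per aggregation type. The mechanism in isolation, as a self-contained testify example with illustrative names:

```go
package azure_test

import (
	"testing"

	"github.com/stretchr/testify/mock"
)

// fakeService demonstrates per-argument expectations in isolation.
type fakeService struct{ mock.Mock }

func (f *fakeService) Get(aggregation string) string {
	args := f.Called(aggregation)
	return args.String(0)
}

func TestPerArgumentExpectations(t *testing.T) {
	f := &fakeService{}
	// One expectation per argument value; .Once() consumes each after one call.
	f.On("Get", "Maximum").Return("max-data").Once()
	f.On("Get", "Minimum").Return("min-data").Once()

	if got := f.Get("Minimum"); got != "min-data" {
		t.Fatalf("got %q", got)
	}
	if got := f.Get("Maximum"); got != "max-data" {
		t.Fatalf("got %q", got)
	}
	f.AssertExpectations(t)
}
```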
diff --git a/x-pack/metricbeat/module/azure/data.go b/x-pack/metricbeat/module/azure/data.go
index c46aee9da246..b2fffb404262 100644
--- a/x-pack/metricbeat/module/azure/data.go
+++ b/x-pack/metricbeat/module/azure/data.go
@@ -133,41 +133,8 @@ func mapToKeyValuePoints(metrics []Metric) []KeyValuePoint {
 	var points []KeyValuePoint
 	for _, metric := range metrics {
 		for _, value := range metric.Values {
-			point := KeyValuePoint{
-				Timestamp:  value.timestamp,
-				Dimensions: mapstr.M{},
-			}
-
 			metricName := managePropertyName(value.name)
-			switch {
-			case value.min != nil:
-				point.Key = fmt.Sprintf("%s.%s", metricName, "min")
-				point.Value = value.min
-			case value.max != nil:
-				point.Key = fmt.Sprintf("%s.%s", metricName, "max")
-				point.Value = value.max
-			case value.avg != nil:
-				point.Key = fmt.Sprintf("%s.%s", metricName, "avg")
-				point.Value = value.avg
-			case value.total != nil:
-				point.Key = fmt.Sprintf("%s.%s", metricName, "total")
-				point.Value = value.total
-			case value.count != nil:
-				point.Key = fmt.Sprintf("%s.%s", metricName, "count")
-				point.Value = value.count
-			}
-
-			point.Namespace = metric.Namespace
-			point.ResourceId = metric.ResourceId
-			point.ResourceSubId = metric.ResourceSubId
-			point.TimeGrain = metric.TimeGrain
 
-			// The number of dimensions in the metric definition and the
-			// number of dimensions in the metric values should be the same.
-			//
-			// But, since definitions and values are retrieved from different
-			// API endpoints, we need to make sure that we don't panic if the
-			// number of dimensions is different.
+			dimensions := mapstr.M{}
 			if len(metric.Dimensions) == len(value.dimensions) {
 				// Take the dimension name from the metric definition and the
 				// dimension value from the metric value.
@@ -180,11 +147,75 @@ func mapToKeyValuePoints(metrics []Metric) []KeyValuePoint {
 					// Dimensions from metric definition and metric value are
 					// not guaranteed to be in the same order, so we need to
 					// find by name the right value for each dimension.
-					_, _ = point.Dimensions.Put(dim.Name, getDimensionValue(dim.Name, value.dimensions))
+					_, _ = dimensions.Put(dim.Name, getDimensionValue(dim.Name, value.dimensions))
 				}
 			}
 
-			points = append(points, point)
+			if value.min != nil {
+				points = append(points, KeyValuePoint{
+					Key:           fmt.Sprintf("%s.%s", metricName, "min"),
+					Value:         value.min,
+					Namespace:     metric.Namespace,
+					ResourceId:    metric.ResourceId,
+					ResourceSubId: metric.ResourceSubId,
+					TimeGrain:     metric.TimeGrain,
+					Dimensions:    dimensions,
+					Timestamp:     value.timestamp,
+				})
+			}
+
+			if value.max != nil {
+				points = append(points, KeyValuePoint{
+					Key:           fmt.Sprintf("%s.%s", metricName, "max"),
+					Value:         value.max,
+					Namespace:     metric.Namespace,
+					ResourceId:    metric.ResourceId,
+					ResourceSubId: metric.ResourceSubId,
+					TimeGrain:     metric.TimeGrain,
+					Dimensions:    dimensions,
+					Timestamp:     value.timestamp,
+				})
+			}
+
+			if value.avg != nil {
+				points = append(points, KeyValuePoint{
+					Key:           fmt.Sprintf("%s.%s", metricName, "avg"),
+					Value:         value.avg,
+					Namespace:     metric.Namespace,
+					ResourceId:    metric.ResourceId,
+					ResourceSubId: metric.ResourceSubId,
+					TimeGrain:     metric.TimeGrain,
+					Dimensions:    dimensions,
+					Timestamp:     value.timestamp,
+				})
+			}
+
+			if value.total != nil {
+				points = append(points, KeyValuePoint{
+					Key:           fmt.Sprintf("%s.%s", metricName, "total"),
+					Value:         value.total,
+					Namespace:     metric.Namespace,
+					ResourceId:    metric.ResourceId,
+					ResourceSubId: metric.ResourceSubId,
+					TimeGrain:     metric.TimeGrain,
+					Dimensions:    dimensions,
+					Timestamp:     value.timestamp,
+				})
+			}
+
+			if value.count != nil {
+				points = append(points, KeyValuePoint{
+					Key:           fmt.Sprintf("%s.%s", metricName, "count"),
+					Value:         value.count,
+					Namespace:     metric.Namespace,
+					ResourceId:    metric.ResourceId,
+					ResourceSubId: metric.ResourceSubId,
+					TimeGrain:     metric.TimeGrain,
+					Dimensions:    dimensions,
+					Timestamp:     value.timestamp,
+				})
+			}
 		}
 	}
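The five `if value.X != nil` blocks in the new mapToKeyValuePoints differ only in the key suffix and the value field. A possible tightening, sketched as a closure that would sit inside the value loop, under the assumption that KeyValuePoint.Value accepts a *float64 (an editorial suggestion, not part of the patch):

```go
// addPoint appends one KeyValuePoint per non-nil aggregation value.
addPoint := func(suffix string, v *float64) {
	if v == nil {
		return
	}
	points = append(points, KeyValuePoint{
		Key:           fmt.Sprintf("%s.%s", metricName, suffix),
		Value:         v,
		Namespace:     metric.Namespace,
		ResourceId:    metric.ResourceId,
		ResourceSubId: metric.ResourceSubId,
		TimeGrain:     metric.TimeGrain,
		Dimensions:    dimensions,
		Timestamp:     value.timestamp,
	})
}

addPoint("min", value.min)
addPoint("max", value.max)
addPoint("avg", value.avg)
addPoint("total", value.total)
addPoint("count", value.count)
```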
diff --git a/x-pack/metricbeat/module/azure/data_test.go b/x-pack/metricbeat/module/azure/data_test.go
index 85b781ed64ec..1519f78982d2 100644
--- a/x-pack/metricbeat/module/azure/data_test.go
+++ b/x-pack/metricbeat/module/azure/data_test.go
@@ -62,7 +62,37 @@ func TestMapToKeyValuePoints(t *testing.T) {
 	resourceSubId := "test"
 	timeGrain := "PT1M"
 
-	t.Run("test aggregation types", func(t *testing.T) {
+	t.Run("test single aggregation type (single config)", func(t *testing.T) {
+
+		metrics := []Metric{{
+			Namespace:     namespace,
+			Names:         []string{"test"},
+			Aggregations:  "min",
+			Values:        []MetricValue{{name: metricName, min: &minValue, timestamp: timestamp}},
+			TimeGrain:     timeGrain,
+			ResourceId:    resourceId,
+			ResourceSubId: resourceSubId,
+		}}
+
+		actual := mapToKeyValuePoints(metrics)
+
+		expected := []KeyValuePoint{
+			{
+				Key:           fmt.Sprintf("%s.%s", metricName, "min"),
+				Value:         &minValue,
+				Namespace:     namespace,
+				TimeGrain:     timeGrain,
+				Timestamp:     timestamp,
+				ResourceId:    resourceId,
+				ResourceSubId: resourceSubId,
+				Dimensions:    map[string]interface{}{},
+			},
+		}
+
+		assert.Equal(t, expected, actual)
+	})
+
+	t.Run("test single aggregation types (multiple configs)", func(t *testing.T) {
 
 		metrics := []Metric{{
 			Namespace: namespace,
@@ -161,4 +191,79 @@ func TestMapToKeyValuePoints(t *testing.T) {
 
 		assert.Equal(t, expected, actual)
 	})
+
+	t.Run("test multiple aggregation types (multiple configs)", func(t *testing.T) {
+		metrics := []Metric{{
+			Namespace:    namespace,
+			Names:        []string{"test"},
+			Aggregations: "Minimum,Maximum,Average,Total,Count",
+			Values: []MetricValue{
+				{name: metricName, min: &minValue, timestamp: timestamp},
+				{name: metricName, max: &maxValue, timestamp: timestamp},
+				{name: metricName, avg: &avgValue, timestamp: timestamp},
+				{name: metricName, total: &totalValue, timestamp: timestamp},
+				{name: metricName, count: &countValue, timestamp: timestamp},
+			},
+			TimeGrain:     timeGrain,
+			ResourceId:    resourceId,
+			ResourceSubId: resourceSubId,
+		}}
+
+		actual := mapToKeyValuePoints(metrics)
+
+		expected := []KeyValuePoint{
+			{
+				Key:           fmt.Sprintf("%s.%s", metricName, "min"),
+				Value:         &minValue,
+				Namespace:     namespace,
+				TimeGrain:     timeGrain,
+				Timestamp:     timestamp,
+				ResourceId:    resourceId,
+				ResourceSubId: resourceSubId,
+				Dimensions:    map[string]interface{}{},
+			},
+			{
+				Key:           fmt.Sprintf("%s.%s", metricName, "max"),
+				Value:         &maxValue,
+				Namespace:     namespace,
+				TimeGrain:     timeGrain,
+				Timestamp:     timestamp,
+				ResourceId:    resourceId,
+				ResourceSubId: resourceSubId,
+				Dimensions:    map[string]interface{}{},
+			},
+			{
+				Key:           fmt.Sprintf("%s.%s", metricName, "avg"),
+				Value:         &avgValue,
+				Namespace:     namespace,
+				TimeGrain:     timeGrain,
+				Timestamp:     timestamp,
+				ResourceId:    resourceId,
+				ResourceSubId: resourceSubId,
+				Dimensions:    map[string]interface{}{},
+			},
+			{
+				Key:           fmt.Sprintf("%s.%s", metricName, "total"),
+				Value:         &totalValue,
+				Namespace:     namespace,
+				TimeGrain:     timeGrain,
+				Timestamp:     timestamp,
+				ResourceId:    resourceId,
+				ResourceSubId: resourceSubId,
+				Dimensions:    map[string]interface{}{},
+			},
+			{
+				Key:           fmt.Sprintf("%s.%s", metricName, "count"),
+				Value:         &countValue,
+				Namespace:     namespace,
+				TimeGrain:     timeGrain,
+				Timestamp:     timestamp,
+				ResourceId:    resourceId,
+				ResourceSubId: resourceSubId,
+				Dimensions:    map[string]interface{}{},
+			},
+		}
+
+		assert.Equal(t, expected, actual)
+	})
 }
diff --git a/x-pack/metricbeat/module/azure/metric_registry.go b/x-pack/metricbeat/module/azure/metric_registry.go
index cdaa9496b5d6..c127701c996e 100644
--- a/x-pack/metricbeat/module/azure/metric_registry.go
+++ b/x-pack/metricbeat/module/azure/metric_registry.go
@@ -5,6 +5,7 @@
 package azure
 
 import (
+	"fmt"
 	"strings"
 	"time"
 
@@ -118,8 +119,14 @@ func (m *MetricRegistry) buildMetricKey(metric Metric) string {
 	keyComponents := []string{
 		metric.Namespace,
 		metric.ResourceId,
+		metric.Aggregations,
+		metric.TimeGrain,
+		strings.Join(metric.Names, ","),
+	}
+
+	for _, dim := range metric.Dimensions {
+		keyComponents = append(keyComponents, fmt.Sprintf("%s=%s", dim.Name, dim.Value))
 	}
-	keyComponents = append(keyComponents, metric.Names...)
 
 	return strings.Join(keyComponents, ",")
 }
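With the extra key components, two configurations that share a resource and namespace but differ in aggregation, timegrain, or dimensions no longer collide in the registry, which is exactly what the tests below rely on. A self-contained illustration mirroring the new buildMetricKey logic on simplified types:

```go
package main

import (
	"fmt"
	"strings"
)

type dim struct{ Name, Value string }

// buildKey mirrors the new buildMetricKey: namespace, resource, aggregations,
// timegrain, joined metric names, then one component per dimension.
func buildKey(ns, res, aggs, grain string, names []string, dims []dim) string {
	parts := []string{ns, res, aggs, grain, strings.Join(names, ",")}
	for _, d := range dims {
		parts = append(parts, fmt.Sprintf("%s=%s", d.Name, d.Value))
	}
	return strings.Join(parts, ",")
}

func main() {
	a := buildKey("ns", "res", "Maximum", "PT1M", []string{"m1"}, nil)
	b := buildKey("ns", "res", "Minimum", "PT1M", []string{"m1"}, nil)
	fmt.Println(a != b) // true: before this change the two keys collided
}
```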
diff --git a/x-pack/metricbeat/module/azure/metric_registry_test.go b/x-pack/metricbeat/module/azure/metric_registry_test.go
index a0ecdc84b85d..63984aa6b59e 100644
--- a/x-pack/metricbeat/module/azure/metric_registry_test.go
+++ b/x-pack/metricbeat/module/azure/metric_registry_test.go
@@ -13,7 +13,7 @@ import (
 	"github.com/elastic/elastic-agent-libs/logp"
 )
 
-func TestNewMetricRegistry(t *testing.T) {
+func TestMetricRegistry(t *testing.T) {
 	logger := logp.NewLogger("test azure monitor")
 
 	t.Run("Collect metrics with a regular 5 minutes period", func(t *testing.T) {
@@ -90,4 +90,140 @@ func TestNewMetricRegistry(t *testing.T) {
 
 		assert.True(t, needsUpdate, "metric should not need update")
 	})
+
+	t.Run("Metrics with different aggregation types", func(t *testing.T) {
+		metricRegistry := NewMetricRegistry(logger)
+
+		referenceTime := time.Now().UTC()
+		lastCollectionAt := referenceTime.Add(-time.Minute * 10)
+
+		metric1 := Metric{
+			ResourceId:   "test",
+			Namespace:    "test",
+			Aggregations: "Maximum",
+		}
+		metric2 := Metric{
+			ResourceId:   "test",
+			Namespace:    "test",
+			Aggregations: "Minimum",
+		}
+
+		metricCollectionInfo := MetricCollectionInfo{
+			timeGrain: "PT5M",
+			timestamp: lastCollectionAt,
+		}
+
+		// Update metrics collection info for previous collection
+		metricRegistry.Update(metric1, metricCollectionInfo)
+		metricRegistry.Update(metric2, metricCollectionInfo)
+
+		// Update metric info for metric1
+		metricRegistry.Update(metric1, MetricCollectionInfo{
+			timeGrain: "PT5M",
+			timestamp: referenceTime,
+		})
+
+		// Check if metrics need update
+		metric1NeedsUpdate := metricRegistry.NeedsUpdate(referenceTime, metric1)
+		metric2NeedsUpdate := metricRegistry.NeedsUpdate(referenceTime, metric2)
+
+		assert.False(t, metric1NeedsUpdate, "metric should not need update")
+		assert.True(t, metric2NeedsUpdate, "metric should need update")
+	})
+
+	t.Run("Metrics with different dimensions", func(t *testing.T) {
+		metricRegistry := NewMetricRegistry(logger)
+
+		referenceTime := time.Now().UTC()
+		lastCollectionAt := referenceTime.Add(-time.Minute * 10)
+
+		metric1 := Metric{
+			ResourceId: "resource-id-1",
+			Namespace:  "namespace-1",
+			Names:      []string{"metric-name-1"},
+			Dimensions: []Dimension{
+				{Name: "dimension-1", Value: "*"},
+			},
+			TimeGrain: "PT1M",
+		}
+		metric2 := Metric{
+			ResourceId: "resource-id-1",
+			Namespace:  "namespace-1",
+			Names:      []string{"metric-name-1"},
+			Dimensions: []Dimension{
+				{Name: "dimension-2", Value: "*"},
+			},
+			TimeGrain: "PT1M",
+		}
+
+		metricCollectionInfo := MetricCollectionInfo{
+			timeGrain: "PT1M",
+			timestamp: lastCollectionAt,
+		}
+
+		// Update metrics collection info for previous collection
+		metricRegistry.Update(metric1, metricCollectionInfo)
+		metricRegistry.Update(metric2, metricCollectionInfo)
+
+		// Update metric info for metric1
+		metricRegistry.Update(metric1, MetricCollectionInfo{
+			timeGrain: "PT1M",
+			timestamp: referenceTime,
+		})
+
+		// Check if metrics need update
+		metric1NeedsUpdate := metricRegistry.NeedsUpdate(referenceTime, metric1)
+		metric2NeedsUpdate := metricRegistry.NeedsUpdate(referenceTime, metric2)
+
+		assert.False(t, metric1NeedsUpdate, "metric should not need update")
+		assert.True(t, metric2NeedsUpdate, "metric should need update")
+	})
+
+	t.Run("Metrics with different timegrain", func(t *testing.T) {
+		metricRegistry := NewMetricRegistry(logger)
+
+		referenceTime := time.Now().UTC()
+		lastCollectionAt := referenceTime.Add(-time.Minute * 10)
+
+		metric1 := Metric{
+			ResourceId: "resource-id-1",
+			Namespace:  "namespace-1",
+			Names:      []string{"metric-name-1"},
+			Dimensions: []Dimension{
+				{Name: "dimension-1", Value: "*"},
+			},
+			TimeGrain: "PT1M",
+		}
+		metric2 := Metric{
+			ResourceId: "resource-id-1",
+			Namespace:  "namespace-1",
+			Names:      []string{"metric-name-1"},
+			Dimensions: []Dimension{
+				{Name: "dimension-1", Value: "*"},
+			},
+			TimeGrain: "PT5M",
+		}
+
+		metricCollectionInfo := MetricCollectionInfo{
+			timeGrain: "PT1M",
+			timestamp: lastCollectionAt,
+		}
+
+		// Update metrics collection info for previous collection
+		metricRegistry.Update(metric1, metricCollectionInfo)
+		metricRegistry.Update(metric2, metricCollectionInfo)
+
+		// Update metric info for metric1
+		metricRegistry.Update(metric1, MetricCollectionInfo{
+			timeGrain: "PT1M",
+			timestamp: referenceTime,
+		})
+
+		// Check if metrics need update
+		metric1NeedsUpdate := metricRegistry.NeedsUpdate(referenceTime, metric1)
+		metric2NeedsUpdate := metricRegistry.NeedsUpdate(referenceTime, metric2)
+
+		assert.False(t, metric1NeedsUpdate, "metric should not need update")
+		assert.True(t, metric2NeedsUpdate, "metric should need update")
+	})
 }
diff --git a/x-pack/metricbeat/module/azure/mock_service.go b/x-pack/metricbeat/module/azure/mock_service.go
index 9626952fa6d1..293adc7c9a78 100644
--- a/x-pack/metricbeat/module/azure/mock_service.go
+++ b/x-pack/metricbeat/module/azure/mock_service.go
@@ -43,7 +43,7 @@ func (client *MockService) GetMetricNamespaces(resourceId string) (armmonitor.Me
 
 // GetMetricValues is a mock function for the azure service
 func (client *MockService) GetMetricValues(resourceId string, namespace string, timegrain string, timespan string, metricNames []string, aggregations string, filter string) ([]armmonitor.Metric, string, error) {
-	args := client.Called(resourceId, namespace)
+	args := client.Called(resourceId, namespace, timegrain, timespan, metricNames, aggregations, filter)
 	return args.Get(0).([]armmonitor.Metric), args.String(1), args.Error(2)
 }
diff --git a/x-pack/metricbeat/module/azure/service_interface.go b/x-pack/metricbeat/module/azure/service_interface.go
index cb524c7f6ea5..75ae48d3d6e4 100644
--- a/x-pack/metricbeat/module/azure/service_interface.go
+++ b/x-pack/metricbeat/module/azure/service_interface.go
@@ -15,5 +15,16 @@ type Service interface {
 	GetResourceDefinitions(id []string, group []string, rType string, query string) ([]*armresources.GenericResourceExpanded, error)
 	GetMetricDefinitionsWithRetry(resourceId string, namespace string) (armmonitor.MetricDefinitionCollection, error)
 	GetMetricNamespaces(resourceId string) (armmonitor.MetricNamespaceCollection, error)
-	GetMetricValues(resourceId string, namespace string, timegrain string, timespan string, metricNames []string, aggregations string, filter string) ([]armmonitor.Metric, string, error)
+	// GetMetricValues returns the metric values for the given resource ID, namespace, timegrain, timespan, metricNames, aggregations and filter.
+	//
+	// If the timegrain is empty, the default timegrain for the metric is used and returned.
+	GetMetricValues(
+		resourceId string, // resourceId is the ID of the resource to query (e.g. "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/{resourceProviderNamespace}/{resourceType}/{resourceName}")
+		namespace string, // namespace is the metric namespace to query (e.g. "Microsoft.Compute/virtualMachines")
+		timegrain string, // timegrain is the timegrain to use for the metric query (e.g. "PT1M"); if empty, returns the default timegrain for the metric.
+		timespan string, // timespan is the time interval to query (e.g. 2024-04-29T14:03:00Z/2024-04-29T14:04:00Z)
+		metricNames []string, // metricNames is the list of metric names to query (e.g. ["ServiceApiLatency", "Availability"])
+		aggregations string, // aggregations is the comma-separated list of aggregations to use for the metric query (e.g. "Average,Maximum,Minimum")
+		filter string, // filter is the filter to query for dimensions (e.g. "ActivityType eq '*' AND ActivityName eq '*' AND StatusCode eq '*' AND StatusCodeClass eq '*'")
+	) ([]armmonitor.Metric, string, error)
 }
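For reference, a hedged sketch of a call matching the documented signature; the resource ID, timespan, and `svc` (any concrete Service implementation) are illustrative, and the fragment assumes it sits in a function with an error return:

```go
// Illustrative values only; nothing here is taken from a real subscription.
metrics, usedTimegrain, err := svc.GetMetricValues(
	"/subscriptions/sub-id/resourceGroups/rg/providers/Microsoft.EventHub/namespaces/ns1",
	"Microsoft.EventHub/Namespaces",
	"PT1M",
	"2024-04-29T14:03:00Z/2024-04-29T14:04:00Z",
	[]string{"ActiveConnections"},
	"Average,Maximum,Minimum",
	"", // no dimension filter
)
if err != nil {
	return err
}
_ = usedTimegrain // "PT1M" here; the metric's default if "" had been passed
_ = metrics
```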