From 7d0b0d671cb287a13a305752ac6ea03d4ce200c1 Mon Sep 17 00:00:00 2001
From: David Hunt
Date: Wed, 18 Dec 2024 09:25:00 +0000
Subject: [PATCH] add patches to support IPM 24.12 release

Add patch sets for the following DPDK versions, including the capacity metric:
- 20.11.9
- 21.11.8
- 22.11.6
- 23.11.2

Add patch sets for the following VPP versions, including the capacity metric:
- 20.09
- 21.01
- 22.02
- 23.02
- 24.02

Add a python tool to read/write MSRs as an alternative to rdmsr/wrmsr.

Signed-off-by: David Hunt
---
 ...001-eal-add-lcore-busyness-telemetry.patch |  22 +-
 ...l-add-cpuset-lcore-telemetry-entries.patch |   6 +-
 ...apacity-endpoint-to-telemetry-thread.patch | 356 +++++++
 ...001-eal-add-lcore-busyness-telemetry.patch |  48 +-
 ...l-add-cpuset-lcore-telemetry-entries.patch |   6 +-
 ...apacity-endpoint-to-telemetry-thread.patch | 356 +++++++
 ...001-eal-add-lcore-busyness-telemetry.patch |  48 +-
 ...l-add-cpuset-lcore-telemetry-entries.patch |   6 +-
 ...apacity-endpoint-to-telemetry-thread.patch | 356 +++++++
 ...001-eal-add-lcore-busyness-telemetry.patch | 976 ++++++++++++++++++
 ...l-add-cpuset-lcore-telemetry-entries.patch |  79 ++
 ...apacity-endpoint-to-telemetry-thread.patch | 357 +++++++
 ipm/patches/dpdk/README.md                    |   7 +-
 ...PATCH-1-1-stats-Added-capacity-flags.patch | 325 ++++++
 ...PATCH-1-1-stats-Added-capacity-flags.patch | 325 ++++++
 ...4-stats-Added-capacity-flag-in-stats.patch | 365 +++++++
 ...01-vlib-CPU-load-measurement-and-CLI.patch | 114 ++
 ...U-load-and-queue-burst-flag-in-stats.patch | 343 ++++++
 ...-encode-cpu-id-in-utilization-metric.patch |  40 +
 ...4-stats-Added-capacity-flag-in-stats.patch | 365 +++++++
 ...01-vlib-CPU-load-measurement-and-CLI.patch | 114 ++
 ...U-load-and-queue-burst-flag-in-stats.patch | 351 +++++++
 ...-encode-cpu-id-in-utilization-metric.patch |  42 +
 ...4-stats-Added-capacity-flag-in-stats.patch | 365 +++++++
 msrtool/rw_msr_tool.py                        | 166 +++
 25 files changed, 5467 insertions(+), 71 deletions(-)
 create mode 100644 ipm/patches/dpdk/20.11/0003-add-capacity-endpoint-to-telemetry-thread.patch
 create mode 100644 ipm/patches/dpdk/21.11/0003-add-capacity-endpoint-to-telemetry-thread.patch
 create mode 100644 ipm/patches/dpdk/22.11/0003-add-capacity-endpoint-to-telemetry-thread.patch
 create mode 100644 ipm/patches/dpdk/23.11/0001-eal-add-lcore-busyness-telemetry.patch
 create mode 100644 ipm/patches/dpdk/23.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch
 create mode 100644 ipm/patches/dpdk/23.11/0003-add-capacity-endpoint-to-telemetry-thread.patch
 create mode 100644 ipm/patches/vpp/20.09/0004-Subject-PATCH-1-1-stats-Added-capacity-flags.patch
 create mode 100644 ipm/patches/vpp/21.01/0004-Subject-PATCH-1-1-stats-Added-capacity-flags.patch
 create mode 100644 ipm/patches/vpp/22.02/0004-stats-Added-capacity-flag-in-stats.patch
 create mode 100644 ipm/patches/vpp/23.02/0001-vlib-CPU-load-measurement-and-CLI.patch
 create mode 100644 ipm/patches/vpp/23.02/0002-stats-Added-CPU-load-and-queue-burst-flag-in-stats.patch
 create mode 100644 ipm/patches/vpp/23.02/0003-stats-encode-cpu-id-in-utilization-metric.patch
 create mode 100644 ipm/patches/vpp/23.02/0004-stats-Added-capacity-flag-in-stats.patch
 create mode 100644 ipm/patches/vpp/24.02/0001-vlib-CPU-load-measurement-and-CLI.patch
 create mode 100644 ipm/patches/vpp/24.02/0002-stats-Added-CPU-load-and-queue-burst-flag-in-stats.patch
 create mode 100644 ipm/patches/vpp/24.02/0003-stats-encode-cpu-id-in-utilization-metric.patch
 create mode 100644 ipm/patches/vpp/24.02/0004-stats-Added-capacity-flag-in-stats.patch
 create mode 100755 msrtool/rw_msr_tool.py
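Note (not part of any commit message): the capacity endpoint added by the 0003-* DPDK
patches below reports the existing busyness value scaled by the ratio of the core's
current frequency to its P1 (base) frequency, i.e. the same arithmetic performed by
rte_lcore_capacity(). The stand-alone C sketch below only illustrates that calculation;
the helper name and the frequency values are made-up examples, not part of the patches.
In a patched EAL the same value can be queried at runtime through the new
/eal/lcore/capacity_used telemetry endpoint (for example with dpdk-telemetry.py).

	/* Illustrative sketch of the capacity calculation used by the
	 * 0003-* patches (rte_lcore_capacity()): busyness scaled by the
	 * ratio of current frequency to P1 frequency. Frequencies are in
	 * kHz and are example values only.
	 */
	#include <stdio.h>

	static int capacity(int busyness, int cur_freq, int p1_freq)
	{
		if (busyness < 0 || p1_freq <= 0)
			return -1;
		return busyness * cur_freq / p1_freq;
	}

	int main(void)
	{
		/* A core that is 50% busy at P1 (2 GHz) reports 100% busyness
		 * when scaled down to 1 GHz, but its capacity stays at 50%.
		 */
		printf("capacity = %d%%\n", capacity(100, 1000000, 2000000));
		return 0;
	}

diff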
--git a/ipm/patches/dpdk/20.11/0001-eal-add-lcore-busyness-telemetry.patch b/ipm/patches/dpdk/20.11/0001-eal-add-lcore-busyness-telemetry.patch index 1cadea6..7be351e 100644 --- a/ipm/patches/dpdk/20.11/0001-eal-add-lcore-busyness-telemetry.patch +++ b/ipm/patches/dpdk/20.11/0001-eal-add-lcore-busyness-telemetry.patch @@ -1,7 +1,7 @@ -From 6942cd799c2fe3ec8e96d8c7758159456b37f9f4 Mon Sep 17 00:00:00 2001 +From ef227c95dd0ac20017c7190c42b6c6a98dae2ae0 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Fri, 15 Jul 2022 13:12:53 +0000 -Subject: [PATCH 1/2] eal: add lcore busyness telemetry +Subject: [PATCH 1/3] eal: add lcore busyness telemetry Currently, there is no way to measure lcore busyness in a passive way, without any modifications to the application. This patch adds a new EAL @@ -205,7 +205,7 @@ index c210cf86bd..16806a896a 100644 } diff --git a/lib/librte_distributor/rte_distributor_single.c b/lib/librte_distributor/rte_distributor_single.c -index e8a13ce980..06e4bab89f 100644 +index f4725b1d0b..80460ab5d3 100644 --- a/lib/librte_distributor/rte_distributor_single.c +++ b/lib/librte_distributor/rte_distributor_single.c @@ -34,8 +34,11 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d, @@ -677,10 +677,10 @@ index a55fd7496d..90c2aa037a 100644 } #endif diff --git a/lib/librte_eal/linux/eal.c b/lib/librte_eal/linux/eal.c -index 5814f9ce69..772cc98143 100644 +index 814572ccbd..ce5701e42d 100644 --- a/lib/librte_eal/linux/eal.c +++ b/lib/librte_eal/linux/eal.c -@@ -1368,6 +1368,7 @@ rte_eal_cleanup(void) +@@ -1364,6 +1364,7 @@ rte_eal_cleanup(void) rte_mp_channel_cleanup(); rte_trace_save(); eal_trace_fini(); @@ -721,10 +721,10 @@ index fe065a41d9..d828a0d791 100644 INTERNAL { diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h -index 5e8331da1c..e800ad05bf 100644 +index 8856648d11..87d30a059a 100644 --- a/lib/librte_ethdev/rte_ethdev.h +++ b/lib/librte_ethdev/rte_ethdev.h -@@ -4890,6 +4890,8 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id, +@@ -4879,6 +4879,8 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id, #endif rte_ethdev_trace_rx_burst(port_id, queue_id, (void **)rx_pkts, nb_rx); @@ -734,7 +734,7 @@ index 5e8331da1c..e800ad05bf 100644 } diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h -index bec8f3c0c9..6cfe2925c8 100644 +index ce1fc2ce0f..820b3ba73d 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -1663,13 +1663,19 @@ rte_event_dequeue_burst(uint8_t dev_id, uint8_t port_id, struct rte_event ev[], @@ -786,7 +786,7 @@ index f29164dd15..c37712a3f0 100644 int diff --git a/lib/librte_regexdev/rte_regexdev.h b/lib/librte_regexdev/rte_regexdev.h -index df2312678c..00f0899fa8 100644 +index 0001658925..b3ec648ca9 100644 --- a/lib/librte_regexdev/rte_regexdev.h +++ b/lib/librte_regexdev/rte_regexdev.h @@ -1524,6 +1524,7 @@ rte_regexdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, @@ -809,7 +809,7 @@ index df2312678c..00f0899fa8 100644 #ifdef __cplusplus diff --git a/lib/librte_ring/rte_ring_elem.h b/lib/librte_ring/rte_ring_elem.h -index 0057da3597..5b1969cba1 100644 +index 7034d29c07..341bdf8dec 100644 --- a/lib/librte_ring/rte_ring_elem.h +++ b/lib/librte_ring/rte_ring_elem.h @@ -475,6 +475,8 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table, @@ -822,5 +822,5 @@ index 0057da3597..5b1969cba1 100644 } -- -2.31.1 +2.25.1 diff --git a/ipm/patches/dpdk/20.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch 
b/ipm/patches/dpdk/20.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch index 036dc28..0558d2d 100644 --- a/ipm/patches/dpdk/20.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch +++ b/ipm/patches/dpdk/20.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch @@ -1,7 +1,7 @@ -From cb36d5eb09afc6bca1dc8682e7d7b5676203d66f Mon Sep 17 00:00:00 2001 +From 5c902504efd82545ee41bbeca7ef72682f5f8d65 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Fri, 15 Jul 2022 13:12:54 +0000 -Subject: [PATCH 2/2] eal: add cpuset lcore telemetry entries +Subject: [PATCH 2/3] eal: add cpuset lcore telemetry entries Expose per-lcore cpuset information to telemetry. @@ -84,5 +84,5 @@ index 2e9033bf5a..f01ccd9a65 100644 } -- -2.31.1 +2.25.1 diff --git a/ipm/patches/dpdk/20.11/0003-add-capacity-endpoint-to-telemetry-thread.patch b/ipm/patches/dpdk/20.11/0003-add-capacity-endpoint-to-telemetry-thread.patch new file mode 100644 index 0000000..825efad --- /dev/null +++ b/ipm/patches/dpdk/20.11/0003-add-capacity-endpoint-to-telemetry-thread.patch @@ -0,0 +1,356 @@ +From 81175d27730b2b69d36d00d4083872696db109e4 Mon Sep 17 00:00:00 2001 +From: David Hunt +Date: Mon, 16 Sep 2024 14:59:56 +0100 +Subject: [PATCH 3/3] add capacity endpoint to telemetry thread + +Busyness is calculated on how busy the current core is, ignoring the +current frequency. So a core that's 50% busy at P1 (e.g. 2GHz), shows +as 100% busy at 1GHz. + +This patch adds a new 'capacity' metric that shows a percentage based on +the P1 (base) freqency of the core, so that if the core is 50% busy at +P1, it should show 50% regardless of what the current frequency is. + +Signed-off-by: David Hunt +--- + .../common/eal_common_lcore_telemetry.c | 240 ++++++++++++++++++ + lib/librte_eal/include/rte_lcore.h | 21 ++ + lib/librte_eal/version.map | 1 + + 3 files changed, 262 insertions(+) + +diff --git a/lib/librte_eal/common/eal_common_lcore_telemetry.c b/lib/librte_eal/common/eal_common_lcore_telemetry.c +index f01ccd9a65..18dcc40b1e 100644 +--- a/lib/librte_eal/common/eal_common_lcore_telemetry.c ++++ b/lib/librte_eal/common/eal_common_lcore_telemetry.c +@@ -10,9 +10,18 @@ + #include + #include + #include ++#include ++#include ++#include + + #ifdef RTE_LCORE_BUSYNESS + #include ++#define MSR_PLATFORM_INFO 0xCE ++#define POWER_SYSFS_CUR_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" ++#define POWER_SYSFS_BASE_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" ++#define POWER_SYSFS_SCALING_DRIVER_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_driver" ++#define POWER_SYSFS_SCALING_MAX_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" ++#define POWER_SYSFS_MSR_PATH "/dev/cpu/%u/msr" + #endif + + int __rte_lcore_telemetry_enabled; +@@ -47,6 +56,182 @@ static struct lcore_telemetry *telemetry_data; + #define SMOOTH_COEFF 5 + #define STATE_CHANGE_OPT 32 + ++static int p1_freq[RTE_MAX_LCORE] = {0}; ++ ++static int ++try_read_base_frequency(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ snprintf(path, sizeof(path), POWER_SYSFS_BASE_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ ++ p1_freq[lcore_id] = atoi(buffer); ++ return p1_freq[lcore_id]; ++ ++ ++} ++ ++static int ++try_read_scaling_max_freq(unsigned int 
lcore_id) ++{ ++ char path[PATH_MAX]; ++ int freq; ++ int fd; ++ ++ /* ++ * If the driver is acpi_cpufreq, we can read the scaling_max_freq file ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_DRIVER_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ close(fd); ++ ++ if (strncmp(buffer, "acpi-cpufreq", 12) == 0) { ++ /* we can use the scaling_max_freq to get the p1 */ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_MAX_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ freq = atoi(buffer) / 1000; /* convert to KHz */ ++ ++ /* ++ * If the freq value ends with '1', then, turbo is enabled. ++ * Round it down to the nearest 100. Otherwuse use the value. ++ */ ++ return (freq & ~1) * 1000; /* convert to Hz */ ++ } ++ return -1; ++} ++ ++static int ++try_read_msr(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ int freq; ++ uint64_t data; ++ ++ /* ++ * If the msr driver is present, we can read p1 from MSR_PLATFORM_INFO register ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_MSR_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd < 0) { ++ return -1; ++ } ++ ++ if (pread(fd, &data, sizeof(data), MSR_PLATFORM_INFO) != sizeof(data)) { ++ close(fd); ++ return -1; ++ } ++ ++ close(fd); ++ ++ freq = ((data >> 8) & 0xff) * 100 * 1000; ++ ++ return freq; ++} ++ ++ ++static ++int read_sysfs_p1_freq(unsigned int lcore_id) { ++ int freq; ++ ++ /* We've previously got the p1 frequency. */ ++ if (p1_freq[lcore_id] != 0) ++ return p1_freq[lcore_id]; ++ ++ /* ++ * Check the base_frequency file, if it's there ++ */ ++ freq = try_read_base_frequency(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Check the scaling_max_freq file for the acpi-freq driver ++ */ ++ freq = try_read_scaling_max_freq(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Try reading from the MSR register ++ */ ++ freq = try_read_msr(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ RTE_LOG(ERR, EAL, "Capacity telemetry for lcore %d not supported: no p1 frequency found", ++ lcore_id); ++ ++ return -1; ++} ++ ++ ++int current_fds[RTE_MAX_LCORE] = {0}; ++ ++static ++int read_sysfs_cur_freq(unsigned int lcore_id) { ++ char path[PATH_MAX]; ++ ++ if (current_fds[lcore_id] == 0) { ++ snprintf(path, sizeof(path), POWER_SYSFS_CUR_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ current_fds[lcore_id] = open(path, O_RDONLY); ++ if (current_fds[lcore_id] == -1) { ++ return -1; ++ } ++ } ++ ++ char buffer[16]; ++ ssize_t bytesRead = pread(current_fds[lcore_id], buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ int value = atoi(buffer); ++ return value; ++} ++ + /* Helper function to check if the lcore is enabled. 
+ * Cannot use rte_lcore_is_enabled since it only catches ROLE_RTE threads which + * does not include ROLE_NON_EAL threads which some application threads, for +@@ -102,6 +287,33 @@ int rte_lcore_busyness(unsigned int lcore_id) + return telemetry_data[lcore_id].busyness; + } + ++int rte_lcore_capacity(unsigned int lcore_id) ++{ ++ const uint64_t active_thresh = RTE_LCORE_BUSYNESS_PERIOD * 1000; ++ struct lcore_telemetry *tdata; ++ ++ if (lcore_id >= RTE_MAX_LCORE) ++ return -EINVAL; ++ tdata = &telemetry_data[lcore_id]; ++ ++ /* if the lcore is not active */ ++ if (tdata->interval_ts == 0) ++ return LCORE_BUSYNESS_NOT_SET; ++ /* if the core hasn't been active in a while */ ++ else if ((rte_rdtsc() - tdata->interval_ts) > active_thresh) ++ return LCORE_BUSYNESS_NOT_SET; ++ ++ int cur_freq = read_sysfs_cur_freq(rte_lcore_to_cpu_id(lcore_id)); ++ int busy = telemetry_data[lcore_id].busyness; ++ int p1 = read_sysfs_p1_freq(lcore_id) ; ++ ++ if ((busy == -1) || (p1 <= 0)) { ++ return -1; ++ } else { ++ return busy * cur_freq / p1; ++ } ++} ++ + int rte_lcore_busyness_enabled(void) + { + return __rte_lcore_telemetry_enabled; +@@ -263,6 +475,26 @@ lcore_handle_busyness(const char *cmd __rte_unused, + return 0; + } + ++static int ++lcore_handle_capacity(const char *cmd __rte_unused, ++ const char *params __rte_unused, struct rte_tel_data *d) ++{ ++ char corenum[64]; ++ int i; ++ ++ rte_tel_data_start_dict(d); ++ ++ /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ ++ for (i = 0; i < RTE_MAX_LCORE; i++) { ++ if (!lcore_enabled(i)) ++ continue; ++ snprintf(corenum, sizeof(corenum), "%d", i); ++ rte_tel_data_add_dict_int(d, corenum, rte_lcore_capacity(i)); ++ } ++ ++ return 0; ++} ++ + static int + lcore_handle_cpuset(const char *cmd __rte_unused, + const char *params __rte_unused, +@@ -326,6 +558,9 @@ RTE_INIT(lcore_init_telemetry) + rte_telemetry_register_cmd("/eal/lcore/busyness", lcore_handle_busyness, + "return percentage busyness of cores"); + ++ rte_telemetry_register_cmd("/eal/lcore/capacity_used", lcore_handle_capacity, ++ "return percentage capacity of cores"); ++ + rte_telemetry_register_cmd("/eal/lcore/busyness_enable", lcore_busyness_enable, + "enable lcore busyness measurement"); + +@@ -340,6 +575,11 @@ RTE_INIT(lcore_init_telemetry) + + #else + ++int rte_lcore_capacity(unsigned int lcore_id __rte_unused) ++{ ++ return -ENOTSUP; ++} ++ + int rte_lcore_busyness(unsigned int lcore_id __rte_unused) + { + return -ENOTSUP; +diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h +index 90c2aa037a..dddc529ccd 100644 +--- a/lib/librte_eal/include/rte_lcore.h ++++ b/lib/librte_eal/include/rte_lcore.h +@@ -487,6 +487,27 @@ __rte_experimental + int + rte_lcore_busyness(unsigned int lcore_id); + ++/** ++ * @warning ++ * @b EXPERIMENTAL: this API may change without prior notice. ++ * ++ * Read capacity value corresponding to an lcore. ++ * This differs from busyness in that it is related to the current usage ++ * of the lcore compared to P1 frequency, not the current frequency. ++ * ++ * @param lcore_id ++ * Lcore to read capacity value for. ++ * @return ++ * - value between 0 and 100 on success ++ * - -1 if lcore is not active ++ * - -EINVAL if lcore is invalid ++ * - -ENOMEM if not enough memory available ++ * - -ENOTSUP if not supported ++ */ ++__rte_experimental ++int ++rte_lcore_capacity(unsigned int lcore_id); ++ + /** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. 
+diff --git a/lib/librte_eal/version.map b/lib/librte_eal/version.map +index d828a0d791..cac187ffdd 100644 +--- a/lib/librte_eal/version.map ++++ b/lib/librte_eal/version.map +@@ -406,6 +406,7 @@ EXPERIMENTAL { + + __rte_lcore_telemetry_timestamp; + __rte_lcore_telemetry_enabled; ++ rte_lcore_capacity; + rte_lcore_busyness; + rte_lcore_busyness_enabled; + rte_lcore_busyness_enabled_set; +-- +2.25.1 + diff --git a/ipm/patches/dpdk/21.11/0001-eal-add-lcore-busyness-telemetry.patch b/ipm/patches/dpdk/21.11/0001-eal-add-lcore-busyness-telemetry.patch index bbe0514..7500cc6 100644 --- a/ipm/patches/dpdk/21.11/0001-eal-add-lcore-busyness-telemetry.patch +++ b/ipm/patches/dpdk/21.11/0001-eal-add-lcore-busyness-telemetry.patch @@ -1,7 +1,7 @@ -From 11d4e3f53f85c76944f86043a552a6b1308c3e32 Mon Sep 17 00:00:00 2001 -From: David Hunt -Date: Fri, 4 Nov 2022 13:09:51 +0000 -Subject: [PATCH 1/2] eal: add lcore busyness telemetry +From 95a7bd751ef9216b0ae9fe7d069af367e74dcf50 Mon Sep 17 00:00:00 2001 +From: Anatoly Burakov +Date: Mon, 11 Nov 2024 08:54:43 +0000 +Subject: [PATCH 1/3] eal: add lcore busyness telemetry Currently, there is no way to measure lcore busyness in a passive way, without any modifications to the application. This patch adds a new EAL @@ -72,12 +72,12 @@ Signed-off-by: Anatoly Burakov create mode 100644 lib/eal/common/eal_common_lcore_telemetry.c diff --git a/config/rte_config.h b/config/rte_config.h -index cab4390a97..f3fdfa5626 100644 +index 2f1a3ffb21..4696d8c97b 100644 --- a/config/rte_config.h +++ b/config/rte_config.h -@@ -39,6 +39,8 @@ +@@ -38,6 +38,8 @@ + #define RTE_MAX_TAILQ 32 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO - #define RTE_BACKTRACE 1 #define RTE_MAX_VFIO_CONTAINERS 64 +#define RTE_LCORE_BUSYNESS 1 +#define RTE_LCORE_BUSYNESS_PERIOD 4000000ULL @@ -154,10 +154,10 @@ index 2e9218af68..5a72e82768 100644 } diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h -index 59ea5a54df..d81a37f0c7 100644 +index eead3d2bff..e1910da48e 100644 --- a/lib/cryptodev/rte_cryptodev.h +++ b/lib/cryptodev/rte_cryptodev.h -@@ -1867,6 +1867,8 @@ rte_cryptodev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, +@@ -1874,6 +1874,8 @@ rte_cryptodev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, rte_rcu_qsbr_thread_offline(list->qsbr, 0); } #endif @@ -640,17 +640,17 @@ index 917758cc65..a743e66a7d 100644 'eal_common_log.c', 'eal_common_mcfg.c', diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c -index 414aad3dd3..c6d3975b43 100644 +index 66553089fa..2a02c036f0 100644 --- a/lib/eal/freebsd/eal.c +++ b/lib/eal/freebsd/eal.c -@@ -988,6 +988,7 @@ rte_eal_cleanup(void) +@@ -998,6 +998,7 @@ rte_eal_cleanup(void) rte_mp_channel_cleanup(); rte_trace_save(); eal_trace_fini(); + eal_lcore_telemetry_free(); + rte_eal_alarm_cleanup(); /* after this point, any DPDK pointers will become dangling */ rte_eal_memory_detach(); - rte_eal_alarm_cleanup(); diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h index 258bc49b24..85d6e38f4e 100644 --- a/lib/eal/include/rte_lcore.h @@ -747,17 +747,17 @@ index 258bc49b24..85d6e38f4e 100644 } #endif diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c -index e3d34f7b7c..594a7bc810 100644 +index 6f7e8641d3..520ec01afd 100644 --- a/lib/eal/linux/eal.c +++ b/lib/eal/linux/eal.c -@@ -1370,6 +1370,7 @@ rte_eal_cleanup(void) +@@ -1380,6 +1380,7 @@ rte_eal_cleanup(void) rte_mp_channel_cleanup(); rte_trace_save(); eal_trace_fini(); + eal_lcore_telemetry_free(); + rte_eal_alarm_cleanup(); /* after this point, any DPDK pointers will become 
dangling */ rte_eal_memory_detach(); - rte_eal_alarm_cleanup(); diff --git a/lib/eal/meson.build b/lib/eal/meson.build index 1722924f67..01b51f0105 100644 --- a/lib/eal/meson.build @@ -791,10 +791,10 @@ index ab28c22791..a06a9c2a47 100644 INTERNAL { diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h -index 0be04c5809..a00d1e7f26 100644 +index 083f324a46..35f2539629 100644 --- a/lib/ethdev/rte_ethdev.h +++ b/lib/ethdev/rte_ethdev.h -@@ -5357,6 +5357,8 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id, +@@ -5361,6 +5361,8 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id, #endif rte_ethdev_trace_rx_burst(port_id, queue_id, (void **)rx_pkts, nb_rx); @@ -804,10 +804,10 @@ index 0be04c5809..a00d1e7f26 100644 } diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h -index 476bcbcc21..6da0734515 100644 +index f09ea4a9d1..c3f9e4fcd7 100644 --- a/lib/eventdev/rte_eventdev.h +++ b/lib/eventdev/rte_eventdev.h -@@ -2055,6 +2055,7 @@ rte_event_dequeue_burst(uint8_t dev_id, uint8_t port_id, struct rte_event ev[], +@@ -2070,6 +2070,7 @@ rte_event_dequeue_burst(uint8_t dev_id, uint8_t port_id, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks) { const struct rte_event_fp_ops *fp_ops; @@ -815,7 +815,7 @@ index 476bcbcc21..6da0734515 100644 void *port; fp_ops = &rte_event_fp_ops[dev_id]; -@@ -2077,10 +2078,13 @@ rte_event_dequeue_burst(uint8_t dev_id, uint8_t port_id, struct rte_event ev[], +@@ -2092,10 +2093,13 @@ rte_event_dequeue_burst(uint8_t dev_id, uint8_t port_id, struct rte_event ev[], * requests nb_events as const one */ if (nb_events == 1) @@ -833,7 +833,7 @@ index 476bcbcc21..6da0734515 100644 #define RTE_EVENT_DEV_MAINT_OP_FLUSH (1 << 0) diff --git a/lib/rawdev/rte_rawdev.c b/lib/rawdev/rte_rawdev.c -index a6134e76ea..e759999553 100644 +index c06ed8b9c7..2c46e6fc64 100644 --- a/lib/rawdev/rte_rawdev.c +++ b/lib/rawdev/rte_rawdev.c @@ -240,12 +240,15 @@ rte_rawdev_dequeue_buffers(uint16_t dev_id, @@ -877,10 +877,10 @@ index 513ce5b67c..de29dc3940 100644 #ifdef __cplusplus diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h -index 275ec55393..4f3ed674ce 100644 +index 99786cca95..9f0250636b 100644 --- a/lib/ring/rte_ring_elem_pvt.h +++ b/lib/ring/rte_ring_elem_pvt.h -@@ -379,6 +379,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table, +@@ -385,6 +385,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table, end: if (available != NULL) *available = entries - n; @@ -889,5 +889,5 @@ index 275ec55393..4f3ed674ce 100644 } -- -2.31.1 +2.25.1 diff --git a/ipm/patches/dpdk/21.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch b/ipm/patches/dpdk/21.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch index 48e8f00..1ea9569 100644 --- a/ipm/patches/dpdk/21.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch +++ b/ipm/patches/dpdk/21.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch @@ -1,7 +1,7 @@ -From ab83a7cbdea4d38de86b7f3c1a5e64a2aff0c06e Mon Sep 17 00:00:00 2001 +From ac2b8db5f2dc2578b99a63b0abaea703c092ab42 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Fri, 15 Jul 2022 13:12:45 +0000 -Subject: [PATCH 2/2] eal: add cpuset lcore telemetry entries +Subject: [PATCH 2/3] eal: add cpuset lcore telemetry entries Expose per-lcore cpuset information to telemetry. 
@@ -84,5 +84,5 @@ index 2e9033bf5a..f01ccd9a65 100644 } -- -2.31.1 +2.25.1 diff --git a/ipm/patches/dpdk/21.11/0003-add-capacity-endpoint-to-telemetry-thread.patch b/ipm/patches/dpdk/21.11/0003-add-capacity-endpoint-to-telemetry-thread.patch new file mode 100644 index 0000000..8b8b247 --- /dev/null +++ b/ipm/patches/dpdk/21.11/0003-add-capacity-endpoint-to-telemetry-thread.patch @@ -0,0 +1,356 @@ +From 644d8d946ce5e31c9a818da9661f4e0658f57754 Mon Sep 17 00:00:00 2001 +From: David Hunt +Date: Mon, 16 Sep 2024 14:28:18 +0100 +Subject: [PATCH 3/3] add capacity endpoint to telemetry thread + +Busyness is calculated on how busy the current core is, ignoring the +current frequency. So a core that's 50% busy at P1 (e.g. 2GHz), shows +as 100% busy at 1GHz. + +This patch adds a new 'capacity' metric that shows a percentage based on +the P1 (base) freqency of the core, so that if the core is 50% busy at +P1, it should show 50% regardless of what the current frequency is. + +Signed-off-by: David Hunt +--- + lib/eal/common/eal_common_lcore_telemetry.c | 240 ++++++++++++++++++++ + lib/eal/include/rte_lcore.h | 21 ++ + lib/eal/version.map | 1 + + 3 files changed, 262 insertions(+) + +diff --git a/lib/eal/common/eal_common_lcore_telemetry.c b/lib/eal/common/eal_common_lcore_telemetry.c +index f01ccd9a65..18dcc40b1e 100644 +--- a/lib/eal/common/eal_common_lcore_telemetry.c ++++ b/lib/eal/common/eal_common_lcore_telemetry.c +@@ -10,9 +10,18 @@ + #include + #include + #include ++#include ++#include ++#include + + #ifdef RTE_LCORE_BUSYNESS + #include ++#define MSR_PLATFORM_INFO 0xCE ++#define POWER_SYSFS_CUR_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" ++#define POWER_SYSFS_BASE_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" ++#define POWER_SYSFS_SCALING_DRIVER_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_driver" ++#define POWER_SYSFS_SCALING_MAX_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" ++#define POWER_SYSFS_MSR_PATH "/dev/cpu/%u/msr" + #endif + + int __rte_lcore_telemetry_enabled; +@@ -47,6 +56,182 @@ static struct lcore_telemetry *telemetry_data; + #define SMOOTH_COEFF 5 + #define STATE_CHANGE_OPT 32 + ++static int p1_freq[RTE_MAX_LCORE] = {0}; ++ ++static int ++try_read_base_frequency(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ snprintf(path, sizeof(path), POWER_SYSFS_BASE_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ ++ p1_freq[lcore_id] = atoi(buffer); ++ return p1_freq[lcore_id]; ++ ++ ++} ++ ++static int ++try_read_scaling_max_freq(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int freq; ++ int fd; ++ ++ /* ++ * If the driver is acpi_cpufreq, we can read the scaling_max_freq file ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_DRIVER_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ close(fd); ++ ++ if (strncmp(buffer, "acpi-cpufreq", 12) == 0) { ++ /* we can use the scaling_max_freq to get the p1 */ ++ snprintf(path, sizeof(path), 
POWER_SYSFS_SCALING_MAX_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ freq = atoi(buffer) / 1000; /* convert to KHz */ ++ ++ /* ++ * If the freq value ends with '1', then, turbo is enabled. ++ * Round it down to the nearest 100. Otherwuse use the value. ++ */ ++ return (freq & ~1) * 1000; /* convert to Hz */ ++ } ++ return -1; ++} ++ ++static int ++try_read_msr(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ int freq; ++ uint64_t data; ++ ++ /* ++ * If the msr driver is present, we can read p1 from MSR_PLATFORM_INFO register ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_MSR_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd < 0) { ++ return -1; ++ } ++ ++ if (pread(fd, &data, sizeof(data), MSR_PLATFORM_INFO) != sizeof(data)) { ++ close(fd); ++ return -1; ++ } ++ ++ close(fd); ++ ++ freq = ((data >> 8) & 0xff) * 100 * 1000; ++ ++ return freq; ++} ++ ++ ++static ++int read_sysfs_p1_freq(unsigned int lcore_id) { ++ int freq; ++ ++ /* We've previously got the p1 frequency. */ ++ if (p1_freq[lcore_id] != 0) ++ return p1_freq[lcore_id]; ++ ++ /* ++ * Check the base_frequency file, if it's there ++ */ ++ freq = try_read_base_frequency(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Check the scaling_max_freq file for the acpi-freq driver ++ */ ++ freq = try_read_scaling_max_freq(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Try reading from the MSR register ++ */ ++ freq = try_read_msr(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ RTE_LOG(ERR, EAL, "Capacity telemetry for lcore %d not supported: no p1 frequency found", ++ lcore_id); ++ ++ return -1; ++} ++ ++ ++int current_fds[RTE_MAX_LCORE] = {0}; ++ ++static ++int read_sysfs_cur_freq(unsigned int lcore_id) { ++ char path[PATH_MAX]; ++ ++ if (current_fds[lcore_id] == 0) { ++ snprintf(path, sizeof(path), POWER_SYSFS_CUR_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ current_fds[lcore_id] = open(path, O_RDONLY); ++ if (current_fds[lcore_id] == -1) { ++ return -1; ++ } ++ } ++ ++ char buffer[16]; ++ ssize_t bytesRead = pread(current_fds[lcore_id], buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ int value = atoi(buffer); ++ return value; ++} ++ + /* Helper function to check if the lcore is enabled. 
+ * Cannot use rte_lcore_is_enabled since it only catches ROLE_RTE threads which + * does not include ROLE_NON_EAL threads which some application threads, for +@@ -102,6 +287,33 @@ int rte_lcore_busyness(unsigned int lcore_id) + return telemetry_data[lcore_id].busyness; + } + ++int rte_lcore_capacity(unsigned int lcore_id) ++{ ++ const uint64_t active_thresh = RTE_LCORE_BUSYNESS_PERIOD * 1000; ++ struct lcore_telemetry *tdata; ++ ++ if (lcore_id >= RTE_MAX_LCORE) ++ return -EINVAL; ++ tdata = &telemetry_data[lcore_id]; ++ ++ /* if the lcore is not active */ ++ if (tdata->interval_ts == 0) ++ return LCORE_BUSYNESS_NOT_SET; ++ /* if the core hasn't been active in a while */ ++ else if ((rte_rdtsc() - tdata->interval_ts) > active_thresh) ++ return LCORE_BUSYNESS_NOT_SET; ++ ++ int cur_freq = read_sysfs_cur_freq(rte_lcore_to_cpu_id(lcore_id)); ++ int busy = telemetry_data[lcore_id].busyness; ++ int p1 = read_sysfs_p1_freq(lcore_id) ; ++ ++ if ((busy == -1) || (p1 <= 0)) { ++ return -1; ++ } else { ++ return busy * cur_freq / p1; ++ } ++} ++ + int rte_lcore_busyness_enabled(void) + { + return __rte_lcore_telemetry_enabled; +@@ -263,6 +475,26 @@ lcore_handle_busyness(const char *cmd __rte_unused, + return 0; + } + ++static int ++lcore_handle_capacity(const char *cmd __rte_unused, ++ const char *params __rte_unused, struct rte_tel_data *d) ++{ ++ char corenum[64]; ++ int i; ++ ++ rte_tel_data_start_dict(d); ++ ++ /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ ++ for (i = 0; i < RTE_MAX_LCORE; i++) { ++ if (!lcore_enabled(i)) ++ continue; ++ snprintf(corenum, sizeof(corenum), "%d", i); ++ rte_tel_data_add_dict_int(d, corenum, rte_lcore_capacity(i)); ++ } ++ ++ return 0; ++} ++ + static int + lcore_handle_cpuset(const char *cmd __rte_unused, + const char *params __rte_unused, +@@ -326,6 +558,9 @@ RTE_INIT(lcore_init_telemetry) + rte_telemetry_register_cmd("/eal/lcore/busyness", lcore_handle_busyness, + "return percentage busyness of cores"); + ++ rte_telemetry_register_cmd("/eal/lcore/capacity_used", lcore_handle_capacity, ++ "return percentage capacity of cores"); ++ + rte_telemetry_register_cmd("/eal/lcore/busyness_enable", lcore_busyness_enable, + "enable lcore busyness measurement"); + +@@ -340,6 +575,11 @@ RTE_INIT(lcore_init_telemetry) + + #else + ++int rte_lcore_capacity(unsigned int lcore_id __rte_unused) ++{ ++ return -ENOTSUP; ++} ++ + int rte_lcore_busyness(unsigned int lcore_id __rte_unused) + { + return -ENOTSUP; +diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h +index 85d6e38f4e..4a631e9645 100644 +--- a/lib/eal/include/rte_lcore.h ++++ b/lib/eal/include/rte_lcore.h +@@ -443,6 +443,27 @@ __rte_experimental + int + rte_lcore_busyness(unsigned int lcore_id); + ++/** ++ * @warning ++ * @b EXPERIMENTAL: this API may change without prior notice. ++ * ++ * Read capacity value corresponding to an lcore. ++ * This differs from busyness in that it is related to the current usage ++ * of the lcore compared to P1 frequency, not the current frequency. ++ * ++ * @param lcore_id ++ * Lcore to read capacity value for. ++ * @return ++ * - value between 0 and 100 on success ++ * - -1 if lcore is not active ++ * - -EINVAL if lcore is invalid ++ * - -ENOMEM if not enough memory available ++ * - -ENOTSUP if not supported ++ */ ++__rte_experimental ++int ++rte_lcore_capacity(unsigned int lcore_id); ++ + /** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. 
+diff --git a/lib/eal/version.map b/lib/eal/version.map +index a06a9c2a47..a405bfb319 100644 +--- a/lib/eal/version.map ++++ b/lib/eal/version.map +@@ -424,6 +424,7 @@ EXPERIMENTAL { + # Telemetry patch set APIs + __rte_lcore_telemetry_timestamp; + __rte_lcore_telemetry_enabled; ++ rte_lcore_capacity; + rte_lcore_busyness; + rte_lcore_busyness_enabled; + rte_lcore_busyness_enabled_set; +-- +2.25.1 + diff --git a/ipm/patches/dpdk/22.11/0001-eal-add-lcore-busyness-telemetry.patch b/ipm/patches/dpdk/22.11/0001-eal-add-lcore-busyness-telemetry.patch index b5ba61d..7e8d5c4 100644 --- a/ipm/patches/dpdk/22.11/0001-eal-add-lcore-busyness-telemetry.patch +++ b/ipm/patches/dpdk/22.11/0001-eal-add-lcore-busyness-telemetry.patch @@ -1,7 +1,7 @@ -From 26e52ef9874d526d0d06a08f1463ee9ec9ba51ba Mon Sep 17 00:00:00 2001 +From 5129fb19501eecce2410efc58548951ffb02c226 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov -Date: Fri, 15 Jul 2022 13:12:44 +0000 -Subject: [PATCH 1/2] eal: add lcore busyness telemetry +Date: Mon, 11 Nov 2024 09:25:29 +0000 +Subject: [PATCH 1/3] eal: add lcore busyness telemetry Currently, there is no way to measure lcore busyness in a passive way, without any modifications to the application. This patch adds a new EAL @@ -72,12 +72,12 @@ Signed-off-by: Anatoly Burakov create mode 100644 lib/eal/common/eal_common_lcore_telemetry.c diff --git a/config/rte_config.h b/config/rte_config.h -index 3c4876d434..864ede39b3 100644 +index 7b8c85e948..1193d936f6 100644 --- a/config/rte_config.h +++ b/config/rte_config.h -@@ -39,6 +39,8 @@ +@@ -38,6 +38,8 @@ + #define RTE_MAX_TAILQ 32 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO - #define RTE_BACKTRACE 1 #define RTE_MAX_VFIO_CONTAINERS 64 +#define RTE_LCORE_BUSYNESS 1 +#define RTE_LCORE_BUSYNESS_PERIOD 4000000ULL @@ -154,10 +154,10 @@ index cf2c20a704..04a854a935 100644 } diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h -index 86d792e2e7..71d1ccbe9a 100644 +index cef9f2b3cb..82fc21d24d 100644 --- a/lib/cryptodev/rte_cryptodev.h +++ b/lib/cryptodev/rte_cryptodev.h -@@ -1855,6 +1855,8 @@ rte_cryptodev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, +@@ -1874,6 +1874,8 @@ rte_cryptodev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, rte_rcu_qsbr_thread_offline(list->qsbr, 0); } #endif @@ -642,17 +642,17 @@ index 917758cc65..a743e66a7d 100644 'eal_common_log.c', 'eal_common_mcfg.c', diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c -index 607684c1a3..a34b2d3116 100644 +index 122daf6c1f..a7be6ab285 100644 --- a/lib/eal/freebsd/eal.c +++ b/lib/eal/freebsd/eal.c -@@ -896,6 +896,7 @@ rte_eal_cleanup(void) +@@ -906,6 +906,7 @@ rte_eal_cleanup(void) eal_bus_cleanup(); rte_trace_save(); eal_trace_fini(); + eal_lcore_telemetry_free(); + rte_eal_alarm_cleanup(); /* after this point, any DPDK pointers will become dangling */ rte_eal_memory_detach(); - rte_eal_alarm_cleanup(); diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h index 6938c3fd7b..9f4bd6e22f 100644 --- a/lib/eal/include/rte_lcore.h @@ -749,17 +749,17 @@ index 6938c3fd7b..9f4bd6e22f 100644 } #endif diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c -index 8c118d0d9f..2584213f5f 100644 +index 336698379f..2bff37791e 100644 --- a/lib/eal/linux/eal.c +++ b/lib/eal/linux/eal.c -@@ -1372,6 +1372,7 @@ rte_eal_cleanup(void) +@@ -1382,6 +1382,7 @@ rte_eal_cleanup(void) eal_bus_cleanup(); rte_trace_save(); eal_trace_fini(); + eal_lcore_telemetry_free(); - /* after this point, any DPDK pointers will become dangling */ - rte_eal_memory_detach(); 
eal_mp_dev_hotplug_cleanup(); + rte_eal_alarm_cleanup(); + /* after this point, any DPDK pointers will become dangling */ diff --git a/lib/eal/meson.build b/lib/eal/meson.build index 056beb9461..7199aa03c2 100644 --- a/lib/eal/meson.build @@ -793,10 +793,10 @@ index 7ad12a7dc9..7791f59314 100644 INTERNAL { diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h -index 13fe73d5a3..8c5518c25b 100644 +index e73244822a..17fabfa5b0 100644 --- a/lib/ethdev/rte_ethdev.h +++ b/lib/ethdev/rte_ethdev.h -@@ -5907,6 +5907,8 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id, +@@ -5913,6 +5913,8 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id, #endif rte_ethdev_trace_rx_burst(port_id, queue_id, (void **)rx_pkts, nb_rx); @@ -806,10 +806,10 @@ index 13fe73d5a3..8c5518c25b 100644 } diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h -index a90e23ac8b..6db744db33 100644 +index d0e2463bb8..b4f677c46d 100644 --- a/lib/eventdev/rte_eventdev.h +++ b/lib/eventdev/rte_eventdev.h -@@ -2179,6 +2179,7 @@ rte_event_dequeue_burst(uint8_t dev_id, uint8_t port_id, struct rte_event ev[], +@@ -2194,6 +2194,7 @@ rte_event_dequeue_burst(uint8_t dev_id, uint8_t port_id, struct rte_event ev[], uint16_t nb_events, uint64_t timeout_ticks) { const struct rte_event_fp_ops *fp_ops; @@ -817,7 +817,7 @@ index a90e23ac8b..6db744db33 100644 void *port; fp_ops = &rte_event_fp_ops[dev_id]; -@@ -2201,10 +2202,13 @@ rte_event_dequeue_burst(uint8_t dev_id, uint8_t port_id, struct rte_event ev[], +@@ -2216,10 +2217,13 @@ rte_event_dequeue_burst(uint8_t dev_id, uint8_t port_id, struct rte_event ev[], * requests nb_events as const one */ if (nb_events == 1) @@ -835,7 +835,7 @@ index a90e23ac8b..6db744db33 100644 #define RTE_EVENT_DEV_MAINT_OP_FLUSH (1 << 0) diff --git a/lib/rawdev/rte_rawdev.c b/lib/rawdev/rte_rawdev.c -index 5fbdb94229..b541c73b07 100644 +index dcebe4f653..0faae13c9c 100644 --- a/lib/rawdev/rte_rawdev.c +++ b/lib/rawdev/rte_rawdev.c @@ -237,13 +237,16 @@ rte_rawdev_dequeue_buffers(uint16_t dev_id, @@ -880,10 +880,10 @@ index 25476f1f73..31fd293e48 100644 #ifdef __cplusplus diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h -index 83788c56e6..6db09d4291 100644 +index 4b80f58980..e2a72e3ea7 100644 --- a/lib/ring/rte_ring_elem_pvt.h +++ b/lib/ring/rte_ring_elem_pvt.h -@@ -379,6 +379,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table, +@@ -385,6 +385,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table, end: if (available != NULL) *available = entries - n; @@ -892,5 +892,5 @@ index 83788c56e6..6db09d4291 100644 } -- -2.31.1 +2.25.1 diff --git a/ipm/patches/dpdk/22.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch b/ipm/patches/dpdk/22.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch index 383f984..2a02775 100644 --- a/ipm/patches/dpdk/22.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch +++ b/ipm/patches/dpdk/22.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch @@ -1,7 +1,7 @@ -From e7207152706cce06e1aeecee9b8a63d116a20061 Mon Sep 17 00:00:00 2001 +From 810d87bf69d79351cfa3089df920e4b726f269a5 Mon Sep 17 00:00:00 2001 From: Anatoly Burakov Date: Fri, 15 Jul 2022 13:12:45 +0000 -Subject: [PATCH 2/2] eal: add cpuset lcore telemetry entries +Subject: [PATCH 2/3] eal: add cpuset lcore telemetry entries Expose per-lcore cpuset information to telemetry. 
@@ -75,5 +75,5 @@ index 1478e5a48a..f01ccd9a65 100644 } -- -2.31.1 +2.25.1 diff --git a/ipm/patches/dpdk/22.11/0003-add-capacity-endpoint-to-telemetry-thread.patch b/ipm/patches/dpdk/22.11/0003-add-capacity-endpoint-to-telemetry-thread.patch new file mode 100644 index 0000000..df6cbd4 --- /dev/null +++ b/ipm/patches/dpdk/22.11/0003-add-capacity-endpoint-to-telemetry-thread.patch @@ -0,0 +1,356 @@ +From ea2762b20c60cd66378758559af90bb48c9a8ee5 Mon Sep 17 00:00:00 2001 +From: David Hunt +Date: Fri, 23 Aug 2024 09:07:08 +0100 +Subject: [PATCH 3/3] add capacity endpoint to telemetry thread + +Busyness is calculated on how busy the current core is, ignoring the +current frequency. So a core that's 50% busy at P1 (e.g. 2GHz), shows +as 100% busy at 1GHz. + +This patch adds a new 'capacity' metric that shows a percentage based on +the P1 (base) freqency of the core, so that if the core is 50% busy at +P1, it should show 50% regardless of what the current frequency is. + +Signed-off-by: David Hunt +--- + lib/eal/common/eal_common_lcore_telemetry.c | 240 ++++++++++++++++++++ + lib/eal/include/rte_lcore.h | 21 ++ + lib/eal/version.map | 1 + + 3 files changed, 262 insertions(+) + +diff --git a/lib/eal/common/eal_common_lcore_telemetry.c b/lib/eal/common/eal_common_lcore_telemetry.c +index f01ccd9a65..18dcc40b1e 100644 +--- a/lib/eal/common/eal_common_lcore_telemetry.c ++++ b/lib/eal/common/eal_common_lcore_telemetry.c +@@ -10,9 +10,18 @@ + #include + #include + #include ++#include ++#include ++#include + + #ifdef RTE_LCORE_BUSYNESS + #include ++#define MSR_PLATFORM_INFO 0xCE ++#define POWER_SYSFS_CUR_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" ++#define POWER_SYSFS_BASE_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" ++#define POWER_SYSFS_SCALING_DRIVER_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_driver" ++#define POWER_SYSFS_SCALING_MAX_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" ++#define POWER_SYSFS_MSR_PATH "/dev/cpu/%u/msr" + #endif + + int __rte_lcore_telemetry_enabled; +@@ -47,6 +56,182 @@ static struct lcore_telemetry *telemetry_data; + #define SMOOTH_COEFF 5 + #define STATE_CHANGE_OPT 32 + ++static int p1_freq[RTE_MAX_LCORE] = {0}; ++ ++static int ++try_read_base_frequency(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ snprintf(path, sizeof(path), POWER_SYSFS_BASE_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ ++ p1_freq[lcore_id] = atoi(buffer); ++ return p1_freq[lcore_id]; ++ ++ ++} ++ ++static int ++try_read_scaling_max_freq(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int freq; ++ int fd; ++ ++ /* ++ * If the driver is acpi_cpufreq, we can read the scaling_max_freq file ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_DRIVER_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ close(fd); ++ ++ if (strncmp(buffer, "acpi-cpufreq", 12) == 0) { ++ /* we can use the scaling_max_freq to get the p1 */ ++ snprintf(path, sizeof(path), 
POWER_SYSFS_SCALING_MAX_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ freq = atoi(buffer) / 1000; /* convert to KHz */ ++ ++ /* ++ * If the freq value ends with '1', then, turbo is enabled. ++ * Round it down to the nearest 100. Otherwuse use the value. ++ */ ++ return (freq & ~1) * 1000; /* convert to Hz */ ++ } ++ return -1; ++} ++ ++static int ++try_read_msr(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ int freq; ++ uint64_t data; ++ ++ /* ++ * If the msr driver is present, we can read p1 from MSR_PLATFORM_INFO register ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_MSR_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd < 0) { ++ return -1; ++ } ++ ++ if (pread(fd, &data, sizeof(data), MSR_PLATFORM_INFO) != sizeof(data)) { ++ close(fd); ++ return -1; ++ } ++ ++ close(fd); ++ ++ freq = ((data >> 8) & 0xff) * 100 * 1000; ++ ++ return freq; ++} ++ ++ ++static ++int read_sysfs_p1_freq(unsigned int lcore_id) { ++ int freq; ++ ++ /* We've previously got the p1 frequency. */ ++ if (p1_freq[lcore_id] != 0) ++ return p1_freq[lcore_id]; ++ ++ /* ++ * Check the base_frequency file, if it's there ++ */ ++ freq = try_read_base_frequency(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Check the scaling_max_freq file for the acpi-freq driver ++ */ ++ freq = try_read_scaling_max_freq(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Try reading from the MSR register ++ */ ++ freq = try_read_msr(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ RTE_LOG(ERR, EAL, "Capacity telemetry for lcore %d not supported: no p1 frequency found", ++ lcore_id); ++ ++ return -1; ++} ++ ++ ++int current_fds[RTE_MAX_LCORE] = {0}; ++ ++static ++int read_sysfs_cur_freq(unsigned int lcore_id) { ++ char path[PATH_MAX]; ++ ++ if (current_fds[lcore_id] == 0) { ++ snprintf(path, sizeof(path), POWER_SYSFS_CUR_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ current_fds[lcore_id] = open(path, O_RDONLY); ++ if (current_fds[lcore_id] == -1) { ++ return -1; ++ } ++ } ++ ++ char buffer[16]; ++ ssize_t bytesRead = pread(current_fds[lcore_id], buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ int value = atoi(buffer); ++ return value; ++} ++ + /* Helper function to check if the lcore is enabled. 
+ * Cannot use rte_lcore_is_enabled since it only catches ROLE_RTE threads which + * does not include ROLE_NON_EAL threads which some application threads, for +@@ -102,6 +287,33 @@ int rte_lcore_busyness(unsigned int lcore_id) + return telemetry_data[lcore_id].busyness; + } + ++int rte_lcore_capacity(unsigned int lcore_id) ++{ ++ const uint64_t active_thresh = RTE_LCORE_BUSYNESS_PERIOD * 1000; ++ struct lcore_telemetry *tdata; ++ ++ if (lcore_id >= RTE_MAX_LCORE) ++ return -EINVAL; ++ tdata = &telemetry_data[lcore_id]; ++ ++ /* if the lcore is not active */ ++ if (tdata->interval_ts == 0) ++ return LCORE_BUSYNESS_NOT_SET; ++ /* if the core hasn't been active in a while */ ++ else if ((rte_rdtsc() - tdata->interval_ts) > active_thresh) ++ return LCORE_BUSYNESS_NOT_SET; ++ ++ int cur_freq = read_sysfs_cur_freq(rte_lcore_to_cpu_id(lcore_id)); ++ int busy = telemetry_data[lcore_id].busyness; ++ int p1 = read_sysfs_p1_freq(lcore_id) ; ++ ++ if ((busy == -1) || (p1 <= 0)) { ++ return -1; ++ } else { ++ return busy * cur_freq / p1; ++ } ++} ++ + int rte_lcore_busyness_enabled(void) + { + return __rte_lcore_telemetry_enabled; +@@ -263,6 +475,26 @@ lcore_handle_busyness(const char *cmd __rte_unused, + return 0; + } + ++static int ++lcore_handle_capacity(const char *cmd __rte_unused, ++ const char *params __rte_unused, struct rte_tel_data *d) ++{ ++ char corenum[64]; ++ int i; ++ ++ rte_tel_data_start_dict(d); ++ ++ /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ ++ for (i = 0; i < RTE_MAX_LCORE; i++) { ++ if (!lcore_enabled(i)) ++ continue; ++ snprintf(corenum, sizeof(corenum), "%d", i); ++ rte_tel_data_add_dict_int(d, corenum, rte_lcore_capacity(i)); ++ } ++ ++ return 0; ++} ++ + static int + lcore_handle_cpuset(const char *cmd __rte_unused, + const char *params __rte_unused, +@@ -326,6 +558,9 @@ RTE_INIT(lcore_init_telemetry) + rte_telemetry_register_cmd("/eal/lcore/busyness", lcore_handle_busyness, + "return percentage busyness of cores"); + ++ rte_telemetry_register_cmd("/eal/lcore/capacity_used", lcore_handle_capacity, ++ "return percentage capacity of cores"); ++ + rte_telemetry_register_cmd("/eal/lcore/busyness_enable", lcore_busyness_enable, + "enable lcore busyness measurement"); + +@@ -340,6 +575,11 @@ RTE_INIT(lcore_init_telemetry) + + #else + ++int rte_lcore_capacity(unsigned int lcore_id __rte_unused) ++{ ++ return -ENOTSUP; ++} ++ + int rte_lcore_busyness(unsigned int lcore_id __rte_unused) + { + return -ENOTSUP; +diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h +index 9f4bd6e22f..132cdb9139 100644 +--- a/lib/eal/include/rte_lcore.h ++++ b/lib/eal/include/rte_lcore.h +@@ -437,6 +437,27 @@ __rte_experimental + int + rte_lcore_busyness(unsigned int lcore_id); + ++/** ++ * @warning ++ * @b EXPERIMENTAL: this API may change without prior notice. ++ * ++ * Read capacity value corresponding to an lcore. ++ * This differs from busyness in that it is related to the current usage ++ * of the lcore compared to P1 frequency, not the current frequency. ++ * ++ * @param lcore_id ++ * Lcore to read capacity value for. ++ * @return ++ * - value between 0 and 100 on success ++ * - -1 if lcore is not active ++ * - -EINVAL if lcore is invalid ++ * - -ENOMEM if not enough memory available ++ * - -ENOTSUP if not supported ++ */ ++__rte_experimental ++int ++rte_lcore_capacity(unsigned int lcore_id); ++ + /** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. 
+diff --git a/lib/eal/version.map b/lib/eal/version.map +index 7791f59314..5bb8429b29 100644 +--- a/lib/eal/version.map ++++ b/lib/eal/version.map +@@ -444,6 +444,7 @@ EXPERIMENTAL { + # Added for busyness telemetry + __rte_lcore_telemetry_timestamp; + __rte_lcore_telemetry_enabled; ++ rte_lcore_capacity; + rte_lcore_busyness; + rte_lcore_busyness_enabled; + rte_lcore_busyness_enabled_set; +-- +2.25.1 + diff --git a/ipm/patches/dpdk/23.11/0001-eal-add-lcore-busyness-telemetry.patch b/ipm/patches/dpdk/23.11/0001-eal-add-lcore-busyness-telemetry.patch new file mode 100644 index 0000000..79354fa --- /dev/null +++ b/ipm/patches/dpdk/23.11/0001-eal-add-lcore-busyness-telemetry.patch @@ -0,0 +1,976 @@ +From 80cd70f677c7747f0f930c6212bbd42bcd58c02c Mon Sep 17 00:00:00 2001 +From: Anatoly Burakov +Date: Fri, 20 Sep 2024 09:08:28 +0100 +Subject: [PATCH 1/3] eal: add lcore busyness telemetry + +Currently, there is no way to measure lcore busyness in a passive way, +without any modifications to the application. This patch adds a new EAL +API that will be able to passively track core busyness. + +The busyness is calculated by relying on the fact that most DPDK API's +will poll for packets. Empty polls can be counted as "idle", while +non-empty polls can be counted as busy. To measure lcore busyness, we +simply call the telemetry timestamping function with the number of polls +a particular code section has processed, and count the number of cycles +we've spent processing empty bursts. The more empty bursts we encounter, +the less cycles we spend in "busy" state, and the less core busyness +will be reported. + +In order for all of the above to work without modifications to the +application, the library code needs to be instrumented with calls to +the lcore telemetry busyness timestamping function. The following parts +of DPDK are instrumented with lcore telemetry calls: + +- All major driver API's: + - ethdev + - cryptodev + - compressdev + - regexdev + - bbdev + - rawdev + - eventdev + - dmadev +- Some additional libraries: + - ring + - distributor + +To avoid performance impact from having lcore telemetry support, a +global variable is exported by EAL, and a call to timestamping function +is wrapped into a macro, so that whenever telemetry is disabled, it only +takes one additional branch and no function calls are performed. It is +also possible to disable it at compile time by commenting out +RTE_LCORE_BUSYNESS from build config. + +This patch also adds a telemetry endpoint to report lcore busyness, as +well as telemetry endpoints to enable/disable lcore telemetry. 
+ +Signed-off-by: Kevin Laatz +Signed-off-by: Conor Walsh +Signed-off-by: David Hunt +Signed-off-by: Anatoly Burakov +--- + config/rte_config.h | 2 + + lib/bbdev/rte_bbdev.h | 17 +- + lib/compressdev/rte_compressdev.c | 2 + + lib/cryptodev/rte_cryptodev.h | 2 + + lib/distributor/rte_distributor.c | 21 +- + lib/distributor/rte_distributor_single.c | 14 +- + lib/dmadev/rte_dmadev.h | 16 +- + lib/eal/common/eal_common_lcore_telemetry.c | 319 ++++++++++++++++++++ + lib/eal/common/meson.build | 1 + + lib/eal/freebsd/eal.c | 1 + + lib/eal/include/rte_lcore.h | 84 ++++++ + lib/eal/linux/eal.c | 1 + + lib/eal/meson.build | 3 + + lib/eal/version.map | 58 ++++ + lib/ethdev/rte_ethdev.h | 2 + + lib/eventdev/rte_eventdev.h | 9 +- + lib/rawdev/rte_rawdev.c | 5 +- + lib/regexdev/rte_regexdev.h | 5 +- + lib/ring/rte_ring.h | 8 +- + lib/ring/rte_ring_elem_pvt.h | 1 + + 20 files changed, 545 insertions(+), 26 deletions(-) + create mode 100644 lib/eal/common/eal_common_lcore_telemetry.c + +diff --git a/config/rte_config.h b/config/rte_config.h +index da265d7dd2..7e0083abf3 100644 +--- a/config/rte_config.h ++++ b/config/rte_config.h +@@ -38,6 +38,8 @@ + #define RTE_MAX_TAILQ 32 + #define RTE_LOG_DP_LEVEL RTE_LOG_INFO + #define RTE_MAX_VFIO_CONTAINERS 64 ++#define RTE_LCORE_BUSYNESS 1 ++#define RTE_LCORE_BUSYNESS_PERIOD 4000000ULL + + /* bsd module defines */ + #define RTE_CONTIGMEM_MAX_NUM_BUFS 64 +diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h +index 0cbfdd1c95..536f3a79d3 100644 +--- a/lib/bbdev/rte_bbdev.h ++++ b/lib/bbdev/rte_bbdev.h +@@ -29,6 +29,7 @@ extern "C" { + + #include + #include ++#include + + #include "rte_bbdev_op.h" + +@@ -748,7 +749,9 @@ rte_bbdev_dequeue_enc_ops(uint16_t dev_id, uint16_t queue_id, + { + struct rte_bbdev *dev = &rte_bbdev_devices[dev_id]; + struct rte_bbdev_queue_data *q_data = &dev->data->queues[queue_id]; +- return dev->dequeue_enc_ops(q_data, ops, num_ops); ++ const uint16_t nb_ops = dev->dequeue_enc_ops(q_data, ops, num_ops); ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_ops); ++ return nb_ops; + } + + /** +@@ -780,7 +783,9 @@ rte_bbdev_dequeue_dec_ops(uint16_t dev_id, uint16_t queue_id, + { + struct rte_bbdev *dev = &rte_bbdev_devices[dev_id]; + struct rte_bbdev_queue_data *q_data = &dev->data->queues[queue_id]; +- return dev->dequeue_dec_ops(q_data, ops, num_ops); ++ const uint16_t nb_ops = dev->dequeue_dec_ops(q_data, ops, num_ops); ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_ops); ++ return nb_ops; + } + + +@@ -811,7 +816,9 @@ rte_bbdev_dequeue_ldpc_enc_ops(uint16_t dev_id, uint16_t queue_id, + { + struct rte_bbdev *dev = &rte_bbdev_devices[dev_id]; + struct rte_bbdev_queue_data *q_data = &dev->data->queues[queue_id]; +- return dev->dequeue_ldpc_enc_ops(q_data, ops, num_ops); ++ const uint16_t nb_ops = dev->dequeue_ldpc_enc_ops(q_data, ops, num_ops); ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_ops); ++ return nb_ops; + } + + /** +@@ -841,7 +848,9 @@ rte_bbdev_dequeue_ldpc_dec_ops(uint16_t dev_id, uint16_t queue_id, + { + struct rte_bbdev *dev = &rte_bbdev_devices[dev_id]; + struct rte_bbdev_queue_data *q_data = &dev->data->queues[queue_id]; +- return dev->dequeue_ldpc_dec_ops(q_data, ops, num_ops); ++ const uint16_t nb_ops = dev->dequeue_ldpc_dec_ops(q_data, ops, num_ops); ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_ops); ++ return nb_ops; + } + + /** +diff --git a/lib/compressdev/rte_compressdev.c b/lib/compressdev/rte_compressdev.c +index cf2c20a704..04a854a935 100644 +--- a/lib/compressdev/rte_compressdev.c ++++ b/lib/compressdev/rte_compressdev.c +@@ -587,6 +587,8 @@ 
rte_compressdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, + nb_ops = (*dev->dequeue_burst) + (dev->data->queue_pairs[qp_id], ops, nb_ops); + ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_ops); ++ + return nb_ops; + } + +diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h +index a42a4fc04e..12e986c6e4 100644 +--- a/lib/cryptodev/rte_cryptodev.h ++++ b/lib/cryptodev/rte_cryptodev.h +@@ -1933,6 +1933,8 @@ rte_cryptodev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, + rte_rcu_qsbr_thread_offline(list->qsbr, 0); + } + #endif ++ ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_ops); + return nb_ops; + } + +diff --git a/lib/distributor/rte_distributor.c b/lib/distributor/rte_distributor.c +index 2ecb95c3e5..0c8f9a1dea 100644 +--- a/lib/distributor/rte_distributor.c ++++ b/lib/distributor/rte_distributor.c +@@ -57,6 +57,8 @@ rte_distributor_request_pkt(struct rte_distributor *d, + + while (rte_rdtsc() < t) + rte_pause(); ++ /* this was an empty poll */ ++ RTE_LCORE_TELEMETRY_TIMESTAMP(0); + } + + /* +@@ -135,24 +137,29 @@ rte_distributor_get_pkt(struct rte_distributor *d, + + if (unlikely(d->alg_type == RTE_DIST_ALG_SINGLE)) { + if (return_count <= 1) { ++ uint16_t cnt; + pkts[0] = rte_distributor_get_pkt_single(d->d_single, +- worker_id, return_count ? oldpkt[0] : NULL); +- return (pkts[0]) ? 1 : 0; +- } else +- return -EINVAL; ++ worker_id, ++ return_count ? oldpkt[0] : NULL); ++ cnt = (pkts[0] != NULL) ? 1 : 0; ++ RTE_LCORE_TELEMETRY_TIMESTAMP(cnt); ++ return cnt; ++ } ++ return -EINVAL; + } + + rte_distributor_request_pkt(d, worker_id, oldpkt, return_count); + +- count = rte_distributor_poll_pkt(d, worker_id, pkts); +- while (count == -1) { ++ while ((count = rte_distributor_poll_pkt(d, worker_id, pkts)) == -1) { + uint64_t t = rte_rdtsc() + 100; + + while (rte_rdtsc() < t) + rte_pause(); + +- count = rte_distributor_poll_pkt(d, worker_id, pkts); ++ /* this was an empty poll */ ++ RTE_LCORE_TELEMETRY_TIMESTAMP(0); + } ++ RTE_LCORE_TELEMETRY_TIMESTAMP(count); + return count; + } + +diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c +index d4b3e12648..3c18805ecd 100644 +--- a/lib/distributor/rte_distributor_single.c ++++ b/lib/distributor/rte_distributor_single.c +@@ -31,8 +31,13 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d, + union rte_distributor_buffer_single *buf = &d->bufs[worker_id]; + int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS) + | RTE_DISTRIB_GET_BUF; +- RTE_WAIT_UNTIL_MASKED(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, +- ==, 0, rte_memory_order_relaxed); ++ ++ while ((__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED) ++ & RTE_DISTRIB_FLAGS_MASK) != 0) { ++ rte_pause(); ++ /* this was an empty poll */ ++ RTE_LCORE_TELEMETRY_TIMESTAMP(0); ++ } + + /* Sync with distributor on GET_BUF flag. 
*/ + rte_atomic_store_explicit(&buf->bufptr64, req, rte_memory_order_release); +@@ -59,8 +64,11 @@ rte_distributor_get_pkt_single(struct rte_distributor_single *d, + { + struct rte_mbuf *ret; + rte_distributor_request_pkt_single(d, worker_id, oldpkt); +- while ((ret = rte_distributor_poll_pkt_single(d, worker_id)) == NULL) ++ while ((ret = rte_distributor_poll_pkt_single(d, worker_id)) == NULL) { + rte_pause(); ++ /* this was an empty poll */ ++ RTE_LCORE_TELEMETRY_TIMESTAMP(0); ++ } + return ret; + } + +diff --git a/lib/dmadev/rte_dmadev.h b/lib/dmadev/rte_dmadev.h +index 450b81c307..e32d089a48 100644 +--- a/lib/dmadev/rte_dmadev.h ++++ b/lib/dmadev/rte_dmadev.h +@@ -148,6 +148,8 @@ + + #include + #include ++#include ++#include + + #ifdef __cplusplus + extern "C" { +@@ -995,7 +997,7 @@ rte_dma_completed(int16_t dev_id, uint16_t vchan, const uint16_t nb_cpls, + uint16_t *last_idx, bool *has_error) + { + struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id]; +- uint16_t idx; ++ uint16_t idx, nb_ops; + bool err; + + #ifdef RTE_DMADEV_DEBUG +@@ -1019,8 +1021,10 @@ rte_dma_completed(int16_t dev_id, uint16_t vchan, const uint16_t nb_cpls, + has_error = &err; + + *has_error = false; +- return (*obj->completed)(obj->dev_private, vchan, nb_cpls, last_idx, +- has_error); ++ nb_ops = (*obj->completed)(obj->dev_private, vchan, nb_cpls, last_idx, ++ has_error); ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_ops); ++ return nb_ops; + } + + /** +@@ -1055,7 +1059,7 @@ rte_dma_completed_status(int16_t dev_id, uint16_t vchan, + enum rte_dma_status_code *status) + { + struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id]; +- uint16_t idx; ++ uint16_t idx, nb_ops; + + #ifdef RTE_DMADEV_DEBUG + if (!rte_dma_is_valid(dev_id) || nb_cpls == 0 || status == NULL) +@@ -1067,8 +1071,10 @@ rte_dma_completed_status(int16_t dev_id, uint16_t vchan, + if (last_idx == NULL) + last_idx = &idx; + +- return (*obj->completed_status)(obj->dev_private, vchan, nb_cpls, ++ nb_ops = (*obj->completed_status)(obj->dev_private, vchan, nb_cpls, + last_idx, status); ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_ops); ++ return nb_ops; + } + + /** +diff --git a/lib/eal/common/eal_common_lcore_telemetry.c b/lib/eal/common/eal_common_lcore_telemetry.c +new file mode 100644 +index 0000000000..1478e5a48a +--- /dev/null ++++ b/lib/eal/common/eal_common_lcore_telemetry.c +@@ -0,0 +1,319 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2022 Intel Corporation ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#ifdef RTE_LCORE_BUSYNESS ++#include ++#endif ++ ++int __rte_lcore_telemetry_enabled; ++ ++#ifdef RTE_LCORE_BUSYNESS ++ ++#include "eal_private.h" ++ ++struct lcore_telemetry { ++ int busyness; ++ /**< Calculated busyness (gets set/returned by the API) */ ++ int raw_busyness; ++ /**< Calculated busyness times 100. */ ++ uint64_t interval_ts; ++ /**< when previous telemetry interval started */ ++ uint64_t empty_cycles; ++ /**< empty cycle count since last interval */ ++ uint64_t last_poll_ts; ++ /**< last poll timestamp */ ++ bool last_empty; ++ /**< if last poll was empty */ ++ unsigned int contig_poll_cnt; ++ /**< contiguous (always empty/non empty) poll counter */ ++} __rte_cache_aligned; ++ ++static struct lcore_telemetry *telemetry_data; ++ ++#define LCORE_BUSYNESS_MAX 100 ++#define LCORE_BUSYNESS_NOT_SET -1 ++#define LCORE_BUSYNESS_MIN 0 ++ ++#define SMOOTH_COEFF 5 ++#define STATE_CHANGE_OPT 32 ++ ++/* Helper function to check if the lcore is enabled. 
++ * Cannot use rte_lcore_is_enabled since it only catches ROLE_RTE threads which ++ * does not include ROLE_NON_EAL threads which some application threads, for ++ * example OvS polling threads, are marked as. ++ */ ++static int ++lcore_enabled(unsigned int lcore_id) ++{ ++ enum rte_lcore_role_t role = rte_eal_lcore_role(lcore_id); ++ ++ return role == ROLE_RTE || role == ROLE_NON_EAL; ++} ++ ++static void lcore_config_init(void) ++{ ++ struct lcore_telemetry *td; ++ int lcore_id; ++ ++ /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ ++ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { ++ if (!lcore_enabled(lcore_id)) ++ continue; ++ ++ td = &telemetry_data[lcore_id]; ++ ++ td->interval_ts = 0; ++ td->last_poll_ts = 0; ++ td->empty_cycles = 0; ++ td->last_empty = true; ++ td->contig_poll_cnt = 0; ++ td->busyness = LCORE_BUSYNESS_NOT_SET; ++ td->raw_busyness = 0; ++ } ++} ++ ++int rte_lcore_busyness(unsigned int lcore_id) ++{ ++ const uint64_t active_thresh = RTE_LCORE_BUSYNESS_PERIOD * 1000; ++ struct lcore_telemetry *tdata; ++ ++ if (lcore_id >= RTE_MAX_LCORE) ++ return -EINVAL; ++ tdata = &telemetry_data[lcore_id]; ++ ++ /* if the lcore is not active */ ++ if (tdata->interval_ts == 0) ++ return LCORE_BUSYNESS_NOT_SET; ++ /* if the core hasn't been active in a while */ ++ else if ((rte_rdtsc() - tdata->interval_ts) > active_thresh) ++ return LCORE_BUSYNESS_NOT_SET; ++ ++ /* this core is active, report its busyness */ ++ return telemetry_data[lcore_id].busyness; ++} ++ ++int rte_lcore_busyness_enabled(void) ++{ ++ return __rte_lcore_telemetry_enabled; ++} ++ ++void rte_lcore_busyness_enabled_set(int enable) ++{ ++ __rte_lcore_telemetry_enabled = !!enable; ++ ++ if (!enable) ++ lcore_config_init(); ++} ++ ++static inline int calc_raw_busyness(const struct lcore_telemetry *tdata, ++ const uint64_t empty, const uint64_t total) ++{ ++ /* ++ * we don't want to use floating point math here, but we want for our ++ * busyness to react smoothly to sudden changes, while still keeping the ++ * accuracy and making sure that over time the average follows busyness ++ * as measured just-in-time. therefore, we will calculate the average ++ * busyness using integer math, but shift the decimal point two places ++ * to the right, so that 100.0 becomes 10000. this allows us to report ++ * integer values (0..100) while still allowing ourselves to follow the ++ * just-in-time measurements when we calculate our averages. 
++ */ ++ const int max_raw_idle = LCORE_BUSYNESS_MAX * 100; ++ ++ const int prev_raw_idle = max_raw_idle - tdata->raw_busyness; ++ ++ /* calculate rate of idle cycles, times 100 */ ++ const int cur_raw_idle = (int)((empty * max_raw_idle) / total); ++ ++ /* smoothen the idleness */ ++ const int smoothened_idle = ++ (cur_raw_idle + prev_raw_idle * (SMOOTH_COEFF - 1)) / SMOOTH_COEFF; ++ ++ /* convert idleness back to busyness */ ++ return max_raw_idle - smoothened_idle; ++} ++ ++void __rte_lcore_telemetry_timestamp(uint16_t nb_rx) ++{ ++ const unsigned int lcore_id = rte_lcore_id(); ++ uint64_t interval_ts, empty_cycles, cur_tsc, last_poll_ts; ++ struct lcore_telemetry *tdata; ++ const bool empty = nb_rx == 0; ++ uint64_t diff_int, diff_last; ++ bool last_empty; ++ ++ /* This telemetry is not supported for unregistered non-EAL threads */ ++ if (lcore_id >= RTE_MAX_LCORE) { ++ RTE_LOG(DEBUG, EAL, ++ "Lcore telemetry not supported on unregistered non-EAL thread %d", ++ lcore_id); ++ return; ++ } ++ ++ tdata = &telemetry_data[lcore_id]; ++ last_empty = tdata->last_empty; ++ ++ /* optimization: don't do anything if status hasn't changed */ ++ if (last_empty == empty && tdata->contig_poll_cnt++ < STATE_CHANGE_OPT) ++ return; ++ /* status changed or we're waiting for too long, reset counter */ ++ tdata->contig_poll_cnt = 0; ++ ++ cur_tsc = rte_rdtsc(); ++ ++ interval_ts = tdata->interval_ts; ++ empty_cycles = tdata->empty_cycles; ++ last_poll_ts = tdata->last_poll_ts; ++ ++ diff_int = cur_tsc - interval_ts; ++ diff_last = cur_tsc - last_poll_ts; ++ ++ /* is this the first time we're here? */ ++ if (interval_ts == 0) { ++ tdata->busyness = LCORE_BUSYNESS_MIN; ++ tdata->raw_busyness = 0; ++ tdata->interval_ts = cur_tsc; ++ tdata->empty_cycles = 0; ++ tdata->contig_poll_cnt = 0; ++ goto end; ++ } ++ ++ /* update the empty counter if we got an empty poll earlier */ ++ if (last_empty) ++ empty_cycles += diff_last; ++ ++ /* have we passed the interval? 
*/ ++ if (diff_int > RTE_LCORE_BUSYNESS_PERIOD) { ++ int raw_busyness; ++ ++ /* get updated busyness value */ ++ raw_busyness = calc_raw_busyness(tdata, empty_cycles, diff_int); ++ ++ /* set a new interval, reset empty counter */ ++ tdata->interval_ts = cur_tsc; ++ tdata->empty_cycles = 0; ++ tdata->raw_busyness = raw_busyness; ++ /* bring busyness back to 0..100 range, biased to round up */ ++ tdata->busyness = (raw_busyness + 50) / 100; ++ } else ++ /* we may have updated empty counter */ ++ tdata->empty_cycles = empty_cycles; ++ ++end: ++ /* update status for next poll */ ++ tdata->last_poll_ts = cur_tsc; ++ tdata->last_empty = empty; ++} ++ ++static int ++lcore_busyness_enable(const char *cmd __rte_unused, ++ const char *params __rte_unused, ++ struct rte_tel_data *d) ++{ ++ rte_lcore_busyness_enabled_set(1); ++ ++ rte_tel_data_start_dict(d); ++ ++ rte_tel_data_add_dict_int(d, "busyness_enabled", 1); ++ ++ return 0; ++} ++ ++static int ++lcore_busyness_disable(const char *cmd __rte_unused, ++ const char *params __rte_unused, ++ struct rte_tel_data *d) ++{ ++ rte_lcore_busyness_enabled_set(0); ++ ++ rte_tel_data_start_dict(d); ++ ++ rte_tel_data_add_dict_int(d, "busyness_enabled", 0); ++ ++ return 0; ++} ++ ++static int ++lcore_handle_busyness(const char *cmd __rte_unused, ++ const char *params __rte_unused, struct rte_tel_data *d) ++{ ++ char corenum[64]; ++ int i; ++ ++ rte_tel_data_start_dict(d); ++ ++ /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ ++ for (i = 0; i < RTE_MAX_LCORE; i++) { ++ if (!lcore_enabled(i)) ++ continue; ++ snprintf(corenum, sizeof(corenum), "%d", i); ++ rte_tel_data_add_dict_int(d, corenum, rte_lcore_busyness(i)); ++ } ++ ++ return 0; ++} ++ ++void ++eal_lcore_telemetry_free(void) ++{ ++ if (telemetry_data != NULL) { ++ free(telemetry_data); ++ telemetry_data = NULL; ++ } ++} ++ ++RTE_INIT(lcore_init_telemetry) ++{ ++ telemetry_data = calloc(RTE_MAX_LCORE, sizeof(telemetry_data[0])); ++ if (telemetry_data == NULL) ++ rte_panic("Could not init lcore telemetry data: Out of memory\n"); ++ ++ lcore_config_init(); ++ ++ rte_telemetry_register_cmd("/eal/lcore/busyness", lcore_handle_busyness, ++ "return percentage busyness of cores"); ++ ++ rte_telemetry_register_cmd("/eal/lcore/busyness_enable", lcore_busyness_enable, ++ "enable lcore busyness measurement"); ++ ++ rte_telemetry_register_cmd("/eal/lcore/busyness_disable", lcore_busyness_disable, ++ "disable lcore busyness measurement"); ++ ++ __rte_lcore_telemetry_enabled = true; ++} ++ ++#else ++ ++int rte_lcore_busyness(unsigned int lcore_id __rte_unused) ++{ ++ return -ENOTSUP; ++} ++ ++int rte_lcore_busyness_enabled(void) ++{ ++ return -ENOTSUP; ++} ++ ++void rte_lcore_busyness_enabled_set(int enable __rte_unused) ++{ ++} ++ ++void __rte_lcore_telemetry_timestamp(uint16_t nb_rx __rte_unused) ++{ ++} ++ ++void eal_lcore_telemetry_free(void) ++{ ++} ++ ++#endif +diff --git a/lib/eal/common/meson.build b/lib/eal/common/meson.build +index 22a626ba6f..1ca6f9a420 100644 +--- a/lib/eal/common/meson.build ++++ b/lib/eal/common/meson.build +@@ -17,6 +17,7 @@ sources += files( + 'eal_common_hexdump.c', + 'eal_common_interrupts.c', + 'eal_common_launch.c', ++ 'eal_common_lcore_telemetry.c', + 'eal_common_lcore.c', + 'eal_common_mcfg.c', + 'eal_common_memalloc.c', +diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c +index 568e06e9ed..1c27688e39 100644 +--- a/lib/eal/freebsd/eal.c ++++ b/lib/eal/freebsd/eal.c +@@ -929,6 +929,7 @@ rte_eal_cleanup(void) + eal_bus_cleanup(); + rte_trace_save(); + 
eal_trace_fini(); ++ eal_lcore_telemetry_free(); + rte_eal_alarm_cleanup(); + /* after this point, any DPDK pointers will become dangling */ + rte_eal_memory_detach(); +diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h +index 7deae47af3..3c64774bcb 100644 +--- a/lib/eal/include/rte_lcore.h ++++ b/lib/eal/include/rte_lcore.h +@@ -407,6 +407,90 @@ rte_thread_register(void); + void + rte_thread_unregister(void); + ++/** ++ * @warning ++ * @b EXPERIMENTAL: this API may change without prior notice. ++ * ++ * Read busyness value corresponding to an lcore. ++ * ++ * @param lcore_id ++ * Lcore to read busyness value for. ++ * @return ++ * - value between 0 and 100 on success ++ * - -1 if lcore is not active ++ * - -EINVAL if lcore is invalid ++ * - -ENOMEM if not enough memory available ++ * - -ENOTSUP if not supported ++ */ ++__rte_experimental ++int ++rte_lcore_busyness(unsigned int lcore_id); ++ ++/** ++ * @warning ++ * @b EXPERIMENTAL: this API may change without prior notice. ++ * ++ * Check if lcore busyness telemetry is enabled. ++ * ++ * @return ++ * - 1 if lcore telemetry is enabled ++ * - 0 if lcore telemetry is disabled ++ * - -ENOTSUP if not lcore telemetry supported ++ */ ++__rte_experimental ++int ++rte_lcore_busyness_enabled(void); ++ ++/** ++ * @warning ++ * @b EXPERIMENTAL: this API may change without prior notice. ++ * ++ * Enable or disable busyness telemetry. ++ * ++ * @param enable ++ * 1 to enable, 0 to disable ++ */ ++__rte_experimental ++void ++rte_lcore_busyness_enabled_set(int enable); ++ ++/** ++ * @warning ++ * @b EXPERIMENTAL: this API may change without prior notice. ++ * ++ * Lcore telemetry timestamping function. ++ * ++ * @param nb_rx ++ * Number of buffers processed by lcore. ++ */ ++__rte_experimental ++void ++__rte_lcore_telemetry_timestamp(uint16_t nb_rx); ++ ++/** @internal lcore telemetry enabled status */ ++extern int __rte_lcore_telemetry_enabled; ++ ++/** @internal free memory allocated for lcore telemetry */ ++void ++eal_lcore_telemetry_free(void); ++ ++/** ++ * Call lcore telemetry timestamp function. ++ * ++ * @param nb_rx ++ * Number of buffers processed by lcore. 
++ */ ++#ifdef RTE_LCORE_BUSYNESS ++#define RTE_LCORE_TELEMETRY_TIMESTAMP(nb_rx) \ ++ do { \ ++ if (__rte_lcore_telemetry_enabled) \ ++ __rte_lcore_telemetry_timestamp(nb_rx); \ ++ } while (0) ++#else ++#define RTE_LCORE_TELEMETRY_TIMESTAMP(nb_rx) \ ++ while (0) {} ++#endif ++ + #ifdef __cplusplus + } + #endif +diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c +index 57da058cec..7734b4b8a4 100644 +--- a/lib/eal/linux/eal.c ++++ b/lib/eal/linux/eal.c +@@ -1373,6 +1373,7 @@ rte_eal_cleanup(void) + eal_bus_cleanup(); + rte_trace_save(); + eal_trace_fini(); ++ eal_lcore_telemetry_free(); + eal_mp_dev_hotplug_cleanup(); + rte_eal_alarm_cleanup(); + /* after this point, any DPDK pointers will become dangling */ +diff --git a/lib/eal/meson.build b/lib/eal/meson.build +index e1d6c4cf17..833d33138c 100644 +--- a/lib/eal/meson.build ++++ b/lib/eal/meson.build +@@ -17,6 +17,9 @@ subdir(arch_subdir) + deps += ['log', 'kvargs'] + if not is_windows + deps += ['telemetry'] ++else ++ # core busyness telemetry depends on telemetry library ++ dpdk_conf.set('RTE_LCORE_BUSYNESS', false) + endif + if dpdk_conf.has('RTE_USE_LIBBSD') + ext_deps += libbsd +diff --git a/lib/eal/version.map b/lib/eal/version.map +index 5e0cd47c82..a4451d58eb 100644 +--- a/lib/eal/version.map ++++ b/lib/eal/version.map +@@ -385,6 +385,64 @@ EXPERIMENTAL { + # added in 20.11 + __rte_eal_trace_generic_size_t; # WINDOWS_NO_EXPORT + rte_cpu_get_intrinsics_support; # WINDOWS_NO_EXPORT ++ rte_service_lcore_may_be_active; ++ rte_vect_get_max_simd_bitwidth; ++ rte_vect_set_max_simd_bitwidth; ++ ++ # added in 21.02 ++ rte_power_monitor; # WINDOWS_NO_EXPORT ++ rte_power_monitor_wakeup; # WINDOWS_NO_EXPORT ++ rte_power_pause; # WINDOWS_NO_EXPORT ++ ++ # added in 21.05 ++ rte_devargs_reset; ++ rte_intr_callback_unregister_sync; ++ rte_thread_key_create; ++ rte_thread_key_delete; ++ rte_thread_value_get; ++ rte_thread_value_set; ++ rte_version_minor; ++ rte_version_month; ++ rte_version_prefix; ++ rte_version_release; ++ rte_version_suffix; ++ rte_version_year; ++ ++ # added in 21.08 ++ rte_power_monitor_multi; # WINDOWS_NO_EXPORT ++ ++ # added in 21.11 ++ rte_intr_fd_get; ++ rte_intr_fd_set; ++ rte_intr_instance_alloc; ++ rte_intr_instance_free; ++ rte_intr_type_get; ++ rte_intr_type_set; ++ ++ # added in 22.07 ++ rte_drand; ++ rte_thread_get_affinity_by_id; ++ rte_thread_get_priority; ++ rte_thread_self; ++ rte_thread_set_affinity_by_id; ++ rte_thread_set_priority; ++ ++ # added in 22.11 ++ rte_thread_attr_get_affinity; ++ rte_thread_attr_init; ++ rte_thread_attr_set_affinity; ++ rte_thread_attr_set_priority; ++ rte_thread_create; ++ rte_thread_detach; ++ rte_thread_equal; ++ rte_thread_join; ++ ++ # added in 20.11 ++ __rte_lcore_telemetry_timestamp; ++ __rte_lcore_telemetry_enabled; ++ rte_lcore_busyness; ++ rte_lcore_busyness_enabled; ++ rte_lcore_busyness_enabled_set; + + # added in 23.03 + rte_lcore_register_usage_cb; +diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h +index 545799c341..e89eef37ee 100644 +--- a/lib/ethdev/rte_ethdev.h ++++ b/lib/ethdev/rte_ethdev.h +@@ -6099,6 +6099,8 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id, + #endif + + rte_ethdev_trace_rx_burst(port_id, queue_id, (void **)rx_pkts, nb_rx); ++ ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_rx); + return nb_rx; + } + +diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h +index 7fd9016ca7..ce4e57b60d 100644 +--- a/lib/eventdev/rte_eventdev.h ++++ b/lib/eventdev/rte_eventdev.h +@@ -2408,6 +2408,7 @@ rte_event_dequeue_burst(uint8_t 
dev_id, uint8_t port_id, struct rte_event ev[], + uint16_t nb_events, uint64_t timeout_ticks) + { + const struct rte_event_fp_ops *fp_ops; ++ uint16_t nb_evts; + void *port; + + fp_ops = &rte_event_fp_ops[dev_id]; +@@ -2430,10 +2431,12 @@ rte_event_dequeue_burst(uint8_t dev_id, uint8_t port_id, struct rte_event ev[], + * requests nb_events as const one + */ + if (nb_events == 1) +- return (fp_ops->dequeue)(port, ev, timeout_ticks); ++ nb_evts = (fp_ops->dequeue)(port, ev, timeout_ticks); + else +- return (fp_ops->dequeue_burst)(port, ev, nb_events, +- timeout_ticks); ++ nb_evts = (fp_ops->dequeue_burst)(port, ev, nb_events, ++ timeout_ticks); ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_evts); ++ return nb_evts; + } + + #define RTE_EVENT_DEV_MAINT_OP_FLUSH (1 << 0) +diff --git a/lib/rawdev/rte_rawdev.c b/lib/rawdev/rte_rawdev.c +index 4f8897b639..2403c78925 100644 +--- a/lib/rawdev/rte_rawdev.c ++++ b/lib/rawdev/rte_rawdev.c +@@ -237,13 +237,16 @@ rte_rawdev_dequeue_buffers(uint16_t dev_id, + rte_rawdev_obj_t context) + { + struct rte_rawdev *dev; ++ int nb_ops; + + RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + dev = &rte_rawdevs[dev_id]; + + if (*dev->dev_ops->dequeue_bufs == NULL) + return -ENOTSUP; +- return (*dev->dev_ops->dequeue_bufs)(dev, buffers, count, context); ++ nb_ops = (*dev->dev_ops->dequeue_bufs)(dev, buffers, count, context); ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_ops); ++ return nb_ops; + } + + int +diff --git a/lib/regexdev/rte_regexdev.h b/lib/regexdev/rte_regexdev.h +index d50af775b5..7b243a0866 100644 +--- a/lib/regexdev/rte_regexdev.h ++++ b/lib/regexdev/rte_regexdev.h +@@ -1530,6 +1530,7 @@ rte_regexdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, + struct rte_regex_ops **ops, uint16_t nb_ops) + { + struct rte_regexdev *dev = &rte_regex_devices[dev_id]; ++ uint16_t deq_ops; + #ifdef RTE_LIBRTE_REGEXDEV_DEBUG + RTE_REGEXDEV_VALID_DEV_ID_OR_ERR_RET(dev_id, -EINVAL); + if (*dev->dequeue == NULL) +@@ -1539,7 +1540,9 @@ rte_regexdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, + return -EINVAL; + } + #endif +- return (*dev->dequeue)(dev, qp_id, ops, nb_ops); ++ deq_ops = (*dev->dequeue)(dev, qp_id, ops, nb_ops); ++ RTE_LCORE_TELEMETRY_TIMESTAMP(deq_ops); ++ return deq_ops; + } + + #ifdef __cplusplus +diff --git a/lib/ring/rte_ring.h b/lib/ring/rte_ring.h +index c709f30497..057542dcb0 100644 +--- a/lib/ring/rte_ring.h ++++ b/lib/ring/rte_ring.h +@@ -411,8 +411,10 @@ static __rte_always_inline unsigned int + rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n, + unsigned int *available) + { +- return rte_ring_dequeue_bulk_elem(r, obj_table, sizeof(void *), ++ uint32_t nb_rx = rte_ring_dequeue_bulk_elem(r, obj_table, sizeof(void *), + n, available); ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_rx); ++ return nb_rx; + } + + /** +@@ -811,8 +813,10 @@ static __rte_always_inline unsigned int + rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, + unsigned int n, unsigned int *available) + { +- return rte_ring_dequeue_burst_elem(r, obj_table, sizeof(void *), ++ uint32_t nb_rx = rte_ring_dequeue_burst_elem(r, obj_table, sizeof(void *), + n, available); ++ RTE_LCORE_TELEMETRY_TIMESTAMP(nb_rx); ++ return nb_rx; + } + + #ifdef __cplusplus +diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h +index 4b80f58980..e2a72e3ea7 100644 +--- a/lib/ring/rte_ring_elem_pvt.h ++++ b/lib/ring/rte_ring_elem_pvt.h +@@ -385,6 +385,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table, + end: + if (available != NULL) + *available = entries - n; 
++ RTE_LCORE_TELEMETRY_TIMESTAMP(n); + return n; + } + +-- +2.25.1 + diff --git a/ipm/patches/dpdk/23.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch b/ipm/patches/dpdk/23.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch new file mode 100644 index 0000000..c2bac0e --- /dev/null +++ b/ipm/patches/dpdk/23.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch @@ -0,0 +1,79 @@ +From f689846f602caddf6a0f6c013c3dbb6f0974dec2 Mon Sep 17 00:00:00 2001 +From: Hoang Nguyen +Date: Thu, 1 Aug 2024 16:11:56 +0000 +Subject: [PATCH 2/3] eal: add cpuset lcore telemetry entries + +Expose per-lcore cpuset information to telemetry. + +Signed-off-by: Anatoly Burakov +--- + lib/eal/common/eal_common_lcore_telemetry.c | 46 +++++++++++++++++++++ + 1 file changed, 46 insertions(+) + +diff --git a/lib/eal/common/eal_common_lcore_telemetry.c b/lib/eal/common/eal_common_lcore_telemetry.c +index 1478e5a48a..f01ccd9a65 100644 +--- a/lib/eal/common/eal_common_lcore_telemetry.c ++++ b/lib/eal/common/eal_common_lcore_telemetry.c +@@ -263,6 +263,49 @@ lcore_handle_busyness(const char *cmd __rte_unused, + return 0; + } + ++static int ++lcore_handle_cpuset(const char *cmd __rte_unused, ++ const char *params __rte_unused, ++ struct rte_tel_data *d) ++{ ++ char corenum[64]; ++ int i; ++ ++ rte_tel_data_start_dict(d); ++ ++ /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ ++ for (i = 0; i < RTE_MAX_LCORE; i++) { ++ const struct lcore_config *cfg = &lcore_config[i]; ++ const rte_cpuset_t *cpuset = &cfg->cpuset; ++ struct rte_tel_data *ld; ++ unsigned int cpu; ++ ++ if (!lcore_enabled(i)) ++ continue; ++ ++ /* create an array of integers */ ++ ld = rte_tel_data_alloc(); ++ if (ld == NULL) ++ return -ENOMEM; ++ rte_tel_data_start_array(ld, RTE_TEL_INT_VAL); ++ ++ /* add cpu ID's from cpuset to the array */ ++ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { ++ if (!CPU_ISSET(cpu, cpuset)) ++ continue; ++ rte_tel_data_add_array_int(ld, cpu); ++ } ++ ++ /* add array to the per-lcore container */ ++ snprintf(corenum, sizeof(corenum), "%d", i); ++ ++ /* tell telemetry library to free this array automatically */ ++ rte_tel_data_add_dict_container(d, corenum, ld, 0); ++ } ++ ++ return 0; ++} ++ + void + eal_lcore_telemetry_free(void) + { +@@ -289,6 +332,9 @@ RTE_INIT(lcore_init_telemetry) + rte_telemetry_register_cmd("/eal/lcore/busyness_disable", lcore_busyness_disable, + "disable lcore busyness measurement"); + ++ rte_telemetry_register_cmd("/eal/lcore/cpuset", lcore_handle_cpuset, ++ "list physical core affinity for each lcore"); ++ + __rte_lcore_telemetry_enabled = true; + } + +-- +2.25.1 + diff --git a/ipm/patches/dpdk/23.11/0003-add-capacity-endpoint-to-telemetry-thread.patch b/ipm/patches/dpdk/23.11/0003-add-capacity-endpoint-to-telemetry-thread.patch new file mode 100644 index 0000000..6be6861 --- /dev/null +++ b/ipm/patches/dpdk/23.11/0003-add-capacity-endpoint-to-telemetry-thread.patch @@ -0,0 +1,357 @@ +From 2a8e1d477157e299f02fb9e64aa5d197d2caee16 Mon Sep 17 00:00:00 2001 +From: David Hunt +Date: Fri, 20 Sep 2024 09:11:45 +0100 +Subject: [PATCH 3/3] add capacity endpoint to telemetry thread + +Busyness is calculated on how busy the current core is, ignoring the +current frequency. So a core that's 50% busy at P1 (e.g. 2GHz), shows +as 100% busy at 1GHz. + +This patch adds a new 'capacity' metric that shows a percentage based on +the P1 (base) freqency of the core, so that if the core is 50% busy at +P1, it should show 50% regardless of what the current frequency is. 
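To make the scaling concrete: the capacity value is the busyness scaled by the ratio of current frequency to P1, as in rte_lcore_capacity() below. A tiny standalone example of that arithmetic, using made-up sample frequencies rather than values read from a real system:

```c
/* Standalone sketch of the capacity calculation:
 * capacity = busyness * current_frequency / P1_frequency. */
#include <stdio.h>

int
main(void)
{
	int busyness = 100;       /* core reports 100% busy ...          */
	int cur_khz  = 1000000;   /* ... while running at 1.0 GHz        */
	int p1_khz   = 2000000;   /* P1 (base) frequency is 2.0 GHz      */

	int capacity = busyness * cur_khz / p1_khz;
	printf("busyness %d%% at %d kHz (P1 %d kHz) -> capacity %d%%\n",
	       busyness, cur_khz, p1_khz, capacity);   /* prints 50% */
	return 0;
}
```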
+ +Signed-off-by: David Hunt +--- + lib/eal/common/eal_common_lcore_telemetry.c | 241 ++++++++++++++++++++ + lib/eal/include/rte_lcore.h | 21 ++ + lib/eal/version.map | 1 + + 3 files changed, 263 insertions(+) + +diff --git a/lib/eal/common/eal_common_lcore_telemetry.c b/lib/eal/common/eal_common_lcore_telemetry.c +index f01ccd9a65..1c6d085a55 100644 +--- a/lib/eal/common/eal_common_lcore_telemetry.c ++++ b/lib/eal/common/eal_common_lcore_telemetry.c +@@ -10,9 +10,18 @@ + #include + #include + #include ++#include ++#include ++#include + + #ifdef RTE_LCORE_BUSYNESS + #include ++#define MSR_PLATFORM_INFO 0xCE ++#define POWER_SYSFS_CUR_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" ++#define POWER_SYSFS_BASE_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" ++#define POWER_SYSFS_SCALING_DRIVER_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_driver" ++#define POWER_SYSFS_SCALING_MAX_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" ++#define POWER_SYSFS_MSR_PATH "/dev/cpu/%u/msr" + #endif + + int __rte_lcore_telemetry_enabled; +@@ -47,6 +56,183 @@ static struct lcore_telemetry *telemetry_data; + #define SMOOTH_COEFF 5 + #define STATE_CHANGE_OPT 32 + ++static int p1_freq[RTE_MAX_LCORE] = {0}; ++ ++static int ++try_read_base_frequency(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ snprintf(path, sizeof(path), POWER_SYSFS_BASE_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ ++ p1_freq[lcore_id] = atoi(buffer); ++ return p1_freq[lcore_id]; ++ ++ ++} ++ ++static int ++try_read_scaling_max_freq(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int freq; ++ int fd; ++ ++ /* ++ * If the driver is acpi_cpufreq, we can read the scaling_max_freq file ++ */ ++ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_DRIVER_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ close(fd); ++ ++ if (strncmp(buffer, "acpi-cpufreq", 12) == 0) { ++ /* we can use the scaling_max_freq to get the p1 */ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_MAX_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ close(fd); ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ freq = atoi(buffer) / 1000; /* convert to KHz */ ++ ++ /* ++ * If the freq value ends with '1', then, turbo is enabled. ++ * Round it down to the nearest 100. Otherwuse use the value. 
++ */ ++ return (freq & ~1) * 1000; /* convert to Hz */ ++ } ++ return -1; ++} ++ ++static int ++try_read_msr(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ int freq; ++ uint64_t data; ++ ++ /* ++ * If the msr driver is present, we can read p1 from MSR_PLATFORM_INFO register ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_MSR_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ fd = open(path, O_RDONLY); ++ if (fd < 0) { ++ return -1; ++ } ++ ++ if (pread(fd, &data, sizeof(data), MSR_PLATFORM_INFO) != sizeof(data)) { ++ close(fd); ++ return -1; ++ } ++ ++ close(fd); ++ ++ freq = ((data >> 8) & 0xff) * 100 * 1000; ++ ++ return freq; ++} ++ ++ ++static ++int read_sysfs_p1_freq(unsigned int lcore_id) { ++ int freq; ++ ++ /* We've previously got the p1 frequency. */ ++ if (p1_freq[lcore_id] != 0) ++ return p1_freq[lcore_id]; ++ ++ /* ++ * Check the base_frequency file, if it's there ++ */ ++ freq = try_read_base_frequency(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Check the scaling_max_freq file for the acpi-freq driver ++ */ ++ freq = try_read_scaling_max_freq(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Try reading from the MSR register ++ */ ++ freq = try_read_msr(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ RTE_LOG(ERR, EAL, "Capacity telemetry for lcore %d not supported: no p1 frequency found", ++ lcore_id); ++ ++ return -1; ++} ++ ++ ++int current_fds[RTE_MAX_LCORE] = {0}; ++ ++static ++int read_sysfs_cur_freq(unsigned int lcore_id) { ++ char path[PATH_MAX]; ++ ++ if (current_fds[lcore_id] == 0) { ++ snprintf(path, sizeof(path), POWER_SYSFS_CUR_PATH, rte_lcore_to_cpu_id(lcore_id)); ++ current_fds[lcore_id] = open(path, O_RDONLY); ++ if (current_fds[lcore_id] == -1) { ++ return -1; ++ } ++ } ++ ++ char buffer[16]; ++ ssize_t bytesRead = pread(current_fds[lcore_id], buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ int value = atoi(buffer); ++ return value; ++} ++ + /* Helper function to check if the lcore is enabled. 
+ * Cannot use rte_lcore_is_enabled since it only catches ROLE_RTE threads which + * does not include ROLE_NON_EAL threads which some application threads, for +@@ -102,6 +288,33 @@ int rte_lcore_busyness(unsigned int lcore_id) + return telemetry_data[lcore_id].busyness; + } + ++int rte_lcore_capacity(unsigned int lcore_id) ++{ ++ const uint64_t active_thresh = RTE_LCORE_BUSYNESS_PERIOD * 1000; ++ struct lcore_telemetry *tdata; ++ ++ if (lcore_id >= RTE_MAX_LCORE) ++ return -EINVAL; ++ tdata = &telemetry_data[lcore_id]; ++ ++ /* if the lcore is not active */ ++ if (tdata->interval_ts == 0) ++ return LCORE_BUSYNESS_NOT_SET; ++ /* if the core hasn't been active in a while */ ++ else if ((rte_rdtsc() - tdata->interval_ts) > active_thresh) ++ return LCORE_BUSYNESS_NOT_SET; ++ ++ int cur_freq = read_sysfs_cur_freq(rte_lcore_to_cpu_id(lcore_id)); ++ int busy = telemetry_data[lcore_id].busyness; ++ int p1 = read_sysfs_p1_freq(lcore_id) ; ++ ++ if ((busy == -1) || (p1 <= 0)) { ++ return -1; ++ } else { ++ return busy * cur_freq / p1; ++ } ++} ++ + int rte_lcore_busyness_enabled(void) + { + return __rte_lcore_telemetry_enabled; +@@ -263,6 +476,26 @@ lcore_handle_busyness(const char *cmd __rte_unused, + return 0; + } + ++static int ++lcore_handle_capacity(const char *cmd __rte_unused, ++ const char *params __rte_unused, struct rte_tel_data *d) ++{ ++ char corenum[64]; ++ int i; ++ ++ rte_tel_data_start_dict(d); ++ ++ /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ ++ for (i = 0; i < RTE_MAX_LCORE; i++) { ++ if (!lcore_enabled(i)) ++ continue; ++ snprintf(corenum, sizeof(corenum), "%d", i); ++ rte_tel_data_add_dict_int(d, corenum, rte_lcore_capacity(i)); ++ } ++ ++ return 0; ++} ++ + static int + lcore_handle_cpuset(const char *cmd __rte_unused, + const char *params __rte_unused, +@@ -326,6 +559,9 @@ RTE_INIT(lcore_init_telemetry) + rte_telemetry_register_cmd("/eal/lcore/busyness", lcore_handle_busyness, + "return percentage busyness of cores"); + ++ rte_telemetry_register_cmd("/eal/lcore/capacity_used", lcore_handle_capacity, ++ "return percentage capacity of cores"); ++ + rte_telemetry_register_cmd("/eal/lcore/busyness_enable", lcore_busyness_enable, + "enable lcore busyness measurement"); + +@@ -340,6 +576,11 @@ RTE_INIT(lcore_init_telemetry) + + #else + ++int rte_lcore_capacity(unsigned int lcore_id __rte_unused) ++{ ++ return -ENOTSUP; ++} ++ + int rte_lcore_busyness(unsigned int lcore_id __rte_unused) + { + return -ENOTSUP; +diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h +index 3c64774bcb..dffb7d1ab5 100644 +--- a/lib/eal/include/rte_lcore.h ++++ b/lib/eal/include/rte_lcore.h +@@ -426,6 +426,27 @@ __rte_experimental + int + rte_lcore_busyness(unsigned int lcore_id); + ++/** ++ * @warning ++ * @b EXPERIMENTAL: this API may change without prior notice. ++ * ++ * Read capacity value corresponding to an lcore. ++ * This differs from busyness in that it is related to the current usage ++ * of the lcore compared to P1 frequency, not the current frequency. ++ * ++ * @param lcore_id ++ * Lcore to read capacity value for. ++ * @return ++ * - value between 0 and 100 on success ++ * - -1 if lcore is not active ++ * - -EINVAL if lcore is invalid ++ * - -ENOMEM if not enough memory available ++ * - -ENOTSUP if not supported ++ */ ++__rte_experimental ++int ++rte_lcore_capacity(unsigned int lcore_id); ++ + /** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. 
+diff --git a/lib/eal/version.map b/lib/eal/version.map +index a4451d58eb..a2a3ba045f 100644 +--- a/lib/eal/version.map ++++ b/lib/eal/version.map +@@ -440,6 +440,7 @@ EXPERIMENTAL { + # added in 20.11 + __rte_lcore_telemetry_timestamp; + __rte_lcore_telemetry_enabled; ++ rte_lcore_capacity; + rte_lcore_busyness; + rte_lcore_busyness_enabled; + rte_lcore_busyness_enabled_set; +-- +2.25.1 + diff --git a/ipm/patches/dpdk/README.md b/ipm/patches/dpdk/README.md index 7b4620a..6876941 100644 --- a/ipm/patches/dpdk/README.md +++ b/ipm/patches/dpdk/README.md @@ -1,6 +1,7 @@ # DPDK Patches Apply the patches using ```git am {patch}.patch```. -1. ```20.11 directory``` are a set of patches that add the busyness telemetry to DPDK 20.11. -2. ```21.11 directory``` are a set of patches that add the busyness telemetry to DPDK 21.11.2. -3. ```22.11 directory``` are a set of patches that add the busyness telemetry to DPDK 22.11. +1. ```20.11 directory``` are a set of patches that add the busyness telemetry to DPDK 20.11.9 +2. ```21.11 directory``` are a set of patches that add the busyness telemetry to DPDK 21.11.8 +3. ```22.11 directory``` are a set of patches that add the busyness telemetry to DPDK 22.11.6 +4. ```23.11 directory``` are a set of patches that add the busyness telemetry to DPDK 23.11.2 diff --git a/ipm/patches/vpp/20.09/0004-Subject-PATCH-1-1-stats-Added-capacity-flags.patch b/ipm/patches/vpp/20.09/0004-Subject-PATCH-1-1-stats-Added-capacity-flags.patch new file mode 100644 index 0000000..ad5ece9 --- /dev/null +++ b/ipm/patches/vpp/20.09/0004-Subject-PATCH-1-1-stats-Added-capacity-flags.patch @@ -0,0 +1,325 @@ +From 5fdd49609bb3ea985196b5fc148f87abcfce7a21 Mon Sep 17 00:00:00 2001 +From: Hoang Nguyen +Date: Tue, 1 Oct 2024 15:33:49 +0000 +Subject: [PATCH 1/1] stats: Added capacity flag in stats + +Busyness is calculated on how busy the current core is, ignoring the +current frequency. So a core that's 50% busy at P1 (e.g. 2GHz), shows +as 100% busy at 1GHz. + +This patch adds a new 'capacity' metric that shows a percentage based on +the P1 (base) freqency of the core, so that if the core is 50% busy at +P1, it should show 50% regardless of what the current frequency is. 
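The per-core P1 frequency is discovered at runtime: the first lookup in this patch reads the cpufreq base_frequency sysfs file, then falls back to scaling_max_freq (for acpi-cpufreq) and finally to the MSR. A minimal standalone sketch of that first lookup, with simplified error handling and a hard-coded sample CPU number:

```c
/* Standalone sketch of the base_frequency sysfs lookup used to find P1. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static int
read_base_frequency_khz(unsigned int cpu)
{
	char path[256], buf[16];
	ssize_t n;
	int fd;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency", cpu);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;                /* not exposed by this cpufreq driver */
	n = pread(fd, buf, sizeof(buf) - 1, 0);
	close(fd);
	if (n <= 0)
		return -1;
	buf[n] = '\0';
	return atoi(buf);                 /* kHz, e.g. 2000000 for a 2 GHz P1 */
}

int
main(void)
{
	int khz = read_base_frequency_khz(0);   /* sample CPU number */
	if (khz > 0)
		printf("cpu0 P1 frequency: %d kHz\n", khz);
	else
		printf("base_frequency unavailable; would fall back to scaling_max_freq/MSR\n");
	return 0;
}
```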
+ +--- + src/vlib/cli.c | 31 ++++++ + src/vlib/main.h | 1 + + src/vpp/stats/stat_segment.c | 199 ++++++++++++++++++++++++++++++++++- + src/vpp/stats/stat_segment.h | 3 + + 4 files changed, 233 insertions(+), 1 deletion(-) + +diff --git a/src/vlib/cli.c b/src/vlib/cli.c +index 0267f4e58..113a28fbd 100644 +--- a/src/vlib/cli.c ++++ b/src/vlib/cli.c +@@ -915,6 +915,37 @@ VLIB_CLI_COMMAND (show_cpu_load_command, static) = { + }; + /* *INDENT-ON* */ + ++static clib_error_t * ++show_cpu_capacity (vlib_main_t * vm, unformat_input_t * input, ++ vlib_cli_command_t * cmd) ++{ ++ uword i; ++ ++ vlib_cli_output (vm, "%10s | %10s | %12s", "Thread", "Core", "Load %"); ++ ++ for (i = 0; i < vlib_get_n_threads (); i++) ++ { ++ vlib_main_t *vm_i; ++ ++ vm_i = vlib_get_main_by_index (i); ++ if (!vm_i) ++ continue; ++ ++ vlib_cli_output (vm, "%8u | %8u | %8.2f", i, vm_i->cpu_id, ++ (f64)vm_i->cpu_capacity / 100.0); ++ } ++ ++ return 0; ++} ++ ++/* *INDENT-OFF* */ ++VLIB_CLI_COMMAND (show_cpu_capacity_command, static) = { ++ .path = "show cpu capacity", ++ .short_help = "Show cpu capacity", ++ .function = show_cpu_capacity, ++ .is_mp_safe = 1, ++}; ++/* *INDENT-ON* */ + + static clib_error_t * + show_cpu (vlib_main_t * vm, unformat_input_t * input, +diff --git a/src/vlib/main.h b/src/vlib/main.h +index eba5b0be9..e9ddf1aae 100644 +--- a/src/vlib/main.h ++++ b/src/vlib/main.h +@@ -138,6 +138,7 @@ typedef struct vlib_main_t + u64 cpu_load_clocks; + u32 cpu_load_points; + u32 cpuload_burst; ++ u64 cpu_capacity; + + /* Incremented once for each main loop. */ + volatile u32 main_loop_count; +diff --git a/src/vpp/stats/stat_segment.c b/src/vpp/stats/stat_segment.c +index 04abcc1e3..e3c76f629 100644 +--- a/src/vpp/stats/stat_segment.c ++++ b/src/vpp/stats/stat_segment.c +@@ -23,9 +23,192 @@ + #include + #include + #include ++#include ++#include ++#include ++#include ++ ++#define MSR_PLATFORM_INFO 0xCE ++#define POWER_SYSFS_CUR_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" ++#define POWER_SYSFS_BASE_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" ++#define POWER_SYSFS_SCALING_DRIVER_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_driver" ++#define POWER_SYSFS_SCALING_MAX_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" ++#define POWER_SYSFS_MSR_PATH "/dev/cpu/%u/msr" ++ ++#define MAX_LCORE 1280 ++#define PATH_MAX 4096 ++int current_fds[MAX_LCORE] = {0}; ++static int p1_freq[MAX_LCORE] = {0}; + + stat_segment_main_t stat_segment_main; + ++static int ++try_read_base_frequency(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ snprintf(path, sizeof(path), POWER_SYSFS_BASE_FREQ_PATH, lcore_id); ++ ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ ++ p1_freq[lcore_id] = atoi(buffer); ++ return p1_freq[lcore_id]; ++ ++ ++} ++ ++static int ++try_read_scaling_max_freq(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int freq; ++ int fd; ++ ++ /* ++ * If the driver is acpi_cpufreq, we can read the scaling_max_freq file ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_DRIVER_PATH, lcore_id); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // 
Null-terminate the buffer ++ ++ close(fd); ++ ++ if (strncmp(buffer, "acpi-cpufreq", 12) == 0) { ++ /* we can use the scaling_max_freq to get the p1 */ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_MAX_FREQ_PATH, lcore_id); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ freq = atoi(buffer) / 1000; /* convert to KHz */ ++ ++ /* ++ * If the freq value ends with '1', then, turbo is enabled. ++ * Round it down to the nearest 100. Otherwuse use the value. ++ */ ++ return (freq & ~1) * 1000; /* convert to Hz */ ++ } ++ return -1; ++} ++ ++static int ++try_read_msr(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ int freq; ++ uint64_t data; ++ ++ /* ++ * If the msr driver is present, we can read p1 from MSR_PLATFORM_INFO register ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_MSR_PATH, lcore_id); ++ fd = open(path, O_RDONLY); ++ if (fd < 0) { ++ return -1; ++ } ++ ++ if (pread(fd, &data, sizeof(data), MSR_PLATFORM_INFO) != sizeof(data)) { ++ close(fd); ++ return -1; ++ } ++ ++ close(fd); ++ ++ freq = ((data >> 8) & 0xff) * 100 * 1000; ++ ++ return freq; ++} ++ ++ ++static ++int read_sysfs_p1_freq(unsigned int lcore_id) { ++ int freq; ++ ++ /* We've previously got the p1 frequency. */ ++ if (p1_freq[lcore_id] != 0) ++ return p1_freq[lcore_id]; ++ ++ /* ++ * Check the base_frequency file, if it's there ++ */ ++ freq = try_read_base_frequency(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Check the scaling_max_freq file for the acpi-freq driver ++ */ ++ freq = try_read_scaling_max_freq(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Try reading from the MSR register ++ */ ++ freq = try_read_msr(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ return -1; ++} ++ ++static ++int read_sysfs_cur_freq(unsigned int lcore_id) { ++ char path[PATH_MAX]; ++ ++ if (current_fds[lcore_id] == 0) { ++ sprintf(path, POWER_SYSFS_CUR_PATH, lcore_id); ++ current_fds[lcore_id] = open(path, O_RDONLY); ++ if (current_fds[lcore_id] == -1) { ++ perror("Failed to open file"); ++ return -1; ++ } ++ } ++ ++ char buffer[16]; ++ ssize_t bytesRead = pread(current_fds[lcore_id], buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ perror("Failed to read file"); ++ return -1; ++ } ++ ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ int value = atoi(buffer); ++ return value; ++} ++ + /* + * Used only by VPP writers + */ +@@ -609,6 +792,8 @@ do_stat_segment_updates (stat_segment_main_t * sm) + [STAT_COUNTER_CPU_UTIL_PER_WORKER], 0); + stat_validate_counter_vector (&sm->directory_vector + [STAT_COUNTER_QUEUE_BURST_PER_WORKER], 0); ++ stat_validate_counter_vector (&sm->directory_vector ++ [STAT_COUNTER_CPU_CAPACITY], 0); + num_worker_threads_set = 1; + vlib_stat_segment_unlock (); + clib_mem_set_heap (oldheap); +@@ -644,7 +829,19 @@ do_stat_segment_updates (stat_segment_main_t * sm) + stat_set_simple_counter (&sm->directory_vector + [STAT_COUNTER_QUEUE_BURST_PER_WORKER], i, 0, + ((this_vlib_main->cpu_id << 8) | (this_vlib_main->cpuload_burst))); +- ++ /* Calculate capacity */ ++ int core_id = this_vlib_main->cpu_id; ++ int cur_freq = read_sysfs_cur_freq(core_id); ++ int p1 = read_sysfs_p1_freq(core_id) ; ++ if (p1 <= 0) { ++ this_vlib_main->cpu_capacity 
= 0; ++ } else { ++ this_vlib_main->cpu_capacity = (u64)this_vlib_main->cpu_load_points * cur_freq/p1; ++ } ++ /* Set the per-worker capacity */ ++ stat_set_simple_counter (&sm->directory_vector ++ [STAT_COUNTER_CPU_CAPACITY], i, 0, ++ ((this_vlib_main->cpu_id << 8) | (this_vlib_main->cpu_capacity/100))); + } + + /* And set the system average rate */ +diff --git a/src/vpp/stats/stat_segment.h b/src/vpp/stats/stat_segment.h +index a92b87cff..58e977819 100644 +--- a/src/vpp/stats/stat_segment.h ++++ b/src/vpp/stats/stat_segment.h +@@ -27,6 +27,7 @@ typedef enum + STAT_COUNTER_VECTOR_RATE_PER_WORKER, + STAT_COUNTER_CPU_UTIL_PER_WORKER, + STAT_COUNTER_QUEUE_BURST_PER_WORKER, ++ STAT_COUNTER_CPU_CAPACITY, + STAT_COUNTER_INPUT_RATE, + STAT_COUNTER_LAST_UPDATE, + STAT_COUNTER_LAST_STATS_CLEAR, +@@ -50,6 +51,8 @@ typedef enum + cpu_util_per_worker, /sys) \ + _(QUEUE_BURST_PER_WORKER, COUNTER_VECTOR_SIMPLE, \ + queue_burst_per_worker, /sys) \ ++ _(CPU_CAPACITY, COUNTER_VECTOR_SIMPLE, \ ++ capacity_per_worker, /sys) \ + _(NUM_WORKER_THREADS, SCALAR_INDEX, num_worker_threads, /sys) \ + _(INPUT_RATE, SCALAR_INDEX, input_rate, /sys) \ + _(LAST_UPDATE, SCALAR_INDEX, last_update, /sys) \ +-- +2.25.1 + diff --git a/ipm/patches/vpp/21.01/0004-Subject-PATCH-1-1-stats-Added-capacity-flags.patch b/ipm/patches/vpp/21.01/0004-Subject-PATCH-1-1-stats-Added-capacity-flags.patch new file mode 100644 index 0000000..7cdd3bf --- /dev/null +++ b/ipm/patches/vpp/21.01/0004-Subject-PATCH-1-1-stats-Added-capacity-flags.patch @@ -0,0 +1,325 @@ +From 578a1dffb5b57781fa0c6e78736c38b1c87a046f Mon Sep 17 00:00:00 2001 +From: Hoang Nguyen +Date: Tue, 1 Oct 2024 17:10:32 +0000 +Subject: [PATCH 1/1] stats: Added capacity flag in stats + +Busyness is calculated on how busy the current core is, ignoring the +current frequency. So a core that's 50% busy at P1 (e.g. 2GHz), shows +as 100% busy at 1GHz. + +This patch adds a new 'capacity' metric that shows a percentage based on +the P1 (base) freqency of the core, so that if the core is 50% busy at +P1, it should show 50% regardless of what the current frequency is. 
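As with the other per-worker counters, the value exported to the stats segment packs the core id into the upper bits and the capacity percentage into the low 8 bits. A small standalone sketch of how a stats consumer would unpack it (the worker values here are made up):

```c
/* Standalone sketch of the packed per-worker counter layout:
 * value = (cpu_id << 8) | capacity_percent. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t cpu_id = 14, capacity = 37;          /* example worker values */
	uint64_t packed = (cpu_id << 8) | capacity;   /* what the stat segment holds */

	/* consumer side: split the value back out */
	printf("core %u, capacity %u%%\n",
	       (unsigned)(packed >> 8), (unsigned)(packed & 0xff));
	return 0;
}
```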
+ +--- + src/vlib/cli.c | 31 ++++++ + src/vlib/main.h | 1 + + src/vpp/stats/stat_segment.c | 199 ++++++++++++++++++++++++++++++++++- + src/vpp/stats/stat_segment.h | 3 + + 4 files changed, 233 insertions(+), 1 deletion(-) + +diff --git a/src/vlib/cli.c b/src/vlib/cli.c +index ed78b045f..ce2047f25 100644 +--- a/src/vlib/cli.c ++++ b/src/vlib/cli.c +@@ -949,6 +949,37 @@ VLIB_CLI_COMMAND (show_cpu_load_command, static) = { + }; + /* *INDENT-ON* */ + ++static clib_error_t * ++show_cpu_capacity (vlib_main_t * vm, unformat_input_t * input, ++ vlib_cli_command_t * cmd) ++{ ++ uword i; ++ ++ vlib_cli_output (vm, "%10s | %10s | %12s", "Thread", "Core", "Load %"); ++ ++ for (i = 0; i < vlib_get_n_threads (); i++) ++ { ++ vlib_main_t *vm_i; ++ ++ vm_i = vlib_get_main_by_index (i); ++ if (!vm_i) ++ continue; ++ ++ vlib_cli_output (vm, "%8u | %8u | %8.2f", i, vm_i->cpu_id, ++ (f64)vm_i->cpu_capacity / 100.0); ++ } ++ ++ return 0; ++} ++ ++/* *INDENT-OFF* */ ++VLIB_CLI_COMMAND (show_cpu_capacity_command, static) = { ++ .path = "show cpu capacity", ++ .short_help = "Show cpu capacity", ++ .function = show_cpu_capacity, ++ .is_mp_safe = 1, ++}; ++/* *INDENT-ON* */ + + static clib_error_t * + show_cpu (vlib_main_t * vm, unformat_input_t * input, +diff --git a/src/vlib/main.h b/src/vlib/main.h +index c997381bc..8e07b4a86 100644 +--- a/src/vlib/main.h ++++ b/src/vlib/main.h +@@ -137,6 +137,7 @@ typedef struct vlib_main_t + u64 cpu_load_clocks; + u32 cpu_load_points; + u32 cpuload_burst; ++ u64 cpu_capacity; + + /* Incremented once for each main loop. */ + volatile u32 main_loop_count; +diff --git a/src/vpp/stats/stat_segment.c b/src/vpp/stats/stat_segment.c +index ed806d208..4ee7a8c20 100644 +--- a/src/vpp/stats/stat_segment.c ++++ b/src/vpp/stats/stat_segment.c +@@ -22,9 +22,192 @@ + #undef HAVE_MEMFD_CREATE + #include + #include ++#include ++#include ++#include ++#include ++ ++#define MSR_PLATFORM_INFO 0xCE ++#define POWER_SYSFS_CUR_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" ++#define POWER_SYSFS_BASE_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" ++#define POWER_SYSFS_SCALING_DRIVER_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_driver" ++#define POWER_SYSFS_SCALING_MAX_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" ++#define POWER_SYSFS_MSR_PATH "/dev/cpu/%u/msr" ++ ++#define MAX_LCORE 1280 ++#define PATH_MAX 4096 ++int current_fds[MAX_LCORE] = {0}; ++static int p1_freq[MAX_LCORE] = {0}; + + stat_segment_main_t stat_segment_main; + ++static int ++try_read_base_frequency(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ snprintf(path, sizeof(path), POWER_SYSFS_BASE_FREQ_PATH, lcore_id); ++ ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ ++ p1_freq[lcore_id] = atoi(buffer); ++ return p1_freq[lcore_id]; ++ ++ ++} ++ ++static int ++try_read_scaling_max_freq(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int freq; ++ int fd; ++ ++ /* ++ * If the driver is acpi_cpufreq, we can read the scaling_max_freq file ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_DRIVER_PATH, lcore_id); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ buffer[bytesRead] = 
'\0'; // Null-terminate the buffer ++ ++ close(fd); ++ ++ if (strncmp(buffer, "acpi-cpufreq", 12) == 0) { ++ /* we can use the scaling_max_freq to get the p1 */ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_MAX_FREQ_PATH, lcore_id); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ freq = atoi(buffer) / 1000; /* convert to KHz */ ++ ++ /* ++ * If the freq value ends with '1', then, turbo is enabled. ++ * Round it down to the nearest 100. Otherwuse use the value. ++ */ ++ return (freq & ~1) * 1000; /* convert to Hz */ ++ } ++ return -1; ++} ++ ++static int ++try_read_msr(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ int freq; ++ uint64_t data; ++ ++ /* ++ * If the msr driver is present, we can read p1 from MSR_PLATFORM_INFO register ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_MSR_PATH, lcore_id); ++ fd = open(path, O_RDONLY); ++ if (fd < 0) { ++ return -1; ++ } ++ ++ if (pread(fd, &data, sizeof(data), MSR_PLATFORM_INFO) != sizeof(data)) { ++ close(fd); ++ return -1; ++ } ++ ++ close(fd); ++ ++ freq = ((data >> 8) & 0xff) * 100 * 1000; ++ ++ return freq; ++} ++ ++ ++static ++int read_sysfs_p1_freq(unsigned int lcore_id) { ++ int freq; ++ ++ /* We've previously got the p1 frequency. */ ++ if (p1_freq[lcore_id] != 0) ++ return p1_freq[lcore_id]; ++ ++ /* ++ * Check the base_frequency file, if it's there ++ */ ++ freq = try_read_base_frequency(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Check the scaling_max_freq file for the acpi-freq driver ++ */ ++ freq = try_read_scaling_max_freq(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Try reading from the MSR register ++ */ ++ freq = try_read_msr(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ return -1; ++} ++ ++static ++int read_sysfs_cur_freq(unsigned int lcore_id) { ++ char path[PATH_MAX]; ++ ++ if (current_fds[lcore_id] == 0) { ++ sprintf(path, POWER_SYSFS_CUR_PATH, lcore_id); ++ current_fds[lcore_id] = open(path, O_RDONLY); ++ if (current_fds[lcore_id] == -1) { ++ perror("Failed to open file"); ++ return -1; ++ } ++ } ++ ++ char buffer[16]; ++ ssize_t bytesRead = pread(current_fds[lcore_id], buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ perror("Failed to read file"); ++ return -1; ++ } ++ ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ int value = atoi(buffer); ++ return value; ++} ++ + /* + * Used only by VPP writers + */ +@@ -613,6 +796,8 @@ do_stat_segment_updates (stat_segment_main_t * sm) + [STAT_COUNTER_CPU_UTIL_PER_WORKER], 0); + stat_validate_counter_vector (&sm->directory_vector + [STAT_COUNTER_QUEUE_BURST_PER_WORKER], 0); ++ stat_validate_counter_vector (&sm->directory_vector ++ [STAT_COUNTER_CPU_CAPACITY], 0); + num_worker_threads_set = 1; + vlib_stat_segment_unlock (); + clib_mem_set_heap (oldheap); +@@ -648,7 +833,19 @@ do_stat_segment_updates (stat_segment_main_t * sm) + stat_set_simple_counter (&sm->directory_vector + [STAT_COUNTER_QUEUE_BURST_PER_WORKER], i, 0, + ((this_vlib_main->cpu_id << 8) | (this_vlib_main->cpuload_burst))); +- ++ /* Calculate capacity */ ++ int core_id = this_vlib_main->cpu_id; ++ int cur_freq = read_sysfs_cur_freq(core_id); ++ int p1 = read_sysfs_p1_freq(core_id) ; ++ if (p1 <= 0) { ++ 
this_vlib_main->cpu_capacity = 0; ++ } else { ++ this_vlib_main->cpu_capacity = (u64)this_vlib_main->cpu_load_points * cur_freq/p1; ++ } ++ /* Set the per-worker capacity */ ++ stat_set_simple_counter (&sm->directory_vector ++ [STAT_COUNTER_CPU_CAPACITY], i, 0, ++ ((this_vlib_main->cpu_id << 8) | (this_vlib_main->cpu_capacity/100))); + } + + /* And set the system average rate */ +diff --git a/src/vpp/stats/stat_segment.h b/src/vpp/stats/stat_segment.h +index fdf9986fb..c059f3c0a 100644 +--- a/src/vpp/stats/stat_segment.h ++++ b/src/vpp/stats/stat_segment.h +@@ -27,6 +27,7 @@ typedef enum + STAT_COUNTER_VECTOR_RATE_PER_WORKER, + STAT_COUNTER_CPU_UTIL_PER_WORKER, + STAT_COUNTER_QUEUE_BURST_PER_WORKER, ++ STAT_COUNTER_CPU_CAPACITY, + STAT_COUNTER_INPUT_RATE, + STAT_COUNTER_LAST_UPDATE, + STAT_COUNTER_LAST_STATS_CLEAR, +@@ -50,6 +51,8 @@ typedef enum + cpu_util_per_worker, /sys) \ + _(QUEUE_BURST_PER_WORKER, COUNTER_VECTOR_SIMPLE, \ + queue_burst_per_worker, /sys) \ ++ _(CPU_CAPACITY, COUNTER_VECTOR_SIMPLE, \ ++ capacity_per_worker, /sys) \ + _(NUM_WORKER_THREADS, SCALAR_INDEX, num_worker_threads, /sys) \ + _(INPUT_RATE, SCALAR_INDEX, input_rate, /sys) \ + _(LAST_UPDATE, SCALAR_INDEX, last_update, /sys) \ +-- +2.25.1 + diff --git a/ipm/patches/vpp/22.02/0004-stats-Added-capacity-flag-in-stats.patch b/ipm/patches/vpp/22.02/0004-stats-Added-capacity-flag-in-stats.patch new file mode 100644 index 0000000..180aa4e --- /dev/null +++ b/ipm/patches/vpp/22.02/0004-stats-Added-capacity-flag-in-stats.patch @@ -0,0 +1,365 @@ +From edbf641d6f1e3386425ed6999be3f52140586a6f Mon Sep 17 00:00:00 2001 +From: Hoang Nguyen +Date: Sun, 29 Sep 2024 17:29:47 +0000 +Subject: [PATCH 1/1] stats: Added capacity flag in stats + +Busyness is calculated on how busy the current core is, ignoring the +current frequency. So a core that's 50% busy at P1 (e.g. 2GHz), shows +as 100% busy at 1GHz. + +This patch adds a new 'capacity' metric that shows a percentage based on +the P1 (base) freqency of the core, so that if the core is 50% busy at +P1, it should show 50% regardless of what the current frequency is. 
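The final P1 fallback reads MSR_PLATFORM_INFO (0xCE), whose bits 15:8 hold the maximum non-turbo ratio in units of 100 MHz; the patch multiplies that ratio by 100 * 1000 to obtain kHz. A standalone sketch of the decode, using a made-up raw value instead of an actual /dev/cpu/N/msr read:

```c
/* Standalone sketch of decoding the non-turbo ratio from MSR_PLATFORM_INFO. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	unsigned ratio_field = 20;                                 /* sample: 20 * 100 MHz = 2.0 GHz */
	uint64_t msr_platform_info = (uint64_t)ratio_field << 8;   /* other MSR fields left zero */

	unsigned ratio = (msr_platform_info >> 8) & 0xff;
	unsigned p1_khz = ratio * 100 * 1000;                      /* same scaling as the patch */

	printf("non-turbo ratio %u -> P1 = %u kHz (%.1f GHz)\n",
	       ratio, p1_khz, p1_khz / 1e6);
	return 0;
}
```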
+ +--- + src/vlib/cli.c | 32 ++++ + src/vlib/main.h | 1 + + src/vpp/stats/stat_segment.c | 1 + + src/vpp/stats/stat_segment.h | 1 + + src/vpp/stats/stat_segment_provider.c | 233 ++++++++++++++++++++++++++ + 5 files changed, 268 insertions(+) + +diff --git a/src/vlib/cli.c b/src/vlib/cli.c +index c1ae5f7c6..1fa6b2fdf 100644 +--- a/src/vlib/cli.c ++++ b/src/vlib/cli.c +@@ -966,6 +966,38 @@ VLIB_CLI_COMMAND (show_cpu_load_command, static) = { + }; + /* *INDENT-ON* */ + ++static clib_error_t * ++show_cpu_capacity (vlib_main_t * vm, unformat_input_t * input, ++ vlib_cli_command_t * cmd) ++{ ++ uword i; ++ ++ vlib_cli_output (vm, "%10s | %10s | %12s", "Thread", "Core", "Capacity %"); ++ ++ for (i = 0; i < vlib_get_n_threads (); i++) ++ { ++ vlib_main_t *vm_i; ++ ++ vm_i = vlib_get_main_by_index (i); ++ if (!vm_i) ++ continue; ++ ++ vlib_cli_output (vm, "%8u | %8u | %8.2f", i, vm_i->cpu_id, ++ (f64)vm_i->cpu_capacity / 100.0); ++ } ++ ++ return 0; ++} ++ ++/* *INDENT-OFF* */ ++VLIB_CLI_COMMAND (show_cpu_capacity_command, static) = { ++ .path = "show cpu capacity", ++ .short_help = "Show cpu capacity", ++ .function = show_cpu_capacity, ++ .is_mp_safe = 1, ++}; ++/* *INDENT-ON* */ ++ + static clib_error_t * + show_cpu (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +diff --git a/src/vlib/main.h b/src/vlib/main.h +index 84c5ca051..25237f939 100644 +--- a/src/vlib/main.h ++++ b/src/vlib/main.h +@@ -120,6 +120,7 @@ typedef struct vlib_main_t + u64 cpu_load_clocks; + u32 cpu_load_points; + u32 cpuload_burst; ++ u64 cpu_capacity; + + /* Incremented once for each main loop. */ + volatile u32 main_loop_count; +diff --git a/src/vpp/stats/stat_segment.c b/src/vpp/stats/stat_segment.c +index fb0d5b8ee..2b15895a2 100644 +--- a/src/vpp/stats/stat_segment.c ++++ b/src/vpp/stats/stat_segment.c +@@ -758,6 +758,7 @@ do_stat_segment_updates (vlib_main_t *vm, stat_segment_main_t *sm) + stat_provider_register_vector_rate (tm->n_vlib_mains - 1); + stat_provider_register_cpu_util (tm->n_vlib_mains - 1); + stat_provider_register_queue_burst (tm->n_vlib_mains - 1); ++ stat_provider_register_capacity (tm->n_vlib_mains - 1); + + sm->directory_vector[STAT_COUNTER_NUM_WORKER_THREADS].value = + tm->n_vlib_mains - 1; +diff --git a/src/vpp/stats/stat_segment.h b/src/vpp/stats/stat_segment.h +index 10e6e6791..2456efe3b 100644 +--- a/src/vpp/stats/stat_segment.h ++++ b/src/vpp/stats/stat_segment.h +@@ -123,6 +123,7 @@ void vlib_stats_register_symlink (void *oldheap, u8 *name, u32 index1, + void stat_provider_register_vector_rate (u32 num_workers); + void stat_provider_register_cpu_util (u32 num_workers); + void stat_provider_register_queue_burst (u32 num_workers); ++void stat_provider_register_capacity (u32 num_workers); + + f64 + vlib_get_stat_segment_cpuload_rate (void); +diff --git a/src/vpp/stats/stat_segment_provider.c b/src/vpp/stats/stat_segment_provider.c +index 941026557..2aff45fec 100644 +--- a/src/vpp/stats/stat_segment_provider.c ++++ b/src/vpp/stats/stat_segment_provider.c +@@ -23,6 +23,23 @@ + #include + #include + #include "stat_segment.h" ++#include ++#include ++#include ++#include ++ ++#define MSR_PLATFORM_INFO 0xCE ++#define POWER_SYSFS_CUR_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" ++#define POWER_SYSFS_BASE_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" ++#define POWER_SYSFS_SCALING_DRIVER_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_driver" ++#define POWER_SYSFS_SCALING_MAX_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" 
++#define POWER_SYSFS_MSR_PATH "/dev/cpu/%u/msr" ++ ++#define MAX_LCORE 1280 ++#define PATH_MAX 4096 ++int current_fds[MAX_LCORE] = {0}; ++static int p1_freq[MAX_LCORE] = {0}; ++ + + clib_mem_heap_t **memory_heaps_vec; + u32 mem_vector_index; +@@ -39,6 +56,173 @@ enum + STAT_MEM_RELEASABLE, + } stat_mem_usage_e; + ++static int ++try_read_base_frequency(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ snprintf(path, sizeof(path), POWER_SYSFS_BASE_FREQ_PATH, lcore_id); ++ ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ ++ p1_freq[lcore_id] = atoi(buffer); ++ return p1_freq[lcore_id]; ++ ++ ++} ++ ++static int ++try_read_scaling_max_freq(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int freq; ++ int fd; ++ ++ /* ++ * If the driver is acpi_cpufreq, we can read the scaling_max_freq file ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_DRIVER_PATH, lcore_id); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ char buffer[16]; ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ close(fd); ++ ++ if (strncmp(buffer, "acpi-cpufreq", 12) == 0) { ++ /* we can use the scaling_max_freq to get the p1 */ ++ snprintf(path, sizeof(path), POWER_SYSFS_SCALING_MAX_FREQ_PATH, lcore_id); ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ return -1; ++ } ++ ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ return -1; ++ } ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ close(fd); ++ freq = atoi(buffer) / 1000; /* convert to KHz */ ++ ++ /* ++ * If the freq value ends with '1', then, turbo is enabled. ++ * Round it down to the nearest 100. Otherwuse use the value. ++ */ ++ return (freq & ~1) * 1000; /* convert to Hz */ ++ } ++ return -1; ++} ++ ++static int ++try_read_msr(unsigned int lcore_id) ++{ ++ char path[PATH_MAX]; ++ int fd; ++ int freq; ++ uint64_t data; ++ ++ /* ++ * If the msr driver is present, we can read p1 from MSR_PLATFORM_INFO register ++ */ ++ snprintf(path, sizeof(path), POWER_SYSFS_MSR_PATH, lcore_id); ++ fd = open(path, O_RDONLY); ++ if (fd < 0) { ++ return -1; ++ } ++ ++ if (pread(fd, &data, sizeof(data), MSR_PLATFORM_INFO) != sizeof(data)) { ++ close(fd); ++ return -1; ++ } ++ ++ close(fd); ++ ++ freq = ((data >> 8) & 0xff) * 100 * 1000; ++ ++ return freq; ++} ++ ++ ++static ++int read_sysfs_p1_freq(unsigned int lcore_id) { ++ int freq; ++ ++ /* We've previously got the p1 frequency. 
*/ ++ if (p1_freq[lcore_id] != 0) ++ return p1_freq[lcore_id]; ++ ++ /* ++ * Check the base_frequency file, if it's there ++ */ ++ freq = try_read_base_frequency(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Check the scaling_max_freq file for the acpi-freq driver ++ */ ++ freq = try_read_scaling_max_freq(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ /* ++ * Try reading from the MSR register ++ */ ++ freq = try_read_msr(lcore_id); ++ if (freq != -1) { ++ p1_freq[lcore_id] = freq; ++ return freq; ++ } ++ ++ return -1; ++} ++ ++static ++int read_sysfs_cur_freq(unsigned int lcore_id) { ++ char path[PATH_MAX]; ++ ++ if (current_fds[lcore_id] == 0) { ++ sprintf(path, POWER_SYSFS_CUR_PATH, lcore_id); ++ current_fds[lcore_id] = open(path, O_RDONLY); ++ if (current_fds[lcore_id] == -1) { ++ perror("Failed to open file"); ++ return -1; ++ } ++ } ++ ++ char buffer[16]; ++ ssize_t bytesRead = pread(current_fds[lcore_id], buffer, sizeof(buffer) - 1, 0); ++ if (bytesRead == -1) { ++ perror("Failed to read file"); ++ return -1; ++ } ++ ++ buffer[bytesRead] = '\0'; // Null-terminate the buffer ++ ++ int value = atoi(buffer); ++ return value; ++} ++ + /* + * Called from the stats periodic process to update memory counters. + */ +@@ -211,6 +395,35 @@ stat_provider_queue_burst_per_thread_update_fn ( + } + } + ++static void ++stat_provider_capacity_per_thread_update_fn ( ++ stat_segment_directory_entry_t *e, u32 index) ++{ ++ vlib_main_t *this_vlib_main; ++ int i; ++ ASSERT (e->data); ++ counter_t **counters = e->data; ++ int core_id = 0; ++ ++ for (i = 0; i < vlib_get_n_threads (); i++) ++ { ++ ++ this_vlib_main = vlib_get_main_by_index (i); ++ core_id = this_vlib_main->cpu_id; ++ int cur_freq = read_sysfs_cur_freq(core_id); ++ int p1 = read_sysfs_p1_freq(core_id) ; ++ /* Set the per-worker queue burst */ ++ counter_t *cb = counters[i]; ++ /* Lower 8-bits is burst flag and rest is core id */ ++ if (p1 <= 0) { ++ this_vlib_main->cpu_capacity = 0; ++ } else { ++ this_vlib_main->cpu_capacity = (u64)this_vlib_main->cpu_load_points * cur_freq/p1; ++ } ++ /* Lower 8-bits is capacity flag and rest is core id */ ++ cb[0] = ((this_vlib_main->cpu_id << 8) | (this_vlib_main->cpu_capacity/100)); ++ } ++} + + void + stat_provider_register_vector_rate (u32 num_workers) +@@ -279,3 +492,23 @@ stat_provider_register_queue_burst (u32 num_workers) + ep->data = stat_validate_counter_vector3 (ep->data, num_workers, 0); + vlib_stat_segment_unlock (); + } ++ ++void ++stat_provider_register_capacity (u32 num_workers) ++{ ++ int i; ++ ++ u8 *s = format (0, "/sys/capacity_per_worker%c", 0); ++ i = stat_segment_new_entry (s, STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE); ++ if (i == ~0) ++ ASSERT (0); ++ vec_free (s); ++ stat_segment_poll_add (i, stat_provider_capacity_per_thread_update_fn, ~0, ++ 10); ++ ++ stat_segment_main_t *sm = &stat_segment_main; ++ vlib_stat_segment_lock (); ++ stat_segment_directory_entry_t *ep = &sm->directory_vector[i]; ++ ep->data = stat_validate_counter_vector3 (ep->data, num_workers, 0); ++ vlib_stat_segment_unlock (); ++} +-- +2.25.1 + diff --git a/ipm/patches/vpp/23.02/0001-vlib-CPU-load-measurement-and-CLI.patch b/ipm/patches/vpp/23.02/0001-vlib-CPU-load-measurement-and-CLI.patch new file mode 100644 index 0000000..92dd444 --- /dev/null +++ b/ipm/patches/vpp/23.02/0001-vlib-CPU-load-measurement-and-CLI.patch @@ -0,0 +1,114 @@ +From 63409ec1173b4f63496cd5c6f404ab204d55690d Mon Sep 17 00:00:00 2001 +From: Hoang Nguyen 
+Date: Sun, 11 Aug 2024 15:44:39 +0000
+Subject: [PATCH 1/3] vlib: CPU load measurement and CLI
+
+The patch calculates CPU load based on the number of ticks elapsed in
+processing packets by the main/worker thread.
+
+New CLI command to query CPU load:
+`show cpu load`
+
+Type: improvement
+---
+ src/vlib/cli.c | 32 ++++++++++++++++++++++++++++++++
+ src/vlib/main.c | 15 +++++++++++++++
+ src/vlib/main.h | 6 ++++++
+ 3 files changed, 53 insertions(+)
+
+diff --git a/src/vlib/cli.c b/src/vlib/cli.c
+index 9c53200f8..3c5b7b533 100644
+--- a/src/vlib/cli.c
++++ b/src/vlib/cli.c
+@@ -986,6 +986,38 @@ VLIB_CLI_COMMAND (show_memory_usage_command, static) = {
+ };
+ /* *INDENT-ON* */
+
++static clib_error_t *
++show_cpu_load (vlib_main_t * vm, unformat_input_t * input,
++ vlib_cli_command_t * cmd)
++{
++ uword i;
++
++ vlib_cli_output (vm, "%10s | %10s | %12s", "Thread", "Core", "Load %");
++
++ for (i = 0; i < vlib_get_n_threads (); i++)
++ {
++ vlib_main_t *vm_i;
++
++ vm_i = vlib_get_main_by_index (i);
++ if (!vm_i)
++ continue;
++
++ vlib_cli_output (vm, "%8u | %8u | %8.2f", i, vm_i->cpu_id,
++ (f64)vm_i->cpu_load_points / 100.0);
++ }
++
++ return 0;
++}
++
++/* *INDENT-OFF* */
++VLIB_CLI_COMMAND (show_cpu_load_command, static) = {
++ .path = "show cpu load",
++ .short_help = "Show cpu load",
++ .function = show_cpu_load,
++ .is_mp_safe = 1,
++};
++/* *INDENT-ON* */
++
+ static clib_error_t *
+ show_cpu (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+diff --git a/src/vlib/main.c b/src/vlib/main.c
+index fc8006447..e567842a0 100644
+--- a/src/vlib/main.c
++++ b/src/vlib/main.c
+@@ -977,6 +977,9 @@ dispatch_node (vlib_main_t * vm,
+ /* n_vectors */ n,
+ /* n_clocks */ t - last_time_stamp);
+
++ if (n)
++ vm->cpu_load_clocks += t - last_time_stamp;
++
+ /* When in adaptive mode and vector rate crosses threshold switch to
+ polling mode and vice versa. */
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_ADAPTIVE_MODE))
+@@ -1679,6 +1682,18 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
+ /* Record time stamp in case there are no enabled nodes and above
+ calls do not update time stamp. */
+ cpu_time_now = clib_cpu_time_now ();
++ /* Time to update cpu load? */
++ if (PREDICT_FALSE (cpu_time_now >= vm->cpu_load_interval_end) )
++ {
++ if (vm->cpu_load_interval_start)
++ {
++ vm->cpu_load_points = (vm->cpu_load_clocks * 1e4) /
++ (cpu_time_now - vm->cpu_load_interval_start);
++ }
++ vm->cpu_load_interval_start = cpu_time_now;
++ vm->cpu_load_interval_end = cpu_time_now + 1e9;
++ vm->cpu_load_clocks = 0;
++ }
+ vm->loops_this_reporting_interval++;
+ now = clib_time_now_internal (&vm->clib_time, cpu_time_now);
+ /* Time to update loops_per_second? */
+diff --git a/src/vlib/main.h b/src/vlib/main.h
+index a9cfab4f8..856629ec3 100644
+--- a/src/vlib/main.h
++++ b/src/vlib/main.h
+@@ -115,6 +115,12 @@ typedef struct vlib_main_t
+ /* Time stamp when main loop was entered (time 0). */
+ u64 cpu_time_main_loop_start;
+
++ /* CPU load measurement */
++ u64 cpu_load_interval_start;
++ u64 cpu_load_interval_end;
++ u64 cpu_load_clocks;
++ u32 cpu_load_points;
++
+ /* Incremented once for each main loop. 
*/
+ volatile u32 main_loop_count;
+
+--
+2.25.1
+
diff --git a/ipm/patches/vpp/23.02/0002-stats-Added-CPU-load-and-queue-burst-flag-in-stats.patch b/ipm/patches/vpp/23.02/0002-stats-Added-CPU-load-and-queue-burst-flag-in-stats.patch
new file mode 100644
index 0000000..c2c8e2b
--- /dev/null
+++ b/ipm/patches/vpp/23.02/0002-stats-Added-CPU-load-and-queue-burst-flag-in-stats.patch
@@ -0,0 +1,343 @@
+From 8ed57819abe56ed4e7e81e05b4115bc6621ede45 Mon Sep 17 00:00:00 2001
+From: Hoang Nguyen
+Date: Tue, 13 Aug 2024 17:27:07 +0000
+Subject: [PATCH 2/3] stats: Added CPU load and queue burst flag in stats
+
+This patch adds the following capabilities:
+- A flag to indicate when the number of packets in the DPDK queue
+crosses the configurable queue threshold.
+- A stats config parameter to configure the interval for CPU load
+measurement.
+ `cpuload-interval