diff --git a/tests/tt_metal/tt_metal/dispatch/dispatch_util/test_dispatch_settings.cpp b/tests/tt_metal/tt_metal/dispatch/dispatch_util/test_dispatch_settings.cpp
index cb31344a3498..94889e855843 100644
--- a/tests/tt_metal/tt_metal/dispatch/dispatch_util/test_dispatch_settings.cpp
+++ b/tests/tt_metal/tt_metal/dispatch/dispatch_util/test_dispatch_settings.cpp
@@ -15,8 +15,8 @@ using namespace tt::tt_metal;
 
 // Loop through test_func for WORKER, ETH X 1, 2 CQs
 void ForEachCoreTypeXHWCQs(const std::function<void(const CoreType& core_type, const uint32_t num_hw_cqs)>& test_func) {
-    static constexpr auto core_types_to_test = std::array<CoreType, 2>{CoreType::WORKER, CoreType::ETH};
-    static constexpr auto num_hw_cqs_to_test = std::array<uint32_t, 2>{1, 2};
+    const auto core_types_to_test = std::array<CoreType, 2>{CoreType::WORKER, CoreType::ETH};
+    const auto num_hw_cqs_to_test = std::array<uint32_t, 2>{1, 2};
 
     for (const auto& core_type : core_types_to_test) {
         if (core_type == CoreType::ETH &&
@@ -33,7 +33,7 @@ void ForEachCoreTypeXHWCQs(const std::function<void(const CoreType& core_type, c
 
 TEST_F(CommandQueueSingleCardFixture, TestDispatchSettingsDefaultUnsupportedCoreType) {
     const auto unsupported_core = CoreType::ARC;
-    EXPECT_THROW(DispatchSettings::defaults(unsupported_core, tt::Cluster::instance(), 1), std::runtime_error);
+    EXPECT_THROW(DispatchSettings::get(unsupported_core, 1), std::runtime_error);
 }
 
 TEST_F(CommandQueueSingleCardFixture, TestDispatchSettingsMissingArgs) {
@@ -42,8 +42,8 @@ TEST_F(CommandQueueSingleCardFixture, TestDispatchSettingsMissingArgs) {
 }
 
 TEST_F(CommandQueueSingleCardFixture, TestDispatchSettingsEq) {
-    static constexpr uint32_t hw_cqs = 2;
-    auto settings = DispatchSettings::worker_defaults(tt::Cluster::instance(), hw_cqs);
+    const uint32_t hw_cqs = 2;
+    auto settings = DispatchSettings::get(CoreType::WORKER, hw_cqs);
     auto settings_2 = settings; // Copy
     EXPECT_EQ(settings, settings_2);
     settings_2.dispatch_size_ += 1;
@@ -51,51 +51,112 @@ TEST_F(CommandQueueSingleCardFixture, TestDispatchSettingsEq) {
 }
 
 TEST_F(CommandQueueSingleCardFixture, TestDispatchSettingsSetPrefetchDBuffer) {
-    static constexpr uint32_t hw_cqs = 2;
-    static constexpr uint32_t expected_buffer_bytes = 0xcafe;
-    static constexpr uint32_t expected_page_count = expected_buffer_bytes / (1 << DispatchSettings::PREFETCH_D_BUFFER_LOG_PAGE_SIZE);
-    auto settings = DispatchSettings::worker_defaults(tt::Cluster::instance(), hw_cqs);
+    const uint32_t hw_cqs = 2;
+    const uint32_t expected_buffer_bytes = 0xcafe;
+    const uint32_t expected_page_count =
+        expected_buffer_bytes / (1 << DispatchSettings::PREFETCH_D_BUFFER_LOG_PAGE_SIZE);
+    auto settings = DispatchSettings::get(CoreType::WORKER, hw_cqs);
     settings.prefetch_d_buffer_size(expected_buffer_bytes);
     EXPECT_EQ(settings.prefetch_d_buffer_size_, expected_buffer_bytes);
     EXPECT_EQ(settings.prefetch_d_pages_, expected_page_count);
 }
 
 TEST_F(CommandQueueSingleCardFixture, TestDispatchSettingsSetPrefetchQBuffer) {
-    static constexpr uint32_t hw_cqs = 2;
-    static constexpr uint32_t expected_buffer_entries = 0x1000;
-    static constexpr uint32_t expected_buffer_bytes = expected_buffer_entries * sizeof(DispatchSettings::prefetch_q_entry_type);
-    auto settings = DispatchSettings::worker_defaults(tt::Cluster::instance(), hw_cqs);
+    const uint32_t hw_cqs = 2;
+    const uint32_t expected_buffer_entries = 0x1000;
+    const uint32_t expected_buffer_bytes = expected_buffer_entries * sizeof(DispatchSettings::prefetch_q_entry_type);
+    auto settings = DispatchSettings::get(CoreType::WORKER, hw_cqs);
     settings.prefetch_q_entries(expected_buffer_entries);
     EXPECT_EQ(settings.prefetch_q_entries_, expected_buffer_entries);
     EXPECT_EQ(settings.prefetch_q_size_, expected_buffer_bytes);
 }
 
 TEST_F(CommandQueueSingleCardFixture, TestDispatchSettingsSetDispatchBuffer) {
-    static constexpr uint32_t hw_cqs = 2;
-    static constexpr uint32_t expected_buffer_bytes = 0x2000;
-    static constexpr uint32_t expected_page_count = expected_buffer_bytes / (1 << DispatchSettings::DISPATCH_BUFFER_LOG_PAGE_SIZE);
-    auto settings = DispatchSettings::worker_defaults(tt::Cluster::instance(), hw_cqs);
+    const uint32_t hw_cqs = 2;
+    const uint32_t expected_buffer_bytes = 0x2000;
+    const uint32_t expected_page_count = expected_buffer_bytes / (1 << DispatchSettings::DISPATCH_BUFFER_LOG_PAGE_SIZE);
+    auto settings = DispatchSettings::get(CoreType::WORKER, hw_cqs);
     settings.dispatch_size(expected_buffer_bytes);
     EXPECT_EQ(settings.dispatch_size_, expected_buffer_bytes);
     EXPECT_EQ(settings.dispatch_pages_, expected_page_count);
 }
 
 TEST_F(CommandQueueSingleCardFixture, TestDispatchSettingsSetDispatchSBuffer) {
-    static constexpr uint32_t hw_cqs = 2;
-    static constexpr uint32_t expected_buffer_bytes = 0x2000;
-    static constexpr uint32_t expected_page_count = expected_buffer_bytes / (1 << DispatchSettings::DISPATCH_S_BUFFER_LOG_PAGE_SIZE);
-    auto settings = DispatchSettings::worker_defaults(tt::Cluster::instance(), hw_cqs);
+    const uint32_t hw_cqs = 2;
+    const uint32_t expected_buffer_bytes = 0x2000;
+    const uint32_t expected_page_count =
+        expected_buffer_bytes / (1 << DispatchSettings::DISPATCH_S_BUFFER_LOG_PAGE_SIZE);
+    auto settings = DispatchSettings::get(CoreType::WORKER, hw_cqs);
     settings.dispatch_s_buffer_size(expected_buffer_bytes);
     EXPECT_EQ(settings.dispatch_s_buffer_size_, expected_buffer_bytes);
     EXPECT_EQ(settings.dispatch_s_buffer_pages_, expected_page_count);
 }
 
 TEST_F(CommandQueueSingleCardFixture, TestDispatchSettingsSetTunnelerBuffer) {
-    static constexpr uint32_t hw_cqs = 2;
-    static constexpr uint32_t expected_buffer_bytes = 0x2000;
-    static constexpr uint32_t expected_page_count = expected_buffer_bytes / (1 << DispatchSettings::PREFETCH_D_BUFFER_LOG_PAGE_SIZE);
-    auto settings = DispatchSettings::worker_defaults(tt::Cluster::instance(), hw_cqs);
+    const uint32_t hw_cqs = 2;
+    const uint32_t expected_buffer_bytes = 0x2000;
+    const uint32_t expected_page_count =
+        expected_buffer_bytes / (1 << DispatchSettings::PREFETCH_D_BUFFER_LOG_PAGE_SIZE);
+    auto settings = DispatchSettings::get(CoreType::WORKER, hw_cqs);
     settings.tunneling_buffer_size(expected_buffer_bytes);
     EXPECT_EQ(settings.tunneling_buffer_size_, expected_buffer_bytes);
     EXPECT_EQ(settings.tunneling_buffer_pages_, expected_page_count);
 }
+
+TEST_F(CommandQueueSingleCardFixture, TestDispatchSettingsMutations) {
+    if (hal.get_programmable_core_type_index(tt::tt_metal::HalProgrammableCoreType::IDLE_ETH) == -1) {
+        // This device does not have the eth core
+        tt::log_info(tt::LogTest, "Test not supported on this device");
+        return;
+    }
+    const auto core_type = CoreType::WORKER;
+    const uint32_t hw_cqs = 1;
+    const uint32_t prefetch_d_size = 0x1000;
+    const uint32_t mux_size = 0x2000;
+    const uint32_t cmddat_size = 0x2000;
+    const uint32_t dispatch_s_size = 32;
+    const uint32_t dispatch_size = 4096;
+    const uint32_t max_cmd_size = 1024;
+    const uint32_t prefetch_q_entries = 512;
+    const uint32_t scratch_db_size = 5120;
+
+    auto& settings = DispatchSettings::get(core_type, hw_cqs);
+    DispatchSettings original_settings = settings;  // Copy the original to be restored later
+
+    EXPECT_EQ(settings.core_type_, core_type);
+
+    // Modify settings
+    settings.prefetch_q_entries(prefetch_q_entries);
+    settings.prefetch_d_buffer_size(prefetch_d_size);
+    settings.tunneling_buffer_size(mux_size);
+    settings.prefetch_cmddat_q_size(cmddat_size);
+    settings.dispatch_s_buffer_size(dispatch_s_size);
+    settings.prefetch_max_cmd_size(max_cmd_size);
+    settings.prefetch_scratch_db_size(scratch_db_size);
+    settings.dispatch_size(dispatch_size);
+
+    // Change instance
+    // Check they are not the same
+    auto& settings_2 = DispatchSettings::get(CoreType::ETH, hw_cqs);
+    EXPECT_NE(settings_2.prefetch_q_entries_, prefetch_q_entries);
+    EXPECT_NE(settings_2.prefetch_d_buffer_size_, prefetch_d_size);
+    EXPECT_NE(settings_2.tunneling_buffer_size_, mux_size);
+    EXPECT_NE(settings_2.prefetch_cmddat_q_size_, cmddat_size);
+    EXPECT_NE(settings_2.dispatch_s_buffer_size_, dispatch_s_size);
+    EXPECT_NE(settings_2.prefetch_max_cmd_size_, max_cmd_size);
+    EXPECT_NE(settings_2.prefetch_scratch_db_size_, scratch_db_size);
+    EXPECT_NE(settings_2.dispatch_size_, dispatch_size);
+
+    // Change back to the instance that we modified
+    auto& settings_3 = DispatchSettings::get(core_type, hw_cqs);
+    EXPECT_EQ(settings_3.prefetch_q_entries_, prefetch_q_entries);
+    EXPECT_EQ(settings_3.prefetch_d_buffer_size_, prefetch_d_size);
+    EXPECT_EQ(settings_3.tunneling_buffer_size_, mux_size);
+    EXPECT_EQ(settings_3.prefetch_cmddat_q_size_, cmddat_size);
+    EXPECT_EQ(settings_3.dispatch_s_buffer_size_, dispatch_s_size);
+    EXPECT_EQ(settings_3.prefetch_max_cmd_size_, max_cmd_size);
+    EXPECT_EQ(settings_3.prefetch_scratch_db_size_, scratch_db_size);
+    EXPECT_EQ(settings_3.dispatch_size_, dispatch_size);
+
+    DispatchSettings::initialize(original_settings);
+}
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
index 5a4434c1abce..ab2483709e2f 100644
--- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_prefetcher.cpp
@@ -15,6 +15,7 @@
 #include <tt-metalium/command_queue_interface.hpp>
 #include <tt-metalium/dispatch_settings.hpp>
 #include "common.h"
+#include "tt_cluster.hpp"
 #include "tt_metal/impl/dispatch/kernels/packet_queue_ctrl.hpp"
 #include "tests/tt_metal/tt_metal/perf_microbenchmark/routing/kernels/traffic_gen_test.hpp"
 
@@ -55,6 +56,8 @@ constexpr uint32_t PCIE_TRANSFER_SIZE_DEFAULT = 4096;
 
 constexpr uint32_t host_data_dirty_pattern = 0xbaadf00d;
 
+constexpr CoreType DISPATCH_CORE_TYPE = CoreType::WORKER;
+
 //////////////////////////////////////////////////////////////////////////////////////////
 // Test dispatch program performance
 //
@@ -113,6 +116,8 @@ uint32_t l1_buf_base_g;
 uint32_t test_device_id_g = 0;
 
 void init(int argc, char** argv) {
+    auto default_settings = DispatchSettings::defaults(DISPATCH_CORE_TYPE, tt::Cluster::instance(), 1);
+
     std::vector<std::string> input_args(argv, argv + argc);
 
     if (test_args::has_command_option(input_args, "-h") || test_args::has_command_option(input_args, "--help")) {
@@ -138,10 +143,7 @@ void init(int argc, char** argv) {
         log_info(LogTest, "  -hp: host huge page issue buffer size (default {})", DEFAULT_HUGEPAGE_ISSUE_BUFFER_SIZE);
         log_info(LogTest, "  -pq: prefetch queue entries (default {})", DEFAULT_PREFETCH_Q_ENTRIES);
         log_info(LogTest, "  -cs: cmddat q size (default {})", DEFAULT_CMDDAT_Q_SIZE);
-        log_info(
-            LogTest,
-            "-pdcs: prefetch_d cmddat cb size (default {})",
-            DispatchMemMap::get(CoreType::WORKER, 1).prefetch_d_buffer_size());
+        log_info(LogTest, "-pdcs: prefetch_d cmddat cb size (default {})", default_settings.prefetch_d_buffer_size_);
         log_info(LogTest, "  -ss: scratch cb size (default {})", DEFAULT_SCRATCH_DB_SIZE);
         log_info(
             LogTest,
@@ -174,8 +176,8 @@ void init(int argc, char** argv) {
     pcie_transfer_size_g = test_args::get_command_option_uint32(input_args, "-pcies", PCIE_TRANSFER_SIZE_DEFAULT);
     dram_page_size_g = test_args::get_command_option_uint32(input_args, "-dpgs", DRAM_PAGE_SIZE_DEFAULT);
     dram_pages_to_read_g = test_args::get_command_option_uint32(input_args, "-dpgr", DRAM_PAGES_TO_READ_DEFAULT);
-    prefetch_d_buffer_size_g = test_args::get_command_option_uint32(
-        input_args, "-pdcs", DispatchMemMap::get(CoreType::WORKER, 1).prefetch_d_buffer_size());
+    prefetch_d_buffer_size_g =
+        test_args::get_command_option_uint32(input_args, "-pdcs", default_settings.prefetch_d_buffer_size_);
 
     test_type_g = test_args::get_command_option_uint32(input_args, "-t", DEFAULT_TEST_TYPE);
     all_workers_g.end_coord.x = test_args::get_command_option_uint32(input_args, "-wx", all_workers_g.end_coord.x);
@@ -849,7 +851,7 @@ void gen_rnd_linear_cmd(
     size &= ~(sizeof(uint32_t) - 1);
     uint32_t offset = std::rand() % dispatch_buffer_page_size_g;
     offset = (offset >> 2) << 2;
-    device_data.relevel(CoreType::WORKER);  // XXXXX shouldn't be needed
+    device_data.relevel(DISPATCH_CORE_TYPE);  // XXXXX shouldn't be needed
     if (device_data.size_at(worker_core, 0) * sizeof(uint32_t) < max_linear_cmd_read_size + offset) {
         // Not enough data yet, just bail on this cmd
         return;
@@ -1699,8 +1701,7 @@ void configure_for_single_chip(
     uint32_t& packetized_path_test_results_addr,
     uint32_t packetized_path_test_results_size,
     uint32_t dev_hugepage_base_g) {
-    const CoreType dispatch_core_type = CoreType::WORKER;
-    uint32_t dispatch_buffer_pages = DispatchMemMap::get(dispatch_core_type, 1).dispatch_buffer_block_size_pages() *
+    uint32_t dispatch_buffer_pages = DispatchMemMap::get(DISPATCH_CORE_TYPE, 1).dispatch_buffer_block_size_pages() *
                                      DispatchSettings::DISPATCH_BUFFER_SIZE_BLOCKS;
     uint32_t num_compute_cores =
         device->compute_with_storage_grid_size().x * device->compute_with_storage_grid_size().y;
@@ -1766,9 +1767,9 @@ void configure_for_single_chip(
     uint32_t* host_hugepage_completion_buffer = (uint32_t*)host_hugepage_completion_buffer_base_g;
     vector<uint32_t> tmp = {dev_hugepage_completion_buffer_base >> 4};
     CoreCoord phys_dispatch_host_core = split_dispatcher_g ? phys_dispatch_h_core : phys_dispatch_core;
-    uint32_t completion_q_wr_ptr = DispatchMemMap::get(dispatch_core_type)
+    uint32_t completion_q_wr_ptr = DispatchMemMap::get(DISPATCH_CORE_TYPE)
                                        .get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_WR);
-    uint32_t completion_q_rd_ptr = DispatchMemMap::get(dispatch_core_type)
+    uint32_t completion_q_rd_ptr = DispatchMemMap::get(DISPATCH_CORE_TYPE)
                                        .get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_RD);
     tt::llrt::write_hex_vec_to_core(device->id(), phys_dispatch_host_core, tmp, completion_q_wr_ptr);
     tt::llrt::write_hex_vec_to_core(device->id(), phys_dispatch_host_core, tmp, completion_q_rd_ptr);
@@ -2105,19 +2106,19 @@ void configure_for_single_chip(
     }
 
     uint32_t host_completion_queue_wr_ptr =
-        DispatchMemMap::get(CoreType::WORKER).get_host_command_queue_addr(CommandQueueHostAddrType::COMPLETION_Q_WR);
+        DispatchMemMap::get(DISPATCH_CORE_TYPE).get_host_command_queue_addr(CommandQueueHostAddrType::COMPLETION_Q_WR);
     uint32_t dev_completion_queue_wr_ptr =
-        DispatchMemMap::get(CoreType::WORKER)
+        DispatchMemMap::get(DISPATCH_CORE_TYPE)
             .get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_WR);
     uint32_t dev_completion_queue_rd_ptr =
-        DispatchMemMap::get(CoreType::WORKER)
+        DispatchMemMap::get(DISPATCH_CORE_TYPE)
             .get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_RD);
 
     std::vector<uint32_t> dispatch_compile_args = {
         dispatch_buffer_base,
         DispatchSettings::DISPATCH_BUFFER_LOG_PAGE_SIZE,
         DispatchSettings::DISPATCH_BUFFER_SIZE_BLOCKS *
-            DispatchMemMap::get(dispatch_core_type, 1).dispatch_buffer_block_size_pages(),
+            DispatchMemMap::get(DISPATCH_CORE_TYPE, 1).dispatch_buffer_block_size_pages(),
         dispatch_cb_sem,  // overridden below for h
         split_prefetcher_g ? prefetch_d_downstream_cb_sem
                            : prefetch_downstream_cb_sem,  // overridden below for dispatch_h
@@ -2395,8 +2396,7 @@ void configure_for_multi_chip(
     uint32_t& packetized_path_test_results_addr,
     uint32_t packetized_path_test_results_size,
     uint32_t dev_hugepage_base_g) {
-    const CoreType dispatch_core_type = CoreType::WORKER;
-    uint32_t dispatch_buffer_pages = DispatchMemMap::get(dispatch_core_type, 1).dispatch_buffer_block_size_pages() *
+    uint32_t dispatch_buffer_pages = DispatchMemMap::get(DISPATCH_CORE_TYPE, 1).dispatch_buffer_block_size_pages() *
                                      DispatchSettings::DISPATCH_BUFFER_SIZE_BLOCKS;
     uint32_t num_compute_cores =
         device->compute_with_storage_grid_size().x * device->compute_with_storage_grid_size().y;
@@ -2478,9 +2478,9 @@ void configure_for_multi_chip(
     uint32_t* host_hugepage_completion_buffer = (uint32_t*)host_hugepage_completion_buffer_base_g;
     vector<uint32_t> tmp = {dev_hugepage_completion_buffer_base >> 4};
     CoreCoord phys_dispatch_host_core = split_dispatcher_g ? phys_dispatch_h_core : phys_dispatch_core;
-    uint32_t completion_q_wr_ptr = DispatchMemMap::get(dispatch_core_type)
+    uint32_t completion_q_wr_ptr = DispatchMemMap::get(DISPATCH_CORE_TYPE)
                                        .get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_WR);
-    uint32_t completion_q_rd_ptr = DispatchMemMap::get(dispatch_core_type)
+    uint32_t completion_q_rd_ptr = DispatchMemMap::get(DISPATCH_CORE_TYPE)
                                        .get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_RD);
     tt::llrt::write_hex_vec_to_core(device->id(), phys_dispatch_host_core, tmp, completion_q_wr_ptr);
     tt::llrt::write_hex_vec_to_core(device->id(), phys_dispatch_host_core, tmp, completion_q_rd_ptr);
@@ -2972,18 +2972,18 @@ void configure_for_multi_chip(
     }
 
     uint32_t host_completion_queue_wr_ptr =
-        DispatchMemMap::get(CoreType::WORKER).get_host_command_queue_addr(CommandQueueHostAddrType::COMPLETION_Q_WR);
+        DispatchMemMap::get(DISPATCH_CORE_TYPE).get_host_command_queue_addr(CommandQueueHostAddrType::COMPLETION_Q_WR);
     uint32_t dev_completion_queue_wr_ptr =
-        DispatchMemMap::get(CoreType::WORKER)
+        DispatchMemMap::get(DISPATCH_CORE_TYPE)
             .get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_WR);
     uint32_t dev_completion_queue_rd_ptr =
-        DispatchMemMap::get(CoreType::WORKER)
+        DispatchMemMap::get(DISPATCH_CORE_TYPE)
             .get_device_command_queue_addr(CommandQueueDeviceAddrType::COMPLETION_Q_RD);
     std::vector<uint32_t> dispatch_compile_args = {
         dispatch_buffer_base,
         DispatchSettings::DISPATCH_BUFFER_LOG_PAGE_SIZE,
         DispatchSettings::DISPATCH_BUFFER_SIZE_BLOCKS *
-            DispatchMemMap::get(dispatch_core_type, 1).dispatch_buffer_block_size_pages(),
+            DispatchMemMap::get(DISPATCH_CORE_TYPE, 1).dispatch_buffer_block_size_pages(),
         dispatch_cb_sem,  // overridden below for h
         split_prefetcher_g ? prefetch_d_downstream_cb_sem : prefetch_downstream_cb_sem,
         DispatchSettings::DISPATCH_BUFFER_SIZE_BLOCKS,
@@ -3321,7 +3321,7 @@ int main(int argc, char** argv) {
         CoreCoord phys_dispatch_relay_demux_core;
         uint32_t packetized_path_test_results_addr;
         uint32_t cq_start =
-            DispatchMemMap::get(CoreType::WORKER).get_host_command_queue_addr(CommandQueueHostAddrType::UNRESERVED);
+            DispatchMemMap::get(DISPATCH_CORE_TYPE).get_host_command_queue_addr(CommandQueueHostAddrType::UNRESERVED);
         uint32_t dev_hugepage_base_g = 2 * (cq_start * sizeof(uint32_t));  // HOST_CQ uses some at the start address
 
         if (test_device_id_g == 0) {
diff --git a/tt_metal/api/tt-metalium/command_queue_interface.hpp b/tt_metal/api/tt-metalium/command_queue_interface.hpp
index 4f7876aa9f24..8320c1cf40a4 100644
--- a/tt_metal/api/tt-metalium/command_queue_interface.hpp
+++ b/tt_metal/api/tt-metalium/command_queue_interface.hpp
@@ -7,6 +7,7 @@
 #include <magic_enum/magic_enum.hpp>
 #include <mutex>
 #include <tt-metalium/tt_align.hpp>
+#include <unordered_map>
 
 #include "cq_commands.hpp"
 #include "dispatch_core_manager.hpp"
@@ -16,6 +17,7 @@
 #include "dispatch_settings.hpp"
 #include "helpers.hpp"
 #include "buffer.hpp"
+#include "umd/device/tt_core_coordinates.h"
 
 // FIXME: Don't do this in header files
 using namespace tt::tt_metal;
@@ -58,51 +60,61 @@ class DispatchMemMap {
     DispatchMemMap(const DispatchMemMap&) = delete;
     DispatchMemMap(DispatchMemMap&& other) noexcept = delete;
 
-    // Returns an instance of the memory map for the provided core_type. If the number of HW CQs is not provided then
-    // the previous one will be used.
-    static const DispatchMemMap& get(const CoreType& core_type, const uint32_t num_hw_cqs = 0) {
-        static DispatchMemMap instance;
-
-        if (num_hw_cqs > 0 && (core_type != instance.last_core_type || num_hw_cqs != instance.hw_cqs)) {
+    //
+    // Returns an instance. The instance is reset if the core_type and/or num_hw_cqs changed from
+    // the last call. The memory region sizes can be configured using DispatchSettings.
+    //
+    // If the settings changed, then force_reinit_with_settings will recreate the instance with
+    // the settings for the given core_type / num_hw_cqs.
+    //
+    static const DispatchMemMap& get(
+        const CoreType& core_type, const uint32_t num_hw_cqs = 0, const bool force_reinit_with_settings = false) {
+        auto& instance = get_instance();
+
+        if (num_hw_cqs > 0 && (core_type != instance.last_core_type || num_hw_cqs != instance.hw_cqs) ||
+            force_reinit_with_settings) {
             instance.reset(core_type, num_hw_cqs);
         }
 
-        TT_FATAL(instance.hw_cqs > 0, "Command Queue is not initialized. Call DispatchMemMap::get with non zero num_hw_cqs.");
+        TT_FATAL(
+            instance.hw_cqs > 0,
+            "Command Queue is not initialized. Call DispatchMemMap::get with non zero num_hw_cqs.");
         return instance;
     }
 
-    uint32_t prefetch_q_entries() const { return prefetch_q_entries_; }
+    uint32_t prefetch_q_entries() const { return settings.prefetch_q_entries_; }
 
-    uint32_t prefetch_q_size() const { return prefetch_q_size_; }
+    uint32_t prefetch_q_size() const { return settings.prefetch_q_size_; }
 
-    uint32_t max_prefetch_command_size() const { return max_prefetch_command_size_; }
+    uint32_t max_prefetch_command_size() const { return settings.prefetch_max_cmd_size_; }
 
     uint32_t cmddat_q_base() const { return cmddat_q_base_; }
 
-    uint32_t cmddat_q_size() const { return cmddat_q_size_; }
+    uint32_t cmddat_q_size() const { return settings.prefetch_cmddat_q_size_; }
 
     uint32_t scratch_db_base() const { return scratch_db_base_; }
 
-    uint32_t scratch_db_size() const { return scratch_db_size_; }
+    uint32_t scratch_db_size() const { return settings.prefetch_scratch_db_size_; }
 
     uint32_t dispatch_buffer_block_size_pages() const { return dispatch_buffer_block_size_pages_; }
 
     uint32_t dispatch_buffer_base() const { return dispatch_buffer_base_; }
 
-    uint32_t dispatch_buffer_pages() const { return dispatch_buffer_pages_; }
+    uint32_t dispatch_buffer_pages() const { return settings.dispatch_pages_; }
 
-    uint32_t prefetch_d_buffer_size() const { return prefetch_d_buffer_size_; }
+    uint32_t prefetch_d_buffer_size() const { return settings.prefetch_d_buffer_size_; }
 
-    uint32_t prefetch_d_buffer_pages() const { return prefetch_d_buffer_pages_; }
+    uint32_t prefetch_d_buffer_pages() const { return settings.prefetch_d_pages_; }
 
-    uint32_t mux_buffer_size(uint8_t num_hw_cqs = 1) const { return prefetch_d_buffer_size_ / num_hw_cqs; }
+    uint32_t mux_buffer_size(uint8_t num_hw_cqs = 1) const { return settings.tunneling_buffer_size_ / num_hw_cqs; }
 
-    uint32_t mux_buffer_pages(uint8_t num_hw_cqs = 1) const { return prefetch_d_buffer_pages_ / num_hw_cqs; }
+    uint32_t mux_buffer_pages(uint8_t num_hw_cqs = 1) const { return settings.tunneling_buffer_pages_ / num_hw_cqs; }
 
-    uint32_t dispatch_s_buffer_size() const { return dispatch_s_buffer_size_; }
+    uint32_t dispatch_s_buffer_size() const { return settings.dispatch_s_buffer_size_; }
 
     uint32_t dispatch_s_buffer_pages() const {
-        return dispatch_s_buffer_size_ / (1 << tt::tt_metal::DispatchSettings::DISPATCH_S_BUFFER_LOG_PAGE_SIZE);
+        return settings.dispatch_s_buffer_size_ /
+               (1 << tt::tt_metal::DispatchSettings::DISPATCH_S_BUFFER_LOG_PAGE_SIZE);
     }
 
     uint32_t get_device_command_queue_addr(const CommandQueueDeviceAddrType& device_addr_type) const {
@@ -125,30 +137,25 @@ class DispatchMemMap {
 private:
     DispatchMemMap() = default;
 
-    // Reset the instance using the default settings for the core_type and num_hw_cqs.
-    void reset(const CoreType& core_type, const uint32_t num_hw_cqs) {
-        const auto settings = DispatchSettings::defaults(core_type, tt::Cluster::instance(), num_hw_cqs);
-        reset(settings);
+    static DispatchMemMap& get_instance() {
+        static DispatchMemMap instance;
+        return instance;
     }
 
-    // Reset the instance using the provided settings
-    void reset(const DispatchSettings& settings) {
+    // Reset the instance using the settings for the core_type and num_hw_cqs.
+    void reset(const CoreType& core_type, const uint32_t num_hw_cqs) {
+        const auto dispatch_settings = DispatchSettings::get(core_type, num_hw_cqs);
+        this->settings = dispatch_settings;
         last_core_type = settings.core_type_;
         hw_cqs = settings.num_hw_cqs_;
 
-        prefetch_q_entries_ = settings.prefetch_q_entries_;
-        max_prefetch_command_size_ = settings.prefetch_max_cmd_size_;
-        cmddat_q_size_ = settings.prefetch_cmddat_q_size_;
-        scratch_db_size_ = settings.prefetch_scratch_db_size_;
-        prefetch_d_buffer_size_ = settings.prefetch_d_buffer_size_;
-        dispatch_s_buffer_size_ = settings.dispatch_s_buffer_size_;
         const auto dispatch_buffer_block_size = settings.dispatch_size_;
         const auto [l1_base, l1_size] = get_device_l1_info(settings.core_type_);
         const auto pcie_alignment = tt::tt_metal::hal.get_alignment(tt::tt_metal::HalMemType::HOST);
         const auto l1_alignment = tt::tt_metal::hal.get_alignment(tt::tt_metal::HalMemType::L1);
 
-        TT_ASSERT(cmddat_q_size_ >= 2 * max_prefetch_command_size_);
-        TT_ASSERT(scratch_db_size_ % 2 == 0);
+        TT_ASSERT(settings.prefetch_cmddat_q_size_ >= 2 * settings.prefetch_max_cmd_size_);
+        TT_ASSERT(settings.prefetch_scratch_db_size_ % 2 == 0);
         TT_ASSERT((dispatch_buffer_block_size & (dispatch_buffer_block_size - 1)) == 0);
         TT_ASSERT(
             DispatchSettings::DISPATCH_MESSAGE_ENTRIES <= DispatchSettings::DISPATCH_MESSAGES_MAX_OFFSET / l1_alignment + 1,
@@ -183,20 +190,17 @@ class DispatchMemMap {
             }
         }
 
-        prefetch_q_size_ = prefetch_q_entries_ * sizeof(DispatchSettings::prefetch_q_entry_type);
         uint32_t prefetch_dispatch_unreserved_base =
             device_cq_addrs_[tt::utils::underlying_type<CommandQueueDeviceAddrType>(
                 CommandQueueDeviceAddrType::UNRESERVED)];
-        cmddat_q_base_ = prefetch_dispatch_unreserved_base + round_size(prefetch_q_size_, pcie_alignment);
-        scratch_db_base_ = cmddat_q_base_ + round_size(cmddat_q_size_, pcie_alignment);
-
-        TT_ASSERT(scratch_db_base_ + scratch_db_size_ < l1_size);
+        cmddat_q_base_ = prefetch_dispatch_unreserved_base + round_size(settings.prefetch_q_size_, pcie_alignment);
+        scratch_db_base_ = cmddat_q_base_ + round_size(settings.prefetch_cmddat_q_size_, pcie_alignment);
         dispatch_buffer_base_ = align(prefetch_dispatch_unreserved_base, 1 << DispatchSettings::DISPATCH_BUFFER_LOG_PAGE_SIZE);
-        dispatch_buffer_pages_ = dispatch_buffer_block_size / (1 << DispatchSettings::DISPATCH_BUFFER_LOG_PAGE_SIZE);
-        dispatch_buffer_block_size_pages_ = dispatch_buffer_pages_ / DispatchSettings::DISPATCH_BUFFER_SIZE_BLOCKS;
+        dispatch_buffer_block_size_pages_ = settings.dispatch_pages_ / DispatchSettings::DISPATCH_BUFFER_SIZE_BLOCKS;
         const uint32_t dispatch_cb_end = dispatch_buffer_base_ + settings.dispatch_size_;
+
+        TT_ASSERT(scratch_db_base_ + settings.prefetch_scratch_db_size_ < l1_size);
         TT_ASSERT(dispatch_cb_end < l1_size);
-        prefetch_d_buffer_pages_ = settings.prefetch_d_pages_;
     }
 
     std::pair<uint32_t, uint32_t> get_device_l1_info(const CoreType& core_type) const {
@@ -219,21 +223,15 @@ class DispatchMemMap {
         return {l1_base, l1_size};
     }
 
-    uint32_t prefetch_q_entries_;
-    uint32_t prefetch_q_size_;
-    uint32_t max_prefetch_command_size_;
     uint32_t cmddat_q_base_;
-    uint32_t cmddat_q_size_;
     uint32_t scratch_db_base_;
-    uint32_t scratch_db_size_;
     uint32_t dispatch_buffer_base_;
+
     uint32_t dispatch_buffer_block_size_pages_;
-    uint32_t dispatch_buffer_pages_;
-    uint32_t prefetch_d_buffer_size_;
-    uint32_t prefetch_d_buffer_pages_;
-    uint32_t dispatch_s_buffer_size_;
     std::vector<uint32_t> device_cq_addrs_;
 
+    DispatchSettings settings;
+
     uint32_t hw_cqs{0}; // 0 means uninitialized
     CoreType last_core_type{CoreType::WORKER};
 };
diff --git a/tt_metal/api/tt-metalium/dispatch_settings.hpp b/tt_metal/api/tt-metalium/dispatch_settings.hpp
index 65242a4f2592..357e5220d16a 100644
--- a/tt_metal/api/tt-metalium/dispatch_settings.hpp
+++ b/tt_metal/api/tt-metalium/dispatch_settings.hpp
@@ -5,10 +5,12 @@
 #pragma once
 
 #include <cstdint>
+#include <magic_enum/magic_enum.hpp>
 #include <unordered_map>
 #include "hal.hpp"
 #include "tt_cluster.hpp"
 #include <tt-metalium/cq_commands.hpp>
+#include <utility>
 #include "umd/device/tt_core_coordinates.h"
 
 namespace tt::tt_metal {
@@ -16,7 +18,76 @@ namespace tt::tt_metal {
 //
 // Dispatch Kernel Settings
 //
-struct DispatchSettings {
+class DispatchSettings {
+private:
+    struct DispatchSettingsContainerKey {
+        CoreType core_type;
+        uint32_t num_hw_cqs;
+
+        bool operator==(const DispatchSettingsContainerKey& other) const {
+            return core_type == other.core_type && num_hw_cqs == other.num_hw_cqs;
+        }
+    };
+
+    struct DispatchSettingsContainerKeyHasher {
+        size_t operator()(const DispatchSettingsContainerKey& k) const {
+            const auto h1 = std::hash<uint32_t>{}(static_cast<int>(k.core_type));
+            const auto h2 = std::hash<uint32_t>{}(k.num_hw_cqs);
+            return h1 ^ (h2 << 1);
+        }
+    };
+
+    using DispatchSettingsContainer =
+        std::unordered_map<DispatchSettingsContainerKey, DispatchSettings, DispatchSettingsContainerKeyHasher>;
+
+    static DispatchSettingsContainer& get_store() {
+        static DispatchSettingsContainer store;
+        return store;
+    }
+
+public:
+    // Returns the default settings for WORKER cores
+    static DispatchSettings worker_defaults(const tt::Cluster& cluster, const uint32_t num_hw_cqs);
+
+    // Returns the default settings for ETH cores
+    static DispatchSettings eth_defaults(const tt::Cluster& cluster, const uint32_t num_hw_cqs);
+
+    // Returns the default settings
+    static DispatchSettings defaults(const CoreType& core_type, const tt::Cluster& cluster, const uint32_t num_hw_cqs);
+
+    // Returns the settings for a core type and number hw cqs. The values can be modified, but customization must occur
+    // before command queue kernels are created.
+    static DispatchSettings& get(const CoreType& core_type, const uint32_t num_hw_cqs) {
+        DispatchSettingsContainerKey k{core_type, num_hw_cqs};
+        auto& store = get_store();
+        if (!store.contains(k)) {
+            TT_THROW(
+                "DispatchSettings is not initialized for CoreType {}, {} CQs",
+                magic_enum::enum_name(core_type),
+                num_hw_cqs);
+        }
+        return store[k];
+    }
+
+    // Reset the settings
+    static void initialize(const tt::Cluster& cluster) {
+        static constexpr std::array<CoreType, 2> k_SupportedCoreTypes{CoreType::ETH, CoreType::WORKER};
+        auto& store = get_store();
+        for (const auto& core_type : k_SupportedCoreTypes) {
+            for (int hw_cqs = 1; hw_cqs <= MAX_NUM_HW_CQS; ++hw_cqs) {
+                DispatchSettingsContainerKey k{core_type, hw_cqs};
+                store[k] = DispatchSettings::defaults(core_type, cluster, hw_cqs);
+            }
+        }
+    }
+
+    // Reset the settings for a core type and number hw cqs to the provided settings
+    static void initialize(const DispatchSettings& other) {
+        auto& store = get_store();
+        DispatchSettingsContainerKey k{other.core_type_, other.num_hw_cqs_};
+        store[k] = other;
+    }
+
     //
     // Non Configurable Settings
     //
@@ -128,15 +199,6 @@ struct DispatchSettings {
         return !(*this == other);
     }
 
-    // Returns the default settings for WORKER cores
-    static DispatchSettings worker_defaults(const tt::Cluster& cluster, const uint32_t num_hw_cqs);
-
-    // Returns the default settings for ETH cores
-    static DispatchSettings eth_defaults(const tt::Cluster& cluster, const uint32_t num_hw_cqs);
-
-    // Returns the default settings
-    static DispatchSettings defaults(const CoreType& core_type, const tt::Cluster& cluster, const uint32_t num_hw_cqs);
-
     DispatchSettings& core_type(const CoreType& val) {
         this->core_type_ = val;
         return *this;
@@ -223,26 +285,4 @@ struct DispatchSettings {
     DispatchSettings& build();
 };
 
-struct DispatchSettingsContainerKey {
-    CoreType core_type;
-    uint32_t num_hw_cqs;
-
-    bool operator==(const DispatchSettingsContainerKey& other) const {
-        return core_type == other.core_type && num_hw_cqs == other.num_hw_cqs;
-    }
-};
-
-using DispatchSettingsContainer = std::unordered_map<DispatchSettingsContainerKey, DispatchSettings>;
-
 }  // namespace tt::tt_metal
-
-namespace std {
-template <>
-struct hash<tt::tt_metal::DispatchSettingsContainerKey> {
-    size_t operator()(const tt::tt_metal::DispatchSettingsContainerKey& k) const {
-        const auto h1 = std::hash<uint32_t>{}(static_cast<int>(k.core_type));
-        const auto h2 = std::hash<uint32_t>{}(k.num_hw_cqs);
-        return h1 ^ (h2 << 1);
-    }
-};
-}  // namespace std
diff --git a/tt_metal/impl/device/device_pool.cpp b/tt_metal/impl/device/device_pool.cpp
index 391c8a26d437..c8a870c8e4cb 100644
--- a/tt_metal/impl/device/device_pool.cpp
+++ b/tt_metal/impl/device/device_pool.cpp
@@ -12,6 +12,8 @@
 #include <set>
 #include <utility>
 
+#include "dispatch_core_manager.hpp"
+#include "dispatch_settings.hpp"
 #include "dprint_server.hpp"
 #include "host_api.hpp"
 #include <tt_metal.hpp>
@@ -203,6 +205,8 @@ void DevicePool::initialize(
     tt::tt_metal::dispatch_core_manager::initialize(dispatch_core_config, num_hw_cqs);
     // Initialize the dispatch query layer, used by runtime command generation
     tt_metal::DispatchQueryManager::initialize(num_hw_cqs);
+    // Initialize DispatchSettings with defaults
+    tt_metal::DispatchSettings::initialize(tt::Cluster::instance());
 
     if (_inst == nullptr) {
         static DevicePool device_pool{};
diff --git a/tt_metal/tt_metal.cpp b/tt_metal/tt_metal.cpp
index cff7a0d113ac..2ccb761ed090 100644
--- a/tt_metal/tt_metal.cpp
+++ b/tt_metal/tt_metal.cpp
@@ -921,6 +921,7 @@ IDevice* CreateDeviceMinimal(
     ZoneScoped;
     tt::tt_metal::dispatch_core_manager::initialize(dispatch_core_config, num_hw_cqs);
     tt_metal::DispatchQueryManager::initialize(num_hw_cqs);
+    tt_metal::DispatchSettings::initialize(tt::Cluster::instance());
     auto dev = new Device(device_id, num_hw_cqs, DEFAULT_L1_SMALL_SIZE, DEFAULT_TRACE_REGION_SIZE, {}, true);
     tt::Cluster::instance().set_internal_routing_info_for_ethernet_cores(true);
     return dev;