diff --git a/rust/scx_utils/src/compat.rs b/rust/scx_utils/src/compat.rs
index edebae80a..44d64bbd6 100644
--- a/rust/scx_utils/src/compat.rs
+++ b/rust/scx_utils/src/compat.rs
@@ -165,7 +165,7 @@ pub fn check_min_requirements() -> Result<()> {
     // ec7e3b0463e1 ("implement-ops") in https://github.com/sched-ext/sched_ext
     // is the current minimum required kernel version.
     // if let Ok(false) | Err(_) = struct_has_field("sched_ext_ops", "dump") {
-    //     bail!("sched_ext_ops.dump() missing, kernel too old?");
+    //     bail!("sched_ext_ops.dump() missing, kernel too old?");
     // }
     Ok(())
 }
@@ -187,7 +187,6 @@ macro_rules! scx_ops_open {
         };
 
         let ops = skel.struct_ops.[<$ops _mut>]();
-        // ahh think this part matters a lot.
         // let path = std::path::Path::new("/sys/kernel/sched_ext/hotplug_seq");
         // let val = match std::fs::read_to_string(&path) {
@@ -219,7 +218,7 @@ macro_rules! scx_ops_load {
     ($skel: expr, $ops: ident, $uei: ident) => { 'block: {
         scx_utils::paste! {
-            // scx_utils::uei_set_size!($skel, $ops, $uei);
+            //scx_utils::uei_set_size!($skel, $ops, $uei);
             $skel.load().context("Failed to load BPF program")
         }
     }};
diff --git a/rust/scx_utils/src/user_exit_info.rs b/rust/scx_utils/src/user_exit_info.rs
index 888a87483..f31fe9220 100644
--- a/rust/scx_utils/src/user_exit_info.rs
+++ b/rust/scx_utils/src/user_exit_info.rs
@@ -2,7 +2,7 @@
 //
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2.
-use crate::bindings;
+//use crate::bindings;
 use crate::compat;
 use anyhow::bail;
 use anyhow::Result;
diff --git a/scheds/include/scx/user_exit_info.h b/scheds/include/scx/user_exit_info.h
index 0886cf097..8bdf3620f 100644
--- a/scheds/include/scx/user_exit_info.h
+++ b/scheds/include/scx/user_exit_info.h
@@ -17,7 +17,7 @@ enum uei_sizes {
 };
 
 struct user_exit_info {
-        int type;
+        int kind;
         // s64 exit_code;
         char reason[UEI_REASON_LEN];
         char msg[UEI_MSG_LEN];
 };
@@ -28,6 +28,15 @@
 #include "vmlinux.h"
 #include
 
+static inline void uei_record(struct user_exit_info *uei,
+                              const struct scx_exit_info *ei)
+{
+        bpf_probe_read_kernel_str(uei->reason, sizeof(uei->reason), ei->reason);
+        bpf_probe_read_kernel_str(uei->msg, sizeof(uei->msg), ei->msg);
+        /* use __sync to force memory barrier */
+        __sync_val_compare_and_swap(&uei->kind, uei->kind, ei->type);
+}
+
 #define UEI_DEFINE(__name) \
         char RESIZABLE_ARRAY(data, __name##_dump); \
         const volatile u32 __name##_dump_len; \
@@ -61,7 +70,7 @@ struct user_exit_info {
 #define UEI_EXITED(__skel, __uei_name) ({ \
         /* use __sync to force memory barrier */ \
-        __sync_val_compare_and_swap(&(__skel)->data->__uei_name.type, -1, -1); \
+        __sync_val_compare_and_swap(&(__skel)->data->__uei_name.kind, -1, -1); \
 })
 
 #define UEI_REPORT(__skel, __uei_name) ({ \
diff --git a/scheds/rust/scx_layered/src/bpf/main.bpf.c b/scheds/rust/scx_layered/src/bpf/main.bpf.c
index bd9f3a507..a5c456715 100644
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -1,6 +1,5 @@
 /* Copyright (c) Meta Platforms, Inc. and affiliates. */
 #include
-#include
 #include "intf.h"
 
@@ -33,12 +32,20 @@ struct layer layers[MAX_LAYERS];
 u32 fallback_cpu;
 static u32 preempt_cursor;
 
+volatile u32 ___sched_error_line;
+
 #define dbg(fmt, args...) do { if (debug) bpf_printk(fmt, ##args); } while (0)
 #define trace(fmt, args...) do { if (debug > 1) bpf_printk(fmt, ##args); } while (0)
 
-#include "util.bpf.c"
+#include "util.bpf.h"
+
+#define workaround_err(fmt, args...) \
+        do { \
+                bpf_printk(fmt, ##args); \
+                ___sched_error_line = __LINE__; \
+        } while (0)
 
-UEI_DEFINE(uei);
+struct user_exit_info uei;
 
 static inline bool vtime_before(u64 a, u64 b)
 {
@@ -315,6 +322,7 @@ struct task_ctx {
         struct bpf_cpumask __kptr *layered_cpumask;
 
         bool all_cpus_allowed;
+        bool dispatch_local;
         u64 runnable_at;
         u64 running_at;
 };
@@ -377,7 +385,7 @@ int BPF_PROG(tp_cgroup_attach_task, struct cgroup *cgrp, const char *cgrp_path,
         thread_head = &leader->signal->thread_head;
 
         if (!(next = bpf_task_acquire(leader))) {
-                scx_bpf_error("failed to acquire leader");
+                workaround_err("failed to acquire leader");
                 return 0;
         }
@@ -569,9 +577,8 @@ s32 BPF_STRUCT_OPS(layered_select_cpu, struct task_struct *p, s32 prev_cpu, u64
         cpu = pick_idle_cpu(p, prev_cpu, cctx, tctx, layer, true);
         if (cpu >= 0) {
-                lstat_inc(LSTAT_SEL_LOCAL, layer, cctx);
                 u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
-                scx_bpf_dispatch(p, SCX_DSQ_LOCAL, layer_slice_ns, 0);
+                tctx->dispatch_local = true;
                 return cpu;
         } else {
                 return prev_cpu;
         }
@@ -589,7 +596,7 @@ bool pick_idle_cpu_and_kick(struct task_struct *p, s32 task_cpu,
         if (cpu >= 0) {
                 lstat_inc(LSTAT_KICK, layer, cctx);
-                scx_bpf_kick_cpu(cpu, 0 /* SCX_KICK_IDLE */);
+                scx_bpf_kick_cpu(cpu, 0 /*SCX_KICK_IDLE*/);
                 return true;
         } else {
                 return false;
         }
@@ -658,6 +665,13 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
             !(layer = lookup_layer(tctx->layer)))
                 return;
 
+        if (tctx->dispatch_local) {
+                tctx->dispatch_local = false;
+                lstat_inc(LSTAT_SEL_LOCAL, layer, cctx);
+                scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, enq_flags);
+                return;
+        }
+
         try_preempt_first = cctx->try_preempt_first;
         cctx->try_preempt_first = false;
         u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
@@ -1263,8 +1277,8 @@ void BPF_STRUCT_OPS(layered_running, struct task_struct *p)
                 }
         }
 
-        if (layer->perf > 0)
-                __COMPAT_scx_bpf_cpuperf_set(task_cpu, layer->perf);
+        if (layer->perf > 0)
+                __COMPAT_scx_bpf_cpuperf_set(task_cpu, layer->perf);
 
         cctx->maybe_idle = false;
 }
@@ -1377,6 +1391,7 @@ s32 BPF_STRUCT_OPS(layered_prep_enable, struct task_struct *p, struct scx_enable
 {
         struct task_ctx *tctx;
         struct bpf_cpumask *cpumask;
+
         /*
          * XXX - We want BPF_NOEXIST but bpf_map_delete_elem() in .disable() may
          * fail spuriously due to BPF recursion protection triggering
@@ -1417,103 +1432,14 @@ s32 BPF_STRUCT_OPS(layered_prep_enable, struct task_struct *p, struct scx_enable
          * fork path, let's delay the layer selection until the first
          * runnable().
          */
-        return 0;
-}
-
-/*
-static u64 dsq_first_runnable_for_ms(u64 dsq_id, u64 now)
-{
-        struct task_struct *p;
-
-        if (dsq_id > LO_FALLBACK_DSQ)
-                return 0;
-
-        bpf_for_each(scx_dsq, p, dsq_id, 0) {
-                struct task_ctx *tctx;
-
-                if ((tctx = lookup_task_ctx(p)))
-                        return (now - tctx->runnable_at) / 1000000;
-        }
         return 0;
 }
-*/
-/*
-static void dump_layer_cpumask(int idx)
-{
-        struct cpumask *layer_cpumask;
-        s32 cpu;
-        char buf[128] = "", *p;
-
-        if (!(layer_cpumask = lookup_layer_cpumask(idx)))
-                return;
-
-        bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) {
-                if (!(p = MEMBER_VPTR(buf, [idx++])))
-                        break;
-                if (bpf_cpumask_test_cpu(cpu, layer_cpumask))
-                        *p++ = '0' + cpu % 10;
-                else
-                        *p++ = '.';
-
-                if ((cpu & 7) == 7) {
-                        if (!(p = MEMBER_VPTR(buf, [idx++])))
-                                break;
-                        *p++ = '|';
-                }
-        }
-        buf[sizeof(buf) - 1] = '\0';
-
-        scx_bpf_dump("%s", buf);
-}
-*/
-/*
-void BPF_STRUCT_OPS(layered_dump struct scx_dump_ctx *dctx)
-{
-        u64 now = bpf_ktime_get_ns();
-        int i, j, idx;
-        struct layer *layer;
-
-        bpf_for(i, 0, nr_layers) {
-                layer = lookup_layer(i);
-                if (!layer) {
-                        scx_bpf_error("unabled to lookup layer %d", i);
-                        continue;
-                }
-
-                if (disable_topology) {
-                        scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=",
-                                     i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i),
-                                     dsq_first_runnable_for_ms(i, now));
-                } else {
-                        bpf_for(j, 0, nr_llcs) {
-                                if (!(layer->cache_mask & (1 << j)))
-                                        continue;
-
-                                idx = layer_dsq_id(layer->idx, j);
-                                scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d -%llums cpus=",
-                                             i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx),
-                                             dsq_first_runnable_for_ms(idx, now));
-                        }
-                }
-                dump_layer_cpumask(i);
-                scx_bpf_dump("\n");
-        }
-
-        scx_bpf_dump("HI_FALLBACK nr_queued=%d -%llums\n",
-                     scx_bpf_dsq_nr_queued(HI_FALLBACK_DSQ),
-                     dsq_first_runnable_for_ms(HI_FALLBACK_DSQ, now));
-        scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n",
-                     scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ),
-                     dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now));
-}*/
 
 void BPF_STRUCT_OPS(layered_disable, struct task_struct *p)
 {
         struct cpu_ctx *cctx;
         struct task_ctx *tctx;
-        s32 pid = p->pid;
-        int ret;
 
         if (!(cctx = lookup_cpu_ctx(-1)) || !(tctx = lookup_task_ctx(p)))
                 return;
@@ -1522,6 +1448,91 @@ void BPF_STRUCT_OPS(layered_disable, struct task_struct *p)
         __sync_fetch_and_add(&layers[tctx->layer].nr_tasks, -1);
 }
 
+// static u64 dsq_first_runnable_for_ms(u64 dsq_id, u64 now)
+// {
+//         struct task_struct *p;
+//
+//         if (dsq_id > LO_FALLBACK_DSQ)
+//                 return 0;
+//
+//         bpf_for_each(scx_dsq, p, dsq_id, 0) {
+//                 struct task_ctx *tctx;
+//
+//                 if ((tctx = lookup_task_ctx(p)))
+//                         return (now - tctx->runnable_at) / 1000000;
+//         }
+//
+//         return 0;
+// }
+
+// static void dump_layer_cpumask(int idx)
+// {
+//         struct cpumask *layer_cpumask;
+//         s32 cpu;
+//         char buf[128] = "", *p;
+//
+//         if (!(layer_cpumask = lookup_layer_cpumask(idx)))
+//                 return;
+//
+//         bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) {
+//                 if (!(p = MEMBER_VPTR(buf, [idx++])))
+//                         break;
+//                 if (bpf_cpumask_test_cpu(cpu, layer_cpumask))
+//                         *p++ = '0' + cpu % 10;
+//                 else
+//                         *p++ = '.';
+//
+//                 if ((cpu & 7) == 7) {
+//                         if (!(p = MEMBER_VPTR(buf, [idx++])))
+//                                 break;
+//                         *p++ = '|';
+//                 }
+//         }
+//         buf[sizeof(buf) - 1] = '\0';
+//
+//         scx_bpf_dump("%s", buf);
+// }
+//
+// void BPF_STRUCT_OPS(layered_dump, struct scx_dump_ctx *dctx)
+// {
+//         u64 now = bpf_ktime_get_ns();
+//         int i, j, idx;
+//         struct layer *layer;
+//
+//         bpf_for(i, 0, nr_layers) {
+//                 layer = lookup_layer(i);
+//                 if (!layer) {
+//                         scx_bpf_error("unabled to lookup layer %d", i);
+//                         continue;
+//                 }
+//
+//                 if (disable_topology) {
+//                         scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=",
+//                                      i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i),
+//                                      dsq_first_runnable_for_ms(i, now));
+//                 } else {
+//                         bpf_for(j, 0, nr_llcs) {
+//                                 if (!(layer->cache_mask & (1 << j)))
+//                                         continue;
+//
+//                                 idx = layer_dsq_id(layer->idx, j);
+//                                 scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d -%llums cpus=",
+//                                              i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx),
+//                                              dsq_first_runnable_for_ms(idx, now));
+//                         }
+//                 }
+//                 dump_layer_cpumask(i);
+//                 scx_bpf_dump("\n");
+//         }
+//
+//         scx_bpf_dump("HI_FALLBACK nr_queued=%d -%llums\n",
+//                      scx_bpf_dsq_nr_queued(HI_FALLBACK_DSQ),
+//                      dsq_first_runnable_for_ms(HI_FALLBACK_DSQ, now));
+//         scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n",
+//                      scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ),
+//                      dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now));
+// }
+
 s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
 {
         struct bpf_cpumask *cpumask;
@@ -1529,6 +1540,14 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
 
         __COMPAT_scx_bpf_switch_all();
 
+        ret = scx_bpf_create_dsq(HI_FALLBACK_DSQ, -1);
+        if (ret < 0)
+                return ret;
+
+        ret = scx_bpf_create_dsq(LO_FALLBACK_DSQ, -1);
+        if (ret < 0)
+                return ret;
+
         cpumask = bpf_cpumask_create();
         if (!cpumask)
                 return -ENOMEM;
@@ -1551,11 +1570,11 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
         if (cpumask)
                 bpf_cpumask_release(cpumask);
 
-        bpf_for(i, 0, nr_nodes) {
-                ret = create_node(i);
-                if (ret)
-                        return ret;
-        }
+        bpf_for(i, 0, nr_nodes) {
+                ret = create_node(i);
+                if (ret)
+                        return ret;
+        }
 
         dbg("CFG: Dumping configuration, nr_online_cpus=%d smt_enabled=%d",
             nr_online_cpus, smt_enabled);
@@ -1619,6 +1638,21 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
                         case MATCH_NICE_EQUALS:
                                 dbg("%s NICE_EQUALS %d", header, match->nice);
                                 break;
+                        case MATCH_USER_ID_EQUALS:
+                                dbg("%s USER_ID %u", header, match->user_id);
+                                break;
+                        case MATCH_GROUP_ID_EQUALS:
+                                dbg("%s GROUP_ID %u", header, match->group_id);
+                                break;
+                        case MATCH_PID_EQUALS:
+                                dbg("%s PID %u", header, match->pid);
+                                break;
+                        case MATCH_PPID_EQUALS:
+                                dbg("%s PPID %u", header, match->ppid);
+                                break;
+                        case MATCH_TGID_EQUALS:
+                                dbg("%s TGID %u", header, match->tgid);
+                                break;
                         default:
                                 scx_bpf_error("%s Invalid kind", header);
                                 return -EINVAL;
@@ -1628,16 +1662,13 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
                         dbg("CFG DEFAULT");
                 }
         }
-
+
+        u64 llc_dsq_id = 0;
         bpf_for(i, 0, nr_layers) {
                 struct layer_cpumask_wrapper *cpumaskw;
 
                 layers[i].idx = i;
 
-                ret = scx_bpf_create_dsq(i, -1);
-                if (ret < 0)
-                        return ret;
-
                 if (!(cpumaskw = bpf_map_lookup_elem(&layer_cpumasks, &i)))
                         return -ENOENT;
@@ -1655,6 +1686,23 @@
                 cpumask = bpf_kptr_xchg(&cpumaskw->cpumask, cpumask);
                 if (cpumask)
                         bpf_cpumask_release(cpumask);
+
+                // create the dsqs for the layer
+                if (disable_topology) {
+                        ret = scx_bpf_create_dsq(i, -1);
+                        if (ret < 0)
+                                return ret;
+                } else {
+                        bpf_for(j, 0, nr_llcs) {
+                                int node_id = llc_node_id(i);
+                                dbg("creating dsq %llu for layer %d on node %d",
+                                    llc_dsq_id, i, node_id);
+                                ret = scx_bpf_create_dsq(llc_dsq_id, node_id);
+                                if (ret < 0)
+                                        return ret;
+                                llc_dsq_id++;
+                        }
+                }
         }
 
         return 0;
@@ -1662,7 +1710,7 @@ void BPF_STRUCT_OPS(layered_exit, struct scx_exit_info *ei)
 {
-        UEI_RECORD(uei, ei);
+        uei_record(&uei, ei);
 }
 
 SCX_OPS_DEFINE(layered,
@@ -1677,9 +1725,11 @@ SCX_OPS_DEFINE(layered,
         .set_weight = (void *)layered_set_weight,
         .set_cpumask = (void *)layered_set_cpumask,
         .cpu_release = (void *)layered_cpu_release,
-        .prep_enable = (void *)layered_prep_enable,
-        .disable = (void *)layered_disable,
-        /* .dump = (void *)layered_dump, */
+        .prep_enable = (void *)layered_prep_enable,
+        .disable = (void *)layered_disable,
+        // .dump = (void *)layered_dump,
         .init = (void *)layered_init,
         .exit = (void *)layered_exit,
+        .flags = SCX_OPS_CGROUP_KNOB_WEIGHT | SCX_OPS_ENQ_LAST,
         .name = "layered");
+
diff --git a/scheds/rust/scx_layered/src/bpf/util.bpf.h b/scheds/rust/scx_layered/src/bpf/util.bpf.h
new file mode 120000
index 000000000..ee7b16c86
--- /dev/null
+++ b/scheds/rust/scx_layered/src/bpf/util.bpf.h
@@ -0,0 +1 @@
+util.bpf.c
\ No newline at end of file
diff --git a/scheds/rust/scx_layered/src/main.rs b/scheds/rust/scx_layered/src/main.rs
index 38029a4ad..685688614 100644
--- a/scheds/rust/scx_layered/src/main.rs
+++ b/scheds/rust/scx_layered/src/main.rs
@@ -2,14 +2,20 @@
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2.
-mod bpf_skel;
 mod stats;
+pub mod bpf_skel;
 pub use bpf_skel::*;
 pub mod bpf_intf;
+use stats::LayerStats;
+use stats::StatsReq;
+use stats::StatsRes;
+use stats::SysStats;
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
 use std::collections::HashMap;
 use std::ffi::CString;
+use core::ffi::CStr;
 use std::fs;
 use std::io::Read;
 use std::io::Write;
@@ -43,20 +49,10 @@ use scx_stats::prelude::*;
 use scx_utils::compat;
 use scx_utils::init_libbpf_logging;
 use scx_utils::ravg::ravg_read;
-use scx_utils::scx_ops_attach;
-use scx_utils::scx_ops_load;
-use scx_utils::scx_ops_open;
-use scx_utils::uei_exited;
-use scx_utils::uei_report;
 use scx_utils::Cache;
 use scx_utils::Topology;
-use scx_utils::UserExitInfo;
 use serde::Deserialize;
 use serde::Serialize;
-use stats::LayerStats;
-use stats::StatsReq;
-use stats::StatsRes;
-use stats::SysStats;
 
 const RAVG_FRAC_BITS: u32 = bpf_intf::ravg_consts_RAVG_FRAC_BITS;
 const MAX_CPUS: usize = bpf_intf::consts_MAX_CPUS as usize;
@@ -420,6 +416,12 @@ struct Opts {
     #[clap(long)]
     monitor: Option,
 
+    /// DEPRECATED: Enable output of stats in OpenMetrics format instead of via
+    /// log macros. This option is useful if you want to collect stats in some
+    /// monitoring database like prometheseus.
+    #[clap(short = 'o', long)]
+    open_metrics_format: bool,
+
     /// Run with example layer specifications (useful for e.g. CI pipelines)
     #[clap(long)]
     run_example: bool,
@@ -455,9 +457,7 @@ enum LayerGrowthAlgo {
 }
 
 impl Default for LayerGrowthAlgo {
-    fn default() -> Self {
-        LayerGrowthAlgo::Sticky
-    }
+    fn default() -> Self { LayerGrowthAlgo::Sticky }
 }
 
 #[derive(Clone, Debug, Serialize, Deserialize)]
@@ -888,6 +888,67 @@ impl Stats {
     }
 }
 
+
+#[derive(Debug, Default)]
+struct UserExitInfo {
+    kind: i32,
+    reason: Option,
+    msg: Option,
+}
+
+impl UserExitInfo {
+    fn read(bpf_uei: &types::user_exit_info) -> Result {
+        let kind = unsafe { std::ptr::read_volatile(&bpf_uei.kind as *const _) };
+
+        let (reason, msg) = if kind != 0 {
+            (
+                Some(
+                    unsafe { CStr::from_ptr(bpf_uei.reason.as_ptr() as *const _) }
+                        .to_str()
+                        .context("Failed to convert reason to string")?
+                        .to_string(),
+                )
+                .filter(|s| !s.is_empty()),
+                Some(
+                    unsafe { CStr::from_ptr(bpf_uei.msg.as_ptr() as *const _) }
+                        .to_str()
+                        .context("Failed to convert msg to string")?
+                        .to_string(),
+                )
+                .filter(|s| !s.is_empty()),
+            )
+        } else {
+            (None, None)
+        };
+
+        Ok(Self { kind, reason, msg })
+    }
+
+    fn exited(bpf_uei: &types::user_exit_info) -> Result {
+        Ok(Self::read(bpf_uei)?.kind != 0)
+    }
+
+    fn report(&self) -> Result<()> {
+        let why = match (&self.reason, &self.msg) {
+            (Some(reason), None) => format!("{}", reason),
+            (Some(reason), Some(msg)) => format!("{} ({})", reason, msg),
+            _ => "".into(),
+        };
+
+        match self.kind {
+            0 => Ok(()),
+            etype => {
+                if etype != 64 {
+                    bail!("EXIT: kind={} {}", etype, why);
+                } else {
+                    info!("EXIT: {}", why);
+                    Ok(())
+                }
+            }
+        }
+    }
+}
+
 #[derive(Debug)]
 struct CpuPool {
     nr_cores: usize,
@@ -1092,11 +1153,10 @@ impl CpuPool {
 }
 
 fn layer_core_order(
-    spec: &LayerSpec,
     growth_algo: LayerGrowthAlgo,
     layer_idx: usize,
-    topo: &Topology,
-) -> Vec {
+    topo: &Topology
+    ) -> Vec {
     let mut core_order = vec![];
     match growth_algo {
         LayerGrowthAlgo::Sticky => {
@@ -1146,20 +1206,27 @@ struct Layer {
 }
 
 impl Layer {
-    fn new(spec: &LayerSpec, idx: usize, cpu_pool: &CpuPool, topo: &Topology) -> Result {
-        let name = &spec.name;
-        let kind = spec.kind.clone();
+    fn new(
+        idx: usize,
+        cpu_pool: &CpuPool,
+        name: &str,
+        kind: LayerKind,
+        topo: &Topology,
+    ) -> Result {
         let mut cpus = bitvec![0; cpu_pool.nr_cpus];
         cpus.fill(false);
         let mut allowed_cpus = bitvec![0; cpu_pool.nr_cpus];
+        let mut layer_growth_algo = LayerGrowthAlgo::Sticky;
         match &kind {
             LayerKind::Confined {
                 cpus_range,
                 util_range,
                 nodes,
                 llcs,
+                growth_algo,
                 ..
             } => {
+                layer_growth_algo = growth_algo.clone();
                 let cpus_range = cpus_range.unwrap_or((0, std::usize::MAX));
                 if cpus_range.0 > cpus_range.1 || cpus_range.1 == 0 {
                     bail!("invalid cpus_range {:?}", cpus_range);
                 }
@@ -1195,7 +1262,9 @@ impl Layer {
                     bail!("invalid util_range {:?}", util_range);
                 }
             }
-            LayerKind::Grouped { nodes, llcs, .. } | LayerKind::Open { nodes, llcs, .. } => {
+            LayerKind::Grouped { growth_algo, nodes, llcs, .. } |
+            LayerKind::Open { growth_algo, nodes, llcs, .. } => {
+                layer_growth_algo = growth_algo.clone();
                 if nodes.len() == 0 && llcs.len() == 0 {
                     allowed_cpus.fill(true);
                 } else {
@@ -1220,13 +1289,7 @@ impl Layer {
             }
         }
 
-        let layer_growth_algo = match &kind {
-            LayerKind::Confined { growth_algo, .. }
-            | LayerKind::Grouped { growth_algo, .. }
-            | LayerKind::Open { growth_algo, .. } => growth_algo.clone(),
-        };
-
-        let core_order = layer_core_order(spec, layer_growth_algo, idx, topo);
+        let core_order = layer_core_order(layer_growth_algo, idx, topo);
 
         Ok(Self {
             name: name.into(),
@@ -1273,7 +1336,8 @@ impl Layer {
         {
             trace!(
                 "layer-{} needs more CPUs (util={:.3}) but is over the load fraction",
-                &self.name, layer_util
+                &self.name,
+                layer_util
             );
             return Ok(false);
         }
@@ -1579,9 +1643,9 @@ impl<'a, 'b> Scheduler<'a, 'b> {
             perf_set |= layer.perf > 0;
         }
 
-        if perf_set && !compat::ksym_exists("scx_bpf_cpuperf_set")? {
-            warn!("cpufreq support not available, ignoring perf configurations");
-        }
+        if perf_set && !compat::ksym_exists("scx_bpf_cpuperf_set")? {
+            warn!("cpufreq support not available, ignoring perf configurations");
+        }
 
         Ok(())
     }
@@ -1591,7 +1655,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         skel.maps.rodata_data.nr_llcs = 0;
 
         for node in topo.nodes() {
-            debug!(
+            info!(
                 "configuring node {}, LLCs {:?}",
                 node.id(),
                 node.llcs().len()
@@ -1599,7 +1663,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
             skel.maps.rodata_data.nr_llcs += node.llcs().len() as u32;
 
             for (_, llc) in node.llcs() {
-                debug!("configuring llc {:?} for node {:?}", llc.id(), node.id());
+                info!("configuring llc {:?} for node {:?}", llc.id(), node.id());
                 skel.maps.rodata_data.llc_numa_id_map[llc.id()] = node.id() as u32;
             }
         }
@@ -1622,7 +1686,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         let mut skel_builder = BpfSkelBuilder::default();
         skel_builder.obj_builder.debug(opts.verbose > 1);
         init_libbpf_logging(None);
-        let mut skel = scx_ops_open!(skel_builder, open_object, layered)?;
+        let mut skel = skel_builder.open(open_object).context("failed to open BPF program")?;
 
         // scheduler_tick() got renamed to sched_tick() during v6.10-rc.
         let sched_tick_name = match compat::ksym_exists("sched_tick")? {
@@ -1657,11 +1721,17 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         Self::init_layers(&mut skel, opts, layer_specs, &topo)?;
         Self::init_nodes(&mut skel, opts, &topo);
 
-        let mut skel = scx_ops_load!(skel, layered, uei)?;
+        let mut skel = skel.load().context("Failed to load BPF program")?;
 
         let mut layers = vec![];
         for (idx, spec) in layer_specs.iter().enumerate() {
-            layers.push(Layer::new(&spec, idx, &cpu_pool, &topo)?);
+            layers.push(Layer::new(
+                idx,
+                &cpu_pool,
+                &spec.name,
+                spec.kind.clone(),
+                &topo,
+            )?);
         }
 
         // Other stuff.
@@ -1674,11 +1744,10 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         // huge problem in the interim until we figure it out.
 
         // Attach.
-        let struct_ops = scx_ops_attach!(skel, layered)?;
         let stats_server = StatsServer::new(stats::server_data()).launch()?;
 
-        let sched = Self {
-            struct_ops: Some(struct_ops),
+        let mut sched = Self {
+            struct_ops: None,
             layer_specs,
 
             sched_intv: Duration::from_secs_f64(opts.interval),
@@ -1698,6 +1767,20 @@ impl<'a, 'b> Scheduler<'a, 'b> {
             stats_server,
         };
 
+        sched
+            .skel
+            .attach()
+            .context("Failed to attach BPF program")?;
+
+        sched.struct_ops = Some(
+            sched
+                .skel
+                .maps
+                .layered
+                .attach_struct_ops()
+                .context("Failed to attach layered struct ops")?,
+        );
+
         info!("Layered Scheduler Attached. Run `scx_layered --monitor` for metrics.");
 
         Ok(sched)
@@ -1821,12 +1904,12 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         Ok(sys_stats)
     }
 
-    fn run(&mut self, shutdown: Arc) -> Result<(), anyhow::Error> {
-let (res_ch, req_ch) = self.stats_server.channels();
+    fn run(&mut self, shutdown: Arc) -> Result<()> {
+        let (res_ch, req_ch) = self.stats_server.channels();
         let mut next_sched_at = Instant::now() + self.sched_intv;
         let mut cpus_ranges = HashMap::>::new();
 
-        while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel, uei) {
+        while !shutdown.load(Ordering::Relaxed) && !UserExitInfo::exited(&self.skel.maps.bss_data.uei)? {
             let now = Instant::now();
 
             if now >= next_sched_at {
@@ -1873,9 +1956,7 @@ let (res_ch, req_ch) = self.stats_server.channels();
         }
 
         self.struct_ops.take();
-        let uei: UserExitInfo = uei_report!(&self.skel, uei)?;
-        uei.report()?;
-        Ok(())
+        UserExitInfo::read(&self.skel.maps.bss_data.uei)?.report()
     }
 }
@@ -2053,6 +2134,10 @@ fn main() -> Result<()> {
         );
     }
 
+    if opts.open_metrics_format {
+        warn!("open_metrics_format is deprecated");
+    }
+
     debug!("specs={}", serde_json::to_string_pretty(&layer_config)?);
     verify_layer_specs(&layer_config.specs)?;
@@ -2075,5 +2160,6 @@ fn main() -> Result<()> {
     let mut open_object = MaybeUninit::uninit();
     let mut sched = Scheduler::init(&opts, &layer_config.specs, &mut open_object)?;
-    sched.run(shutdown.clone())
+    sched.run(shutdown.clone())
 }
+