diff --git a/rust/scx_utils/src/compat.rs b/rust/scx_utils/src/compat.rs
index edebae80a..44d64bbd6 100644
--- a/rust/scx_utils/src/compat.rs
+++ b/rust/scx_utils/src/compat.rs
@@ -165,7 +165,7 @@ pub fn check_min_requirements() -> Result<()> {
     // ec7e3b0463e1 ("implement-ops") in https://github.com/sched-ext/sched_ext
     // is the current minimum required kernel version.
     // if let Ok(false) | Err(_) = struct_has_field("sched_ext_ops", "dump") {
-    //     bail!("sched_ext_ops.dump() missing, kernel too old?");
+    //     bail!("sched_ext_ops.dump() missing, kernel too old?");
     // }
     Ok(())
 }
@@ -187,7 +187,6 @@ macro_rules! scx_ops_open {
         };
 
         let ops = skel.struct_ops.[<$ops _mut>]();
-        // ahh think this part matters a lot.
         // let path = std::path::Path::new("/sys/kernel/sched_ext/hotplug_seq");
         // let val = match std::fs::read_to_string(&path) {
@@ -219,7 +218,7 @@ macro_rules! scx_ops_load {
     ($skel: expr, $ops: ident, $uei: ident) => { 'block: {
         scx_utils::paste! {
-            // scx_utils::uei_set_size!($skel, $ops, $uei);
+            //scx_utils::uei_set_size!($skel, $ops, $uei);
             $skel.load().context("Failed to load BPF program")
         }
     }};
diff --git a/rust/scx_utils/src/user_exit_info.rs b/rust/scx_utils/src/user_exit_info.rs
index 888a87483..f31fe9220 100644
--- a/rust/scx_utils/src/user_exit_info.rs
+++ b/rust/scx_utils/src/user_exit_info.rs
@@ -2,7 +2,7 @@
 //
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2.
-use crate::bindings;
+//use crate::bindings;
 use crate::compat;
 use anyhow::bail;
 use anyhow::Result;
diff --git a/scheds/include/scx/user_exit_info.h b/scheds/include/scx/user_exit_info.h
index 0886cf097..8bdf3620f 100644
--- a/scheds/include/scx/user_exit_info.h
+++ b/scheds/include/scx/user_exit_info.h
@@ -17,7 +17,7 @@ enum uei_sizes {
 };
 
 struct user_exit_info {
-        int type;
+        int kind;
         // s64 exit_code;
         char reason[UEI_REASON_LEN];
         char msg[UEI_MSG_LEN];
 };
@@ -28,6 +28,15 @@
 #include "vmlinux.h"
 #include
 
+static inline void uei_record(struct user_exit_info *uei,
+                              const struct scx_exit_info *ei)
+{
+        bpf_probe_read_kernel_str(uei->reason, sizeof(uei->reason), ei->reason);
+        bpf_probe_read_kernel_str(uei->msg, sizeof(uei->msg), ei->msg);
+        /* use __sync to force memory barrier */
+        __sync_val_compare_and_swap(&uei->kind, uei->kind, ei->type);
+}
+
 #define UEI_DEFINE(__name) \
         char RESIZABLE_ARRAY(data, __name##_dump); \
         const volatile u32 __name##_dump_len; \
@@ -61,7 +70,7 @@ struct user_exit_info {
 #define UEI_EXITED(__skel, __uei_name) ({ \
         /* use __sync to force memory barrier */ \
-        __sync_val_compare_and_swap(&(__skel)->data->__uei_name.type, -1, -1); \
+        __sync_val_compare_and_swap(&(__skel)->data->__uei_name.kind, -1, -1); \
 })
 
 #define UEI_REPORT(__skel, __uei_name) ({ \
diff --git a/scheds/rust/scx_layered/src/bpf/main.bpf.c b/scheds/rust/scx_layered/src/bpf/main.bpf.c
index bd9f3a507..a5c456715 100644
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -1,6 +1,5 @@
 /* Copyright (c) Meta Platforms, Inc. and affiliates. */
 #include
-#include
 #include "intf.h"
 
@@ -33,12 +32,20 @@ struct layer layers[MAX_LAYERS];
 u32 fallback_cpu;
 static u32 preempt_cursor;
 
+volatile u32 ___sched_error_line;
+
 #define dbg(fmt, args...) do { if (debug) bpf_printk(fmt, ##args); } while (0)
 #define trace(fmt, args...) do { if (debug > 1) bpf_printk(fmt, ##args); } while (0)
 
-#include "util.bpf.c"
+#include "util.bpf.h"
+
+#define workaround_err(fmt, args...) \
+        do { \
+                bpf_printk(fmt, ##args); \
+                ___sched_error_line = __LINE__; \
+        } while (0)
 
-UEI_DEFINE(uei);
+struct user_exit_info uei;
 
 static inline bool vtime_before(u64 a, u64 b)
 {
@@ -315,6 +322,7 @@ struct task_ctx {
         struct bpf_cpumask __kptr *layered_cpumask;
 
         bool all_cpus_allowed;
+        bool dispatch_local;
         u64 runnable_at;
         u64 running_at;
 };
@@ -377,7 +385,7 @@ int BPF_PROG(tp_cgroup_attach_task, struct cgroup *cgrp, const char *cgrp_path,
         thread_head = &leader->signal->thread_head;
 
         if (!(next = bpf_task_acquire(leader))) {
-                scx_bpf_error("failed to acquire leader");
+                workaround_err("failed to acquire leader");
                 return 0;
         }
@@ -569,9 +577,8 @@ s32 BPF_STRUCT_OPS(layered_select_cpu, struct task_struct *p, s32 prev_cpu, u64
         cpu = pick_idle_cpu(p, prev_cpu, cctx, tctx, layer, true);
         if (cpu >= 0) {
-                lstat_inc(LSTAT_SEL_LOCAL, layer, cctx);
                 u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
-                scx_bpf_dispatch(p, SCX_DSQ_LOCAL, layer_slice_ns, 0);
+                tctx->dispatch_local = true;
                 return cpu;
         } else {
                 return prev_cpu;
         }
@@ -589,7 +596,7 @@ bool pick_idle_cpu_and_kick(struct task_struct *p, s32 task_cpu,
         if (cpu >= 0) {
                 lstat_inc(LSTAT_KICK, layer, cctx);
-                scx_bpf_kick_cpu(cpu, 0 /* SCX_KICK_IDLE */);
+                scx_bpf_kick_cpu(cpu, 0 /*SCX_KICK_IDLE*/);
                 return true;
         } else {
                 return false;
         }
@@ -658,6 +665,13 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
             !(layer = lookup_layer(tctx->layer)))
                 return;
 
+        if (tctx->dispatch_local) {
+                tctx->dispatch_local = false;
+                lstat_inc(LSTAT_SEL_LOCAL, layer, cctx);
+                scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, enq_flags);
+                return;
+        }
+
         try_preempt_first = cctx->try_preempt_first;
         cctx->try_preempt_first = false;
         u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
@@ -1263,8 +1277,8 @@ void BPF_STRUCT_OPS(layered_running, struct task_struct *p)
                 }
         }
 
-        if (layer->perf > 0)
-                __COMPAT_scx_bpf_cpuperf_set(task_cpu, layer->perf);
+        if (layer->perf > 0)
+                __COMPAT_scx_bpf_cpuperf_set(task_cpu, layer->perf);
 
         cctx->maybe_idle = false;
 }
@@ -1377,6 +1391,7 @@ s32 BPF_STRUCT_OPS(layered_prep_enable, struct task_struct *p, struct scx_enable
 {
         struct task_ctx *tctx;
         struct bpf_cpumask *cpumask;
+
         /*
          * XXX - We want BPF_NOEXIST but bpf_map_delete_elem() in .disable() may
          * fail spuriously due to BPF recursion protection triggering
@@ -1417,103 +1432,14 @@ s32 BPF_STRUCT_OPS(layered_prep_enable, struct task_struct *p, struct scx_enable
          * fork path, let's delay the layer selection until the first
          * runnable().
          */
-        return 0;
-}
-
-/*
-static u64 dsq_first_runnable_for_ms(u64 dsq_id, u64 now)
-{
-        struct task_struct *p;
-
-        if (dsq_id > LO_FALLBACK_DSQ)
-                return 0;
-
-        bpf_for_each(scx_dsq, p, dsq_id, 0) {
-                struct task_ctx *tctx;
-
-                if ((tctx = lookup_task_ctx(p)))
-                        return (now - tctx->runnable_at) / 1000000;
-        }
         return 0;
 }
-*/
-/*
-static void dump_layer_cpumask(int idx)
-{
-        struct cpumask *layer_cpumask;
-        s32 cpu;
-        char buf[128] = "", *p;
-
-        if (!(layer_cpumask = lookup_layer_cpumask(idx)))
-                return;
-
-        bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) {
-                if (!(p = MEMBER_VPTR(buf, [idx++])))
-                        break;
-                if (bpf_cpumask_test_cpu(cpu, layer_cpumask))
-                        *p++ = '0' + cpu % 10;
-                else
-                        *p++ = '.';
-
-                if ((cpu & 7) == 7) {
-                        if (!(p = MEMBER_VPTR(buf, [idx++])))
-                                break;
-                        *p++ = '|';
-                }
-        }
-        buf[sizeof(buf) - 1] = '\0';
-
-        scx_bpf_dump("%s", buf);
-}
-*/
-/*
-void BPF_STRUCT_OPS(layered_dump struct scx_dump_ctx *dctx)
-{
-        u64 now = bpf_ktime_get_ns();
-        int i, j, idx;
-        struct layer *layer;
-
-        bpf_for(i, 0, nr_layers) {
-                layer = lookup_layer(i);
-                if (!layer) {
-                        scx_bpf_error("unabled to lookup layer %d", i);
-                        continue;
-                }
-
-                if (disable_topology) {
-                        scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=",
-                                     i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i),
-                                     dsq_first_runnable_for_ms(i, now));
-                } else {
-                        bpf_for(j, 0, nr_llcs) {
-                                if (!(layer->cache_mask & (1 << j)))
-                                        continue;
-
-                                idx = layer_dsq_id(layer->idx, j);
-                                scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d -%llums cpus=",
-                                             i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx),
-                                             dsq_first_runnable_for_ms(idx, now));
-                        }
-                }
-                dump_layer_cpumask(i);
-                scx_bpf_dump("\n");
-        }
-
-        scx_bpf_dump("HI_FALLBACK nr_queued=%d -%llums\n",
-                     scx_bpf_dsq_nr_queued(HI_FALLBACK_DSQ),
-                     dsq_first_runnable_for_ms(HI_FALLBACK_DSQ, now));
-        scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n",
-                     scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ),
-                     dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now));
-}*/
 
 void BPF_STRUCT_OPS(layered_disable, struct task_struct *p)
 {
         struct cpu_ctx *cctx;
         struct task_ctx *tctx;
-        s32 pid = p->pid;
-        int ret;
 
         if (!(cctx = lookup_cpu_ctx(-1)) || !(tctx = lookup_task_ctx(p)))
                 return;
@@ -1522,6 +1448,91 @@ void BPF_STRUCT_OPS(layered_disable, struct task_struct *p)
         __sync_fetch_and_add(&layers[tctx->layer].nr_tasks, -1);
 }
 
+// static u64 dsq_first_runnable_for_ms(u64 dsq_id, u64 now)
+// {
+//         struct task_struct *p;
+//
+//         if (dsq_id > LO_FALLBACK_DSQ)
+//                 return 0;
+//
+//         bpf_for_each(scx_dsq, p, dsq_id, 0) {
+//                 struct task_ctx *tctx;
+//
+//                 if ((tctx = lookup_task_ctx(p)))
+//                         return (now - tctx->runnable_at) / 1000000;
+//         }
+//
+//         return 0;
+// }
+
+// static void dump_layer_cpumask(int idx)
+// {
+//         struct cpumask *layer_cpumask;
+//         s32 cpu;
+//         char buf[128] = "", *p;
+//
+//         if (!(layer_cpumask = lookup_layer_cpumask(idx)))
+//                 return;
+//
+//         bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) {
+//                 if (!(p = MEMBER_VPTR(buf, [idx++])))
+//                         break;
+//                 if (bpf_cpumask_test_cpu(cpu, layer_cpumask))
+//                         *p++ = '0' + cpu % 10;
+//                 else
+//                         *p++ = '.';
+//
+//                 if ((cpu & 7) == 7) {
+//                         if (!(p = MEMBER_VPTR(buf, [idx++])))
+//                                 break;
+//                         *p++ = '|';
+//                 }
+//         }
+//         buf[sizeof(buf) - 1] = '\0';
+//
+//         scx_bpf_dump("%s", buf);
+// }
+//
+// void BPF_STRUCT_OPS(layered_dump, struct scx_dump_ctx *dctx)
+// {
+//         u64 now = bpf_ktime_get_ns();
+//         int i, j, idx;
+//         struct layer *layer;
+//
+//         bpf_for(i, 0, nr_layers) {
+//                 layer = lookup_layer(i);
+//                 if (!layer) {
+//                         scx_bpf_error("unabled to lookup layer %d", i);
+//                         continue;
+//                 }
+//
+//                 if (disable_topology) {
+//                         scx_bpf_dump("LAYER[%d] nr_cpus=%u nr_queued=%d -%llums cpus=",
+//                                      i, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(i),
+//                                      dsq_first_runnable_for_ms(i, now));
+//                 } else {
+//                         bpf_for(j, 0, nr_llcs) {
+//                                 if (!(layer->cache_mask & (1 << j)))
+//                                         continue;
+//
+//                                 idx = layer_dsq_id(layer->idx, j);
+//                                 scx_bpf_dump("LAYER[%d]DSQ[%d] nr_cpus=%u nr_queued=%d -%llums cpus=",
+//                                              i, idx, layers[i].nr_cpus, scx_bpf_dsq_nr_queued(idx),
+//                                              dsq_first_runnable_for_ms(idx, now));
+//                         }
+//                 }
+//                 dump_layer_cpumask(i);
+//                 scx_bpf_dump("\n");
+//         }
+//
+//         scx_bpf_dump("HI_FALLBACK nr_queued=%d -%llums\n",
+//                      scx_bpf_dsq_nr_queued(HI_FALLBACK_DSQ),
+//                      dsq_first_runnable_for_ms(HI_FALLBACK_DSQ, now));
+//         scx_bpf_dump("LO_FALLBACK nr_queued=%d -%llums\n",
+//                      scx_bpf_dsq_nr_queued(LO_FALLBACK_DSQ),
+//                      dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now));
+// }
+
 s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
 {
         struct bpf_cpumask *cpumask;
@@ -1529,6 +1540,14 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
 
         __COMPAT_scx_bpf_switch_all();
 
+        ret = scx_bpf_create_dsq(HI_FALLBACK_DSQ, -1);
+        if (ret < 0)
+                return ret;
+
+        ret = scx_bpf_create_dsq(LO_FALLBACK_DSQ, -1);
+        if (ret < 0)
+                return ret;
+
         cpumask = bpf_cpumask_create();
         if (!cpumask)
                 return -ENOMEM;
@@ -1551,11 +1570,11 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
         if (cpumask)
                 bpf_cpumask_release(cpumask);
 
-        bpf_for(i, 0, nr_nodes) {
-                ret = create_node(i);
-                if (ret)
-                        return ret;
-        }
+        bpf_for(i, 0, nr_nodes) {
+                ret = create_node(i);
+                if (ret)
+                        return ret;
+        }
 
         dbg("CFG: Dumping configuration, nr_online_cpus=%d smt_enabled=%d",
             nr_online_cpus, smt_enabled);
@@ -1619,6 +1638,21 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
                         case MATCH_NICE_EQUALS:
                                 dbg("%s NICE_EQUALS %d", header, match->nice);
                                 break;
+                        case MATCH_USER_ID_EQUALS:
+                                dbg("%s USER_ID %u", header, match->user_id);
+                                break;
+                        case MATCH_GROUP_ID_EQUALS:
+                                dbg("%s GROUP_ID %u", header, match->group_id);
+                                break;
+                        case MATCH_PID_EQUALS:
+                                dbg("%s PID %u", header, match->pid);
+                                break;
+                        case MATCH_PPID_EQUALS:
+                                dbg("%s PPID %u", header, match->ppid);
+                                break;
+                        case MATCH_TGID_EQUALS:
+                                dbg("%s TGID %u", header, match->tgid);
+                                break;
                         default:
                                 scx_bpf_error("%s Invalid kind", header);
                                 return -EINVAL;
@@ -1628,16 +1662,13 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
                         dbg("CFG DEFAULT");
                 }
         }
-
+
+        u64 llc_dsq_id = 0;
         bpf_for(i, 0, nr_layers) {
                 struct layer_cpumask_wrapper *cpumaskw;
 
                 layers[i].idx = i;
 
-                ret = scx_bpf_create_dsq(i, -1);
-                if (ret < 0)
-                        return ret;
-
                 if (!(cpumaskw = bpf_map_lookup_elem(&layer_cpumasks, &i)))
                         return -ENOENT;
@@ -1655,6 +1686,23 @@
                 cpumask = bpf_kptr_xchg(&cpumaskw->cpumask, cpumask);
                 if (cpumask)
                         bpf_cpumask_release(cpumask);
+
+                // create the dsqs for the layer
+                if (disable_topology) {
+                        ret = scx_bpf_create_dsq(i, -1);
+                        if (ret < 0)
+                                return ret;
+                } else {
+                        bpf_for(j, 0, nr_llcs) {
+                                int node_id = llc_node_id(i);
+                                dbg("creating dsq %llu for layer %d on node %d",
+                                    llc_dsq_id, i, node_id);
+                                ret = scx_bpf_create_dsq(llc_dsq_id, node_id);
+                                if (ret < 0)
+                                        return ret;
+                                llc_dsq_id++;
+                        }
+                }
         }
 
         return 0;
@@ -1662,7 +1710,7 @@ void BPF_STRUCT_OPS(layered_exit, struct scx_exit_info *ei)
 {
-        UEI_RECORD(uei, ei);
+        uei_record(&uei, ei);
 }
 
 SCX_OPS_DEFINE(layered,
@@ -1677,9 +1725,11 @@ SCX_OPS_DEFINE(layered,
         .set_weight = (void *)layered_set_weight,
         .set_cpumask = (void *)layered_set_cpumask,
         .cpu_release = (void *)layered_cpu_release,
-        .prep_enable = (void *)layered_prep_enable,
-        .disable = (void *)layered_disable,
-        /* .dump = (void *)layered_dump, */
+        .prep_enable = (void *)layered_prep_enable,
+        .disable = (void *)layered_disable,
+        // .dump = (void *)layered_dump,
         .init = (void *)layered_init,
         .exit = (void *)layered_exit,
+        .flags = SCX_OPS_CGROUP_KNOB_WEIGHT | SCX_OPS_ENQ_LAST,
         .name = "layered");
+
diff --git a/scheds/rust/scx_layered/src/bpf/util.bpf.h b/scheds/rust/scx_layered/src/bpf/util.bpf.h
new file mode 120000
index 000000000..ee7b16c86
--- /dev/null
+++ b/scheds/rust/scx_layered/src/bpf/util.bpf.h
@@ -0,0 +1 @@
+util.bpf.c
\ No newline at end of file
diff --git a/scheds/rust/scx_layered/src/main.rs b/scheds/rust/scx_layered/src/main.rs
index 38029a4ad..685688614 100644
--- a/scheds/rust/scx_layered/src/main.rs
+++ b/scheds/rust/scx_layered/src/main.rs
@@ -2,14 +2,20 @@
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2.
-mod bpf_skel;
 mod stats;
+pub mod bpf_skel;
 pub use bpf_skel::*;
 pub mod bpf_intf;
+use stats::LayerStats;
+use stats::StatsReq;
+use stats::StatsRes;
+use stats::SysStats;
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
 use std::collections::HashMap;
 use std::ffi::CString;
+use core::ffi::CStr;
 use std::fs;
 use std::io::Read;
 use std::io::Write;
@@ -43,20 +49,10 @@ use scx_stats::prelude::*;
 use scx_utils::compat;
 use scx_utils::init_libbpf_logging;
 use scx_utils::ravg::ravg_read;
-use scx_utils::scx_ops_attach;
-use scx_utils::scx_ops_load;
-use scx_utils::scx_ops_open;
-use scx_utils::uei_exited;
-use scx_utils::uei_report;
 use scx_utils::Cache;
 use scx_utils::Topology;
-use scx_utils::UserExitInfo;
 use serde::Deserialize;
 use serde::Serialize;
-use stats::LayerStats;
-use stats::StatsReq;
-use stats::StatsRes;
-use stats::SysStats;
 
 const RAVG_FRAC_BITS: u32 = bpf_intf::ravg_consts_RAVG_FRAC_BITS;
 const MAX_CPUS: usize = bpf_intf::consts_MAX_CPUS as usize;
@@ -420,6 +416,12 @@ struct Opts {
     #[clap(long)]
     monitor: Option,
 
+    /// DEPRECATED: Enable output of stats in OpenMetrics format instead of via
+    /// log macros. This option is useful if you want to collect stats in some
+    /// monitoring database like prometheseus.
+    #[clap(short = 'o', long)]
+    open_metrics_format: bool,
+
     /// Run with example layer specifications (useful for e.g. CI pipelines)
     #[clap(long)]
     run_example: bool,
@@ -455,9 +457,7 @@ enum LayerGrowthAlgo {
 }
 
 impl Default for LayerGrowthAlgo {
-    fn default() -> Self {
-        LayerGrowthAlgo::Sticky
-    }
+    fn default() -> Self { LayerGrowthAlgo::Sticky }
 }
 
 #[derive(Clone, Debug, Serialize, Deserialize)]
@@ -888,6 +888,67 @@ impl Stats {
     }
 }
 
+
+#[derive(Debug, Default)]
+struct UserExitInfo {
+    kind: i32,
+    reason: Option,
+    msg: Option,
+}
+
+impl UserExitInfo {
+    fn read(bpf_uei: &types::user_exit_info) -> Result {
+        let kind = unsafe { std::ptr::read_volatile(&bpf_uei.kind as *const _) };
+
+        let (reason, msg) = if kind != 0 {
+            (
+                Some(
+                    unsafe { CStr::from_ptr(bpf_uei.reason.as_ptr() as *const _) }
+                        .to_str()
+                        .context("Failed to convert reason to string")?
+                        .to_string(),
+                )
+                .filter(|s| !s.is_empty()),
+                Some(
+                    unsafe { CStr::from_ptr(bpf_uei.msg.as_ptr() as *const _) }
+                        .to_str()
+                        .context("Failed to convert msg to string")?
+                        .to_string(),
+                )
+                .filter(|s| !s.is_empty()),
+            )
+        } else {
+            (None, None)
+        };
+
+        Ok(Self { kind, reason, msg })
+    }
+
+    fn exited(bpf_uei: &types::user_exit_info) -> Result {
+        Ok(Self::read(bpf_uei)?.kind != 0)
+    }
+
+    fn report(&self) -> Result<()> {
+        let why = match (&self.reason, &self.msg) {
+            (Some(reason), None) => format!("{}", reason),
+            (Some(reason), Some(msg)) => format!("{} ({})", reason, msg),
+            _ => "".into(),
+        };
+
+        match self.kind {
+            0 => Ok(()),
+            etype => {
+                if etype != 64 {
+                    bail!("EXIT: kind={} {}", etype, why);
+                } else {
+                    info!("EXIT: {}", why);
+                    Ok(())
+                }
+            }
+        }
+    }
+}
+
 #[derive(Debug)]
 struct CpuPool {
     nr_cores: usize,
@@ -1092,11 +1153,10 @@ impl CpuPool {
 }
 
 fn layer_core_order(
-    spec: &LayerSpec,
     growth_algo: LayerGrowthAlgo,
     layer_idx: usize,
-    topo: &Topology,
-) -> Vec {
+    topo: &Topology
+    ) -> Vec {
     let mut core_order = vec![];
     match growth_algo {
         LayerGrowthAlgo::Sticky => {
@@ -1146,20 +1206,27 @@ struct Layer {
 }
 
 impl Layer {
-    fn new(spec: &LayerSpec, idx: usize, cpu_pool: &CpuPool, topo: &Topology) -> Result {
-        let name = &spec.name;
-        let kind = spec.kind.clone();
+    fn new(
+        idx: usize,
+        cpu_pool: &CpuPool,
+        name: &str,
+        kind: LayerKind,
+        topo: &Topology,
+    ) -> Result {
         let mut cpus = bitvec![0; cpu_pool.nr_cpus];
         cpus.fill(false);
         let mut allowed_cpus = bitvec![0; cpu_pool.nr_cpus];
+        let mut layer_growth_algo = LayerGrowthAlgo::Sticky;
         match &kind {
             LayerKind::Confined {
                 cpus_range,
                 util_range,
                 nodes,
                 llcs,
+                growth_algo,
                 ..
             } => {
+                layer_growth_algo = growth_algo.clone();
                 let cpus_range = cpus_range.unwrap_or((0, std::usize::MAX));
                 if cpus_range.0 > cpus_range.1 || cpus_range.1 == 0 {
                     bail!("invalid cpus_range {:?}", cpus_range);
                 }
@@ -1195,7 +1262,9 @@ impl Layer {
                     bail!("invalid util_range {:?}", util_range);
                 }
             }
-            LayerKind::Grouped { nodes, llcs, .. } | LayerKind::Open { nodes, llcs, .. } => {
+            LayerKind::Grouped { growth_algo, nodes, llcs, .. } |
+            LayerKind::Open { growth_algo, nodes, llcs, .. } => {
+                layer_growth_algo = growth_algo.clone();
                 if nodes.len() == 0 && llcs.len() == 0 {
                     allowed_cpus.fill(true);
                 } else {
@@ -1220,13 +1289,7 @@ impl Layer {
             }
         }
 
-        let layer_growth_algo = match &kind {
-            LayerKind::Confined { growth_algo, .. }
-            | LayerKind::Grouped { growth_algo, .. }
-            | LayerKind::Open { growth_algo, .. } => growth_algo.clone(),
-        };
-
-        let core_order = layer_core_order(spec, layer_growth_algo, idx, topo);
+        let core_order = layer_core_order(layer_growth_algo, idx, topo);
 
         Ok(Self {
             name: name.into(),
@@ -1273,7 +1336,8 @@ impl Layer {
         {
             trace!(
                 "layer-{} needs more CPUs (util={:.3}) but is over the load fraction",
-                &self.name, layer_util
+                &self.name,
+                layer_util
             );
             return Ok(false);
         }
@@ -1579,9 +1643,9 @@ impl<'a, 'b> Scheduler<'a, 'b> {
             perf_set |= layer.perf > 0;
         }
 
-        if perf_set && !compat::ksym_exists("scx_bpf_cpuperf_set")? {
-            warn!("cpufreq support not available, ignoring perf configurations");
-        }
+        if perf_set && !compat::ksym_exists("scx_bpf_cpuperf_set")? {
+            warn!("cpufreq support not available, ignoring perf configurations");
+        }
 
         Ok(())
     }
@@ -1591,7 +1655,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         skel.maps.rodata_data.nr_llcs = 0;
 
         for node in topo.nodes() {
-            debug!(
+            info!(
                 "configuring node {}, LLCs {:?}",
                 node.id(),
                 node.llcs().len()
@@ -1599,7 +1663,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
             skel.maps.rodata_data.nr_llcs += node.llcs().len() as u32;
 
             for (_, llc) in node.llcs() {
-                debug!("configuring llc {:?} for node {:?}", llc.id(), node.id());
+                info!("configuring llc {:?} for node {:?}", llc.id(), node.id());
                 skel.maps.rodata_data.llc_numa_id_map[llc.id()] = node.id() as u32;
             }
         }
@@ -1622,7 +1686,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         let mut skel_builder = BpfSkelBuilder::default();
         skel_builder.obj_builder.debug(opts.verbose > 1);
         init_libbpf_logging(None);
-        let mut skel = scx_ops_open!(skel_builder, open_object, layered)?;
+        let mut skel = skel_builder.open(open_object).context("failed to open BPF program")?;
 
         // scheduler_tick() got renamed to sched_tick() during v6.10-rc.
         let sched_tick_name = match compat::ksym_exists("sched_tick")? {
@@ -1657,11 +1721,17 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         Self::init_layers(&mut skel, opts, layer_specs, &topo)?;
         Self::init_nodes(&mut skel, opts, &topo);
 
-        let mut skel = scx_ops_load!(skel, layered, uei)?;
+        let mut skel = skel.load().context("Failed to load BPF program")?;
 
         let mut layers = vec![];
         for (idx, spec) in layer_specs.iter().enumerate() {
-            layers.push(Layer::new(&spec, idx, &cpu_pool, &topo)?);
+            layers.push(Layer::new(
+                idx,
+                &cpu_pool,
+                &spec.name,
+                spec.kind.clone(),
+                &topo,
+            )?);
         }
 
         // Other stuff.
@@ -1674,11 +1744,10 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         // huge problem in the interim until we figure it out.
 
         // Attach.
-        let struct_ops = scx_ops_attach!(skel, layered)?;
         let stats_server = StatsServer::new(stats::server_data()).launch()?;
 
-        let sched = Self {
-            struct_ops: Some(struct_ops),
+        let mut sched = Self {
+            struct_ops: None,
             layer_specs,
 
             sched_intv: Duration::from_secs_f64(opts.interval),
@@ -1698,6 +1767,20 @@ impl<'a, 'b> Scheduler<'a, 'b> {
             stats_server,
         };
 
+        sched
+            .skel
+            .attach()
+            .context("Failed to attach BPF program")?;
+
+        sched.struct_ops = Some(
+            sched
+                .skel
+                .maps
+                .layered
+                .attach_struct_ops()
+                .context("Failed to attach layered struct ops")?,
+        );
+
         info!("Layered Scheduler Attached. Run `scx_layered --monitor` for metrics.");
 
         Ok(sched)
@@ -1821,12 +1904,12 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         Ok(sys_stats)
     }
 
-    fn run(&mut self, shutdown: Arc) -> Result<(), anyhow::Error> {
-let (res_ch, req_ch) = self.stats_server.channels();
+    fn run(&mut self, shutdown: Arc) -> Result<()> {
+        let (res_ch, req_ch) = self.stats_server.channels();
         let mut next_sched_at = Instant::now() + self.sched_intv;
         let mut cpus_ranges = HashMap::>::new();
 
-        while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel, uei) {
+        while !shutdown.load(Ordering::Relaxed) && !UserExitInfo::exited(&self.skel.maps.bss_data.uei)? {
             let now = Instant::now();
 
             if now >= next_sched_at {
@@ -1873,9 +1956,7 @@ let (res_ch, req_ch) = self.stats_server.channels();
         }
 
         self.struct_ops.take();
-        let uei: UserExitInfo = uei_report!(&self.skel, uei)?;
-        uei.report()?;
-        Ok(())
+        UserExitInfo::read(&self.skel.maps.bss_data.uei)?.report()
     }
 }
@@ -2053,6 +2134,10 @@ fn main() -> Result<()> {
         );
     }
 
+    if opts.open_metrics_format {
+        warn!("open_metrics_format is deprecated");
+    }
+
     debug!("specs={}", serde_json::to_string_pretty(&layer_config)?);
     verify_layer_specs(&layer_config.specs)?;
@@ -2075,5 +2160,6 @@ fn main() -> Result<()> {
     let mut open_object = MaybeUninit::uninit();
     let mut sched = Scheduler::init(&opts, &layer_config.specs, &mut open_object)?;
-    sched.run(shutdown.clone())
+    sched.run(shutdown.clone())
 }
+