Skip to content

Commit

Permalink
fix(bandwidth_scheduler) - Remove BandwidthRequestValues which can ne…
Browse files Browse the repository at this point in the history
…ver be granted (#12747)

As @matejpavlovic pointed out in his NEP review, the last few
`BandwidthRequestValues` can never be granted because granting them
along with base bandwidth would always cause the shard to exceed
`max_shard_bandwidth`.

In the current configuration with 6 shards the `base_bandwidth` is set
to `61139` and the last few values are `4194304, 4278056, 4389028,
4500000`.

But `4278056` can never be granted on a link because `4278056 + 5 *
61139 = 4583751`, which is more than `max_shard_bandwidth` (4_500_000).
That makes the last three values useless, they just take up space that
could be used for more precise bandwidth requests.

Let's solve it by adding a new parameter to `BandwidthSchedulerParams`:
`max_single_grant`
`max_single_grant` describes the maximum amount of bandwidth that can be
granted on a single link.
`BandwidthRequestValues` now go only up to `max_single_grant`, as there
is no point in requesting more than the maximum that can be granted. We
can also get rid of the weird logic of setting one value to
`max_receipt_size` because the last value always corresponds to
`max_single_grant`, and that's enough to send a max size receipt.
Base bandwidth is now calculated using `max_single_grant` instead of
`max_receipt_size`.

I think it makes much more sense than the previous logic with
`max_receipt_size`. Decoupling `max_receipt_size` and `max_single_grant`
is also more future proof, it'll keep working even if we reduce
`max_receipt_size` to say 1MB in the future.

The PR is divided into commits for easier review.

---------

Co-authored-by: Matej Pavlovic <[email protected]>
  • Loading branch information
jancionear and matejpavlovic authored Jan 17, 2025
1 parent b6f82d6 commit 515b5fa
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 88 deletions.
122 changes: 72 additions & 50 deletions core/primitives/src/bandwidth_scheduler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,11 @@ impl BandwidthRequest {
let mut bitmap = BandwidthRequestBitmap::new();
let values = BandwidthRequestValues::new(params).values;

// Find the first value which allows to send a max size receipt
let max_receipt_size_value_pos = values
.iter()
.position(|&value| value == params.max_receipt_size)
.expect("max_receipt_size should be in the values list");
.position(|&value| value >= params.max_receipt_size)
.expect("max_receipt_size is less than max_single_grant, a value should be found");
bitmap.set_bit(max_receipt_size_value_pos, true);

BandwidthRequest { to_shard: to_shard.into(), requested_values_bitmap: bitmap }
Expand Down Expand Up @@ -177,7 +178,7 @@ fn interpolate(min: u64, max: u64, i: u64, n: u64) -> u64 {
impl BandwidthRequestValues {
pub fn new(params: &BandwidthSchedulerParams) -> BandwidthRequestValues {
// values[-1] = base_bandwidth
// values[values.len() - 1] = max_shard_bandwidth
// values[values.len() - 1] = max_single_grant
// values[i] = linear interpolation between values[-1] and values[values.len() - 1]
// TODO(bandwidth_scheduler) - consider using exponential interpolation.
let mut values = [0; BANDWIDTH_REQUEST_VALUES_NUM];
Expand All @@ -187,32 +188,8 @@ impl BandwidthRequestValues {
for i in 0..values.len() {
let i_u64: u64 = i.try_into().expect("Converting usize to u64 shouldn't fail");

values[i] = interpolate(
params.base_bandwidth,
params.max_shard_bandwidth,
i_u64 + 1,
values_len,
);
}

// The value that is closest to max_receipt_size is set to max_receipt_size.
// Without this we could end up in a situation where one shard wants to send
// a maximum size receipt to another and requests the corresponding value from the list,
// but the sum of this value and base bandwidth exceeds max_shard_bandwidth.
// There's a guarantee that (num_shards - 1) * base_bandwidth + max_receipt_size <= max_shard_bandwidth,
// but there's no guarantee that (num_shards - 1) * base_bandwidth + request_value <= max_shard_bandwidth.
let mut closest_to_max: Bandwidth = 0;
for value in &values {
if value.abs_diff(params.max_receipt_size)
< closest_to_max.abs_diff(params.max_receipt_size)
{
closest_to_max = *value;
}
}
for value in values.iter_mut() {
if *value == closest_to_max {
*value = params.max_receipt_size;
}
values[i] =
interpolate(params.base_bandwidth, params.max_single_grant, i_u64 + 1, values_len);
}

BandwidthRequestValues { values }
Expand Down Expand Up @@ -335,9 +312,13 @@ pub struct LinkAllowance {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BandwidthSchedulerParams {
/// This much bandwidth is granted by default.
/// base_bandwidth = (max_shard_bandwidth - max_single_grant) / (num_shards - 1)
pub base_bandwidth: Bandwidth,
/// The maximum amount of data that a shard can send or receive at a single height.
pub max_shard_bandwidth: Bandwidth,
/// The maximum amount of bandwidth that can be granted on a single link.
/// Should be at least as big as `max_receipt_size`.
pub max_single_grant: Bandwidth,
/// Maximum size of a single receipt.
pub max_receipt_size: Bandwidth,
/// Maximum bandwidth allowance that a link can accumulate.
Expand All @@ -348,36 +329,76 @@ impl BandwidthSchedulerParams {
/// Calculate values of scheduler params based on the current configuration
pub fn new(num_shards: NonZeroU64, runtime_config: &RuntimeConfig) -> BandwidthSchedulerParams {
// TODO(bandwidth_scheduler) - put these parameters in RuntimeConfig.
let max_shard_bandwidth: Bandwidth = 4_500_000;
let max_shard_bandwidth = 4_500_000;
let max_single_grant = 4 * 1024 * 1024;
let max_allowance = max_shard_bandwidth;
let max_base_bandwidth = 100_000;

let max_receipt_size = runtime_config.wasm_config.limit_config.max_receipt_size;

if max_shard_bandwidth < max_receipt_size {
panic!(
"max_shard_bandwidth is smaller than max_receipt_size! ({} < {}).
Invalid configuration - it'll be impossible to send a maximum size receipt.",
max_shard_bandwidth, max_receipt_size
);
}
Self::calculate(
max_shard_bandwidth,
max_single_grant,
max_allowance,
max_base_bandwidth,
max_receipt_size,
num_shards.get(),
)
}

// A receipt with maximum size must still be able to get through
// after base bandwidth is granted to everyone. We have to ensure that:
// base_bandwidth * (num_shards - 1) + max_receipt_size <= max_shard_bandwidth
let available_bandwidth = max_shard_bandwidth - max_receipt_size;
let mut base_bandwidth = available_bandwidth / std::cmp::max(1, num_shards.get() - 1);
fn calculate(
max_shard_bandwidth: Bandwidth,
max_single_grant: Bandwidth,
max_allowance: Bandwidth,
max_base_bandwidth: Bandwidth,
max_receipt_size: Bandwidth,
num_shards: u64,
) -> BandwidthSchedulerParams {
assert!(
max_single_grant >= max_receipt_size,
"A max_single_grant can't be lower than max_receipt_size - it'll be impossible to send a max size receipt"
);
assert!(
max_single_grant <= max_shard_bandwidth,
"A single grant must not be greater than max_shard_bandwidth"
);

// Granting `max_single_grant` on one link and `base_bandwidth` on all other links can't
// exceed `max_shard_bandwidth`, we have to ensure that:
// base_bandwidth * (num_shards - 1) + max_single_grant <= max_shard_bandwidth
// Base bandwidth is calculated by taking the bandwidth that would remain available after
// granting `max_single_grant` on one link and dividing it equally between the other links.
let available_bandwidth = max_shard_bandwidth - max_single_grant;
let mut base_bandwidth = available_bandwidth / std::cmp::max(1, num_shards - 1);
if base_bandwidth > max_base_bandwidth {
base_bandwidth = max_base_bandwidth;
}

BandwidthSchedulerParams {
base_bandwidth,
max_shard_bandwidth,
max_single_grant,
max_receipt_size,
max_allowance,
}
}

/// Example params, used only in tests
pub fn for_test(num_shards: u64) -> BandwidthSchedulerParams {
let max_shard_bandwidth = 4_500_000;
let max_single_grant = 4 * 1024 * 1024;
let max_allowance = max_shard_bandwidth;
let max_base_bandwidth = 100_000;
let max_receipt_size = 4 * 1024 * 1024;

Self::calculate(
max_shard_bandwidth,
max_single_grant,
max_allowance,
max_base_bandwidth,
max_receipt_size,
num_shards,
)
}
}

#[cfg(test)]
Expand Down Expand Up @@ -428,6 +449,7 @@ mod tests {
let expected = BandwidthSchedulerParams {
base_bandwidth: 100_000,
max_shard_bandwidth: 4_500_000,
max_single_grant: 4 * 1024 * 1024,
max_receipt_size,
max_allowance: 4_500_000,
};
Expand All @@ -446,6 +468,7 @@ mod tests {
let expected = BandwidthSchedulerParams {
base_bandwidth: (4_500_000 - max_receipt_size) / 5,
max_shard_bandwidth: 4_500_000,
max_single_grant: 4 * 1024 * 1024,
max_receipt_size,
max_allowance: 4_500_000,
};
Expand Down Expand Up @@ -495,18 +518,17 @@ mod tests {
let values = BandwidthRequestValues::new(&params);

assert!(values.values[0] > params.base_bandwidth);
assert_eq!(values.values[BANDWIDTH_REQUEST_VALUES_NUM - 1], params.max_shard_bandwidth);
assert!(values.values.contains(&max_receipt_size));
assert_eq!(values.values[BANDWIDTH_REQUEST_VALUES_NUM - 1], params.max_single_grant);

assert_eq!(params.base_bandwidth, 61139);
assert_eq!(
values.values,
[
172110, 283082, 394053, 505025, 615996, 726968, 837939, 948911, 1059882, 1170854,
1281825, 1392797, 1503768, 1614740, 1725711, 1836683, 1947654, 2058626, 2169597,
2280569, 2391541, 2502512, 2613484, 2724455, 2835427, 2946398, 3057370, 3168341,
3279313, 3390284, 3501256, 3612227, 3723199, 3834170, 3945142, 4056113, 4194304,
4278056, 4389028, 4500000
164468, 267797, 371126, 474455, 577784, 681113, 784442, 887772, 991101, 1094430,
1197759, 1301088, 1404417, 1507746, 1611075, 1714405, 1817734, 1921063, 2024392,
2127721, 2231050, 2334379, 2437708, 2541038, 2644367, 2747696, 2851025, 2954354,
3057683, 3161012, 3264341, 3367671, 3471000, 3574329, 3677658, 3780987, 3884316,
3987645, 4090974, 4194304
]
);
}
Expand Down
29 changes: 6 additions & 23 deletions core/store/src/trie/outgoing_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -690,24 +690,15 @@ mod tests {
}

/// The diff between two consecutive values that can be requested in a bandwidth request
/// is ~110 kB. The upper bound of the receipt group size is 100 kB. The groups are small
/// is ~103 kB. The upper bound of the receipt group size is 100 kB. The groups are small
/// enough that bandwidth requests produced from group sizes are optimal, optimal meaning
/// the same as if the requests were made based on individual receipt sizes.
/// The only exception is around `max_receipt_size` - the value that is the closest to
/// `max_receipt_size` is set to `max_receipt_size`, which causes the diff to be smaller.
#[test]
fn test_receipt_groups_produce_optimal_bandwidth_request() {
let scheduler_params = BandwidthSchedulerParams {
base_bandwidth: 50_000,
max_shard_bandwidth: 4_500_000,
max_receipt_size: 4 * 1024 * 1024,
max_allowance: 4_500_000,
};
let scheduler_params = BandwidthSchedulerParams::for_test(6);
let request_values = BandwidthRequestValues::new(&scheduler_params).values;
assert!(request_values[1] - request_values[0] > 110_000);
assert!(request_values[1] - request_values[0] > 102_000);

let max_receipt_size_pos =
request_values.iter().position(|v| v == &scheduler_params.max_receipt_size).unwrap();
let groups_config =
ReceiptGroupsConfig { size_upper_bound: ByteSize::kb(100), gas_upper_bound: Gas::MAX };

Expand Down Expand Up @@ -743,19 +734,11 @@ mod tests {
&scheduler_params,
)
.unwrap();
if let Some(mut ideal_request) = ideal_bandwidth_request {
let mut groups_request = groups_bandwidth_request.unwrap();

// Zero out everything after `max_receipt_size` in both requests before comparison.
for i in max_receipt_size_pos..request_values.len() {
ideal_request.requested_values_bitmap.set_bit(i, false);
groups_request.requested_values_bitmap.set_bit(i, false);
}
assert_eq!(ideal_request, groups_request);
}
// Bandwidth request produced from receipt groups should be optimal (same as from
// individual receipt sizes).
assert_eq!(ideal_bandwidth_request, groups_bandwidth_request);

// Bandwidth request produced from receipt groups should be optimal (same as from individual
// receipt sizes). (up to `max_receipt_size`)
let new_receipt_size = ByteSize::b(get_random_receipt_size_for_test(rng));
buffered_receipts.push_back(new_receipt_size);
test_queue.update_on_receipt_pushed(new_receipt_size, 1, &groups_config);
Expand Down
7 changes: 1 addition & 6 deletions runtime/runtime/src/bandwidth_scheduler/scheduler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -763,12 +763,7 @@ mod tests {
let shard_layout = ShardLayout::multi_shard(num_shards, 0);

// Standard params
let params = BandwidthSchedulerParams {
base_bandwidth: 50_000,
max_shard_bandwidth: 4_500_000,
max_receipt_size: 4 * 1024 * 1024,
max_allowance: 4_500_000,
};
let params = BandwidthSchedulerParams::for_test(num_shards);

// Every link has a different allowance, which should make the `requests_by_allowance` BTreeMap
// as large as possible.
Expand Down
13 changes: 4 additions & 9 deletions runtime/runtime/src/bandwidth_scheduler/simulator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,12 +419,7 @@ impl ChainSimulator {

fn scheduler_params(&self) -> BandwidthSchedulerParams {
// TODO(bandwidth_scheduler) - use current RuntimeConfig?
BandwidthSchedulerParams {
base_bandwidth: (4_500_000 - 4 * 1024 * 1024) / self.shard_layout.num_shards() as u64,
max_shard_bandwidth: 4_500_000,
max_receipt_size: 4 * 1024 * 1024,
max_allowance: 4_500_000,
}
BandwidthSchedulerParams::for_test(self.shard_layout.num_shards())
}

/// Make sure that post state of previous chunk application is always pre state for the next application.
Expand Down Expand Up @@ -592,9 +587,9 @@ fn test_bandwidth_scheduler_simulator_missing_chunks() {
.missing_chunk_probability(0.1)
.build();
let summary = run_scenario(scenario);
assert!(summary.bandwidth_utilization > 0.6); // 75% utilization
assert!(summary.link_imbalance_ratio < 1.4); // < 40% difference on links
assert!(summary.worst_link_estimation_ratio > 0.55); // 55% of estimated link throughput
assert!(summary.bandwidth_utilization > 0.7); // > 70% utilization
assert!(summary.link_imbalance_ratio < 1.6); // < 60% difference on links
assert!(summary.worst_link_estimation_ratio > 0.50); // 50% of estimated link throughput

// Incoming max_shard_bandwidth is not respected! When a chunk is missing, the receipts that
// were sent previously will arrive later and they can mix with other incoming receipts, and the
Expand Down

0 comments on commit 515b5fa

Please sign in to comment.