Skip to content

Commit

Permalink
Merge "scripts: a few fixes in posix_net_conf.sh" from Vlad
Browse files Browse the repository at this point in the history
"1) Ban irqbalance from moving NIC's IRQs.
2) Exclude CPU0 hyper-threading sibling from RPS configuration.
3) Configure XPS.
4) Add a separate mode for multi-queue NICs:
   - Equally distribute IRQs among all CPUs instead of binding them all to CPU0.
   - Don't configure RPS.
5) Set some backlogs' limits.
"
  • Loading branch information
avikivity committed Dec 3, 2015
2 parents c31cac2 + a6a94ab commit 7451904
Showing 1 changed file with 155 additions and 75 deletions.
230 changes: 155 additions & 75 deletions scripts/posix_net_conf.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
#!/bin/bash
# !
# ! Usage: posix_net_conf.sh [iface name, eth0 by default]
# ! Usage: posix_net_conf.sh [iface name, eth0 by default] [-mq] [-h|--help]
# !
# ! Sets all IRQs of a given NIC to CPU0 and configures RPS to spreads NAPIs'
# ! handling between other CPUs in the way that NAPIs are distributed between
# ! CPUs as equally as possible.
# ! Ban NIC IRQs from being moved by irqbalance.
# !
# ! If -mq is not given - set all IRQs of a given NIC to CPU0 and configure RPS
# ! to spread NAPIs' handling between other CPUs.
# !
# ! If "-mq" is given - distribute NIC's IRQs among all CPUs instead of binding
# ! them all to CPU0 and do not enable RPS.
# !
# ! Enable XPS, increase the default values of somaxconn and tcp_max_syn_backlog.
# !
# ! -h|--help - print this help information
# !

make_hex_mask()
Expand All @@ -24,108 +32,180 @@ make_hex_mask()
}

#
# set_one_mask <queue index> <CPU mask>
# set_one_mask <config file> <CPU mask>
#
set_one_mask()
{
local i=$1
local cpuset_conf_file=$1
local mask=$2
local rps_cpus="/sys/class/net/$IFACE/queues/rx-$i/rps_cpus"
local mask_hex=`make_hex_mask $mask`
echo "Setting mask $mask_hex in $rps_cpus"
echo $mask_hex > $rps_cpus
echo "Setting mask $mask_hex in $cpuset_conf_file"
echo $mask_hex > $cpuset_conf_file
}

#
# Bind RPS queues to CPUs other than CPU0
#
# If there is a single CPU in the system - don't enable RPS
#
# If there is more CPUs than (RPS queues plus 1) then every queue will get
# (NCPUS - 1)/NQUEUES CPUs and (NCPUS - 1) % NQUEUES lower queues will get one
# additional CPU.
# For instance, if NCPUs=15 and NQUEUES=3, then configuration will be as follows:
# Q0: {1,2,3,4,5}, Q1: {6,7,8,9,10} Q2: {11,12,13,14}
#
# If there is less CPUs than (RPS queues plus 1) when each queue will get a single CPU and
# CPUs will be used in a round robin way.
# For instance, if NCPUs=3 and NQUEUES=5, then configuration will be as follows:
# Q0: {1}, Q1: {2} Q2: {1}, Q3: {2}, Q4: {1}
# Bind RPS queues to CPUs other than CPU0 and its hyper-threading siblings
#
# If CPUs number equals to number of RPS queues plus 1 then each queue will get
# a separate CPU.
# Use hwloc-distrib for generating the appropriate CPU masks.
#
setup_rps()
{
# If we are in a single CPU environment - there is no point in configuring RPS
[[ $CPU_NUM -eq 1 ]] && return

# CPU0 will not be used since we are going to bind HW IRQs to it
local cpu_num=$(( CPU_NUM - 1 ))
# If we are in a single core environment - there is no point in configuring RPS
[[ `hwloc-calc core:0.pu:all` -eq `hwloc-calc all` ]] && return

local rps_queues_count=`ls -1 /sys/class/net/$IFACE/queues/*/rps_cpus | wc -l`
local cpus_per_q=$((cpu_num / rps_queues_count))
local mask
local i=0

#
# If for some strange reason HW driver enabled more HW queues than a number
# of CPUs bind each HW queue to a single CPU.
# We will have more than a single queue per CPU in this case.
#
[[ $cpus_per_q -eq 0 ]] && cpus_per_q=1
# Distribute all cores except for CPU0 siblings
for mask in `hwloc-distrib --restrict $(hwloc-calc all ~core:0) $rps_queues_count`
do
set_one_mask "/sys/class/net/$IFACE/queues/rx-$i/rps_cpus" $mask
i=$(( i + 1 ))
done
}

declare -a local cpus_per_q_ar
local i
#
# Spread all XPS queues over the full cpuset. Don't bother to exclude CPU0
# (and friends) - scylla will just not send from it if its cpuset is properly set.
#
setup_xps()
{
local xps_queues_count=`ls -1 /sys/class/net/$IFACE/queues/*/xps_cpus | wc -l`
local mask
local i=0

# Fill the array with "cpus_per_q" values first
for (( i = 0; i < rps_queues_count; i++ ))
for mask in `hwloc-distrib $xps_queues_count`
do
cpus_per_q_ar[i]=$cpus_per_q
set_one_mask "/sys/class/net/$IFACE/queues/tx-$i/xps_cpus" $mask
i=$(( i + 1 ))
done
}

# Spread the "cpu_num - cpus_per_q * rps_queues_count" among queues 0, 1, 2,...
local left_cpus=$(( cpu_num - cpus_per_q * rps_queues_count ))
distribute_irqs()
{
local irqs=( `cat /proc/interrupts | grep $IFACE | cut -d":" -f1` )
local mask
local i=0

if [[ $left_cpus -gt 0 ]]; then
for (( i = 0; i < rps_queues_count; i++ ))
do
cpus_per_q_ar[i]=$(( cpus_per_q_ar[i] + 1))
left_cpus=$(( left_cpus - 1 ))
[[ $left_cpus -eq 0 ]] && break
done
for mask in `hwloc-distrib ${#irqs[*]}`
do
set_one_mask "/proc/irq/${irqs[$i]}/smp_affinity" $mask
i=$(( i + 1 ))
done
}

restart_irqbalance()
{
local config_file="/etc/default/irqbalance"
local options_key="OPTIONS"
local systemd=""

# return early if irqbalance is not running
! ps -elf | grep irqbalance | grep -v grep &>/dev/null && return

if ! test -f $config_file; then
if test -f /etc/sysconfig/irqbalance; then
config_file="/etc/sysconfig/irqbalance"
options_key="IRQBALANCE_ARGS"
systemd="yes"
else
echo "Unknown system configuration - not restarting irqbalance!"
echo "You have to prevent it from moving $IFACE IRQs manually!"
return
fi
fi

local mask
local cur_mask_shift=0
for (( i = 0; i < rps_queues_count; i++ ))
local orig_file="$config_file.scylla.orig"

# Save the original file
! test -f $orig_file && cp $config_file $orig_file

# Remove options parameter if exists
local tmp_file=`mktemp`
egrep -v -w ^"\s*$options_key" $config_file > $tmp_file
mv $tmp_file $config_file

echo -n "Restarting irqbalance: going to ban the following IRQ numbers: "

local new_options="$options_key=\""
local irq
for irq in `cat /proc/interrupts | grep $IFACE | cut -d":" -f1`
do
# Build a mask for a current queue, skip CPU0
mask=$(( ((1 << cpus_per_q_ar[i]) - 1) << (1 + cur_mask_shift) ))
new_options="$new_options --banirq=$irq"
echo -n "$irq "
done

cur_mask_shift=$(( cur_mask_shift + cpus_per_q_ar[i] ))
if [[ $cur_mask_shift -ge $cpu_num ]]; then
cur_mask_shift=$(( cur_mask_shift - cpu_num ))
fi
echo "..."
echo "Original irqbalance configuration is in $orig_file"

new_options="$new_options\""
echo $new_options >> $config_file

if [[ -z "$systemd" ]]; then
/etc/init.d/irqbalance restart
else
systemctl try-restart irqbalance
fi
}

#
# usage
#
# Print the script's help text to stdout.
#
# The help text lives in the script itself: every line that begins with the
# "# !" marker is printed with everything up to and including the first "!"
# stripped.
#
usage()
{
    # "$0" is quoted so that a script path containing spaces or glob characters
    # is read as a single file; grep reads the file directly.
    grep '^# !' -- "$0" | cut -d'!' -f2-
}

set_one_mask $i $mask
#
# parse_args [iface name] [-mq] [-h|--help]
#
# Parse the command line and set the globals it controls:
#   IFACE   - set to the positional (non-option) argument, if one is given
#   MQ_MODE - set to "yes" when "-mq" is given
#
# Prints the usage text and exits with 0 for -h|--help, and with 1 when more
# than two arguments or an unrecognized option are given.
#
parse_args()
{
    # Keep the loop variable out of the global scope
    local arg

    if [[ $# -gt 2 ]]; then
        usage
        exit 1
    fi

    # "$@" is quoted so that every argument is examined exactly as it was
    # passed, without extra word splitting or glob expansion.
    for arg in "$@"
    do
        case "$arg" in
        "")
            # Ignore empty arguments - an empty interface name is never valid
            ;;
        "-mq")
            MQ_MODE="yes"
            ;;
        "-h"|"--help")
            usage
            exit 0
            ;;
        -*)
            # An unrecognized option must not be mistaken for an interface
            # name: it would later be handed to grep as an option.
            echo "Unknown option: $arg" >&2
            usage
            exit 1
            ;;
        *)
            IFACE=$arg
            ;;
        esac
    done
}

if [[ $# -eq 0 ]]; then
IFACE="eth0"
else
IFACE=$1
fi
IFACE="eth0"
MQ_MODE=""

parse_args $@

CPU_NUM=`cat /proc/cpuinfo | grep processor | wc -l`
CPUS_MASK=$(( (1 << CPU_NUM) - 1 ))
# Ban irqbalance from moving NICs IRQs
restart_irqbalance

# bind all NIC IRQs to CPU0
for irq in `cat /proc/interrupts | grep $IFACE | cut -d":" -f1`
do
echo "Binding IRQ $irq to CPU0"
echo 1 > /proc/irq/$irq/smp_affinity
done
if [[ -z "$MQ_MODE" ]]; then
for irq in `cat /proc/interrupts | grep $IFACE | cut -d":" -f1`
do
echo "Binding IRQ $irq to CPU0"
echo 1 > /proc/irq/$irq/smp_affinity
done

# Setup RPS
setup_rps
else
distribute_irqs
fi

# Setup XPS
setup_xps

# Setup RPS
setup_rps
# Increase the socket listen() backlog
echo 4096 > /proc/sys/net/core/somaxconn

# Increase the maximum number of remembered connection requests that have not
# yet received an acknowledgment from the connecting client.
echo 4096 > /proc/sys/net/ipv4/tcp_max_syn_backlog

0 comments on commit 7451904

Please sign in to comment.