File: //bin/virt-net-optimize.sh
#!/bin/bash
#
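# This script configures NIC multiqueue or RPS/XPS/RFS on BCC:
#   - Set the number of multiqueue channels on NICs that support multiqueue
#   - Set RPS/RFS when multiqueue is not available (max combined channels <= 1)
#   - Set XPS (done together with RPS/RFS on those NICs)
#   - Stop irqbalance service (NOTE(review): nothing in this file stops
#     irqbalance -- confirm it is handled elsewhere)
#   - Bind the NIC interrupts to CPU cores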
#
#chkconfig: 35 8 25
#
# Position of the next core slot handed out by set_irq_affinity. It is global
# on purpose: set_irq_affinity advances it and set_smp_affinity_virtio
# saves/restores it between queue directions.
cur_corenum=0
# Value written to every rx queue's rps_flow_cnt; main also multiplies it by
# the number of queues to size rps_sock_flow_entries.
rps_flow_cnt=4096
sysfs_path="/sys/devices/system/cpu"
# core_thread_A[core] / core_thread_B[core]: first / second logical CPU taken
# from each core's thread_siblings_list (filled in by build_cpu_topo).
core_thread_A=()
core_thread_B=()
# Number of logical CPUs. The pattern is anchored so that only the
# "processor ..." entries are counted, not other lines mentioning the word.
cpu_nums=$(grep -c '^processor' /proc/cpuinfo)
# Run lscpu in the C locale so the "Thread(s) per core" label is not translated.
threads_per_core=$(LC_ALL=C lscpu \
    | awk -F: 'tolower($0) ~ /thread\(s\) per core/ { gsub(/[ \t]/, "", $2); print $2; exit }')
# lscpu missing or unexpected output: assume one thread per core instead of
# letting the division below fail and leave core_nums empty.
[[ "$threads_per_core" =~ ^[1-9][0-9]*$ ]] || threads_per_core=1
core_nums=$(( cpu_nums / threads_per_core ))
#######################################
# Build the core -> logical CPU tables used for IRQ binding.
# For every logical CPU 0..cpu_nums-1 the core id is taken from `lscpu -p`
# (second column) and the first two entries of the CPU's
# thread_siblings_list are stored in core_thread_A / core_thread_B.
# Globals:
#   cpu_nums, core_nums, sysfs_path (read)
#   core_thread_A, core_thread_B    (written, indexed by core id)
# Arguments: none
# Outputs:   one line per CPU to syslog via logger (tag virt-net-init)
#######################################
function build_cpu_topo(){
    local lscpu_info cpu core _rest
    local i core_id sib_file siblings first second rest
    local -a cpu_core_id=()

    lscpu_info=$(lscpu -p)
    # Parse the "CPU,Core,..." table once instead of grepping it for every CPU.
    # Comment lines ("# ...") fail the numeric check and are skipped.
    while IFS=, read -r cpu core _rest; do
        [[ "$cpu" =~ ^[0-9]+$ && "$core" =~ ^[0-9]+$ ]] || continue
        cpu_core_id[$cpu]=$core
    done <<< "$lscpu_info"

    for (( i = 0; i < cpu_nums; i++ )); do
        core_id=${cpu_core_id[$i]}
        if [[ -z "$core_id" ]]; then
            # Without a core id the array subscript below would be invalid.
            echo "[WARN] no core id found for cpu${i}, skipped." | logger -i -t 'virt-net-init'
            continue
        fi

        # Read the sibling list once; a missing file leaves both entries empty.
        sib_file="${sysfs_path}/cpu${i}/topology/thread_siblings_list"
        siblings=""
        [[ -r "$sib_file" ]] && read -r siblings < "$sib_file"

        if [[ "$siblings" == *-* ]]; then
            # Range form such as "0-1": first and second '-' separated fields.
            first=${siblings%%-*}
            rest=${siblings#*-}
            second=${rest%%-*}
        else
            # List form such as "0,4", or a single CPU such as "3".
            first=${siblings%%,*}
            second=""
            if [[ "$siblings" == *,* ]]; then
                rest=${siblings#*,}
                second=${rest%%,*}
            fi
        fi
        core_thread_A[$core_id]=$first
        core_thread_B[$core_id]=$second

        echo "core_nums=$core_nums,core_thread_A[$core_id]:${core_thread_A[$core_id]},core_thread_B[$core_id]:${core_thread_B[$core_id]}" | logger -i -t 'virt-net-init'
    done
}
#get specified mask
#######################################
# Print the affinity mask that selects exactly one CPU.
# The mask is printed as comma-separated 32-bit hexadecimal words, most
# significant word first (e.g. "1,00000000" for the 33rd CPU).
# Arguments:
#   $1 - 1-based CPU ordinal (logical CPU number + 1)
# Outputs:   the mask on stdout
# Returns:   0 on success, 1 when $1 is not an integer >= 1
#######################################
function get_specified_cpumask(){
    local cpu_num=$1
    local bit mask word_idx

    # Reject anything that would lead to a negative shift count below.
    if ! [[ "$cpu_num" =~ ^[0-9]+$ ]] || (( 10#$cpu_num < 1 )); then
        echo "[ERR] get_specified_cpumask: invalid cpu ordinal '${cpu_num}'" >&2
        return 1
    fi

    bit=$(( 10#$cpu_num - 1 ))
    # Highest word carries the single set bit ...
    printf -v mask '%x' "$(( 1 << (bit % 32) ))"
    # ... followed by one all-zero word per complete 32-CPU group below it.
    for (( word_idx = bit / 32; word_idx > 0; word_idx-- )); do
        mask+=",00000000"
    done
    echo "$mask"
}
#get all mask for rps/xps
#######################################
# Print the mask that selects every CPU from 0 to cpu_nums-1.
# Same format as the rps_cpus/xps_cpus files expect: comma-separated 32-bit
# hexadecimal words, most significant word first.
# Globals:   cpu_nums (read)
# Arguments: none
# Outputs:   the mask on stdout ("0" when cpu_nums is 0)
#######################################
function get_all_cpus() {
    local full_words=$(( cpu_nums / 32 ))
    local leftover=$(( cpu_nums % 32 ))
    local mask word_idx

    if (( leftover > 0 )); then
        # Partial top word, then one full word per complete group of 32 CPUs.
        printf -v mask '%x' "$(( (1 << leftover) - 1 ))"
    elif (( full_words > 0 )); then
        # Exact multiple of 32: the top word is itself a full word.
        mask="ffffffff"
        full_words=$(( full_words - 1 ))
    else
        mask="0"
    fi
    for (( word_idx = 0; word_idx < full_words; word_idx++ )); do
        mask+=",ffffffff"
    done
    echo "$mask"
}
#set xps
#######################################
# Write the all-CPU mask into every tx queue's xps_cpus file of a device.
# Globals:   none written (uses get_all_cpus for the mask)
# Arguments:
#   $1 - network interface name
# Outputs:   one [INFO] line per queue to syslog via logger
#######################################
function set_xps() {
    local dev=$1
    local xps_cpus xps_file

    # The mask does not depend on the queue, so compute it once.
    xps_cpus=$(get_all_cpus)
    for xps_file in /sys/class/net/"$dev"/queues/tx-*/xps_cpus; do
        # An unmatched glob stays literal; skip it instead of writing to it.
        [[ -e "$xps_file" ]] || continue
        echo "[INFO] set ${xps_cpus} into ${xps_file}." | logger -i -t 'virt-net-init'
        echo "${xps_cpus}" > "${xps_file}"
    done
}
#set rps/rfs for multicore when multiqueue is disabled
#######################################
# Enable RPS and RFS on every rx queue of a device: write the all-CPU mask to
# rps_cpus and rps_flow_cnt to the queue's rps_flow_cnt file.
# Queues are addressed as rx-0 .. rx-(N-1), where N is the number of rx-*
# entries found under the device's queues directory.
# Globals:   rps_flow_cnt (read)
# Arguments:
#   $1 - network interface name
# Outputs:   two [INFO] lines per queue to syslog via logger
#######################################
function set_rps_and_rfs(){
    local dev=$1
    local rps_cpus queue_dir num
    local queues=0

    rps_cpus=$(get_all_cpus)

    # Count rx queues with a glob rather than by parsing ls output.
    for queue_dir in /sys/class/net/"$dev"/queues/rx-*; do
        [[ -e "$queue_dir" ]] && queues=$(( queues + 1 ))
    done

    for (( num = 0; num < queues; num++ )); do
        queue_dir="/sys/class/net/$dev/queues/rx-$num"
        echo "${rps_cpus}" > "${queue_dir}/rps_cpus"
        echo "[INFO] set ${rps_cpus} into /sys/class/net/$dev/queues/rx-$num/rps_cpus." | logger -i -t 'virt-net-init'
        echo "${rps_flow_cnt}" > "${queue_dir}/rps_flow_cnt"
        echo "[INFO] set ${rps_flow_cnt} into /sys/class/net/$dev/queues/rx-$num/rps_flow_cnt." | logger -i -t 'virt-net-init'
    done
}
#######################################
# Bind each IRQ in a list to one CPU, advancing through the CPUs round-robin.
# Slots 0..core_nums-1 use core_thread_A (first thread of each core), the
# following slots use core_thread_B (second thread); the position wraps to 0
# once it reaches cpu_nums.
# Globals:
#   cur_corenum                  (read and advanced; persists across calls)
#   core_nums, cpu_nums          (read)
#   core_thread_A, core_thread_B (read)
# Arguments:
#   $1 - whitespace/newline separated IRQ numbers, passed as ONE string
# Outputs:   one [INFO] line per IRQ to syslog via logger
#######################################
function set_irq_affinity(){
    local irq_list=$1
    local irqnum mask
    local cur_cpunum=0

    # Word splitting is intentional here: callers hand over all IRQ numbers in
    # a single, possibly multi-line, string.
    # shellcheck disable=SC2086
    for irqnum in $irq_list; do
        if (( cur_corenum < core_nums )); then
            cur_cpunum=${core_thread_A[$cur_corenum]}
        else
            cur_cpunum=${core_thread_B[$(( cur_corenum - core_nums ))]}
        fi
        # get_specified_cpumask expects a 1-based CPU ordinal.
        mask=$(get_specified_cpumask $(( cur_cpunum + 1 )))
        echo "[INFO] irq:${irqnum} bind to cpu:$mask,cur_corenum=$cur_corenum,cur_cpunum=$cur_cpunum" | logger -i -t 'virt-net-init'
        echo "$mask" > "/proc/irq/$irqnum/smp_affinity"
        cur_corenum=$(( cur_corenum + 1 ))
        (( cur_corenum >= cpu_nums )) && cur_corenum=0
    done
}
#######################################
# Bind the interrupts of a virtio-net device to CPUs.
# The virtioN name is derived from the device's PCI address (ethtool
# bus-info) and the matching ".../virtioN/net" directory under /sys/devices;
# the IRQs are the /proc/interrupts lines named "virtioN-<direction>".
# With by_queue=true the input and output vectors are bound separately, each
# pass starting from the same cur_corenum; otherwise a single pass covers
# both (pattern ".*put").
# Globals:
#   cpu_nums, by_queue (read)
#   cur_corenum        (reset to its entry value before each pass; left as
#                       set_irq_affinity leaves it after the last pass)
# Arguments:
#   $1 - network interface name
# Outputs:   [ERR]/[INFO] messages on stdout
#######################################
function set_smp_affinity_virtio(){
    local dev=$1
    local pci_dbdf net_dir virtio_dev virtio_num
    local direction irq_list cur_corenum_saved
    local -a direction_key

    pci_dbdf=$(ethtool -i "$dev" | grep bus-info | cut -d' ' -f2)
    if [[ -z "$pci_dbdf" ]]; then
        echo "[ERR] No NIC detected"
        return
    fi

    # Checked before the /sys/devices walk below, which is only needed when
    # interrupts are actually going to be bound.
    if [ "${cpu_nums}" -le 2 ]; then
        echo "[INFO] don't need set affinity for net interrupt!!"
        return
    fi

    # -F: the PCI address contains dots that must not act as regex wildcards.
    net_dir=$(find /sys/devices/ -type d | grep -F -- "$pci_dbdf" | grep "/net$")
    virtio_dev=${net_dir%/*}
    # get virtioX
    virtio_num=${virtio_dev##*/}
    if [[ -z "$virtio_num" ]]; then
        # An empty name would make the grep pattern below start with '-'.
        echo "[ERR] No virtio device found for $dev ($pci_dbdf)"
        return
    fi

    # by queue
    direction_key=(input output)
    [[ "$by_queue" == true ]] || direction_key=(".*put")

    cur_corenum_saved=$cur_corenum
    for direction in "${direction_key[@]}"; do
        cur_corenum=$cur_corenum_saved
        # Keep only the IRQ number column of the matching lines.
        irq_list=$(grep -e "${virtio_num}-${direction}" /proc/interrupts \
            | sed -e "s/: .*//g" -e "s/^ *//g")
        set_irq_affinity "$irq_list"
    done
}
#######################################
# Bind the interrupts of a non-virtio NIC to CPUs.
# IRQs are looked up in /proc/interrupts, first by "<dev>-..." vector names
# and, if none match, by mlx5_comp/mlx6_comp vectors carrying the directory
# name one level above $2.
# Globals:
#   cpu_nums                     (read)
#   queue_num, irq_nums, bdf_num (written)
# Arguments:
#   $1 - network interface name
#   $2 - the device's ".../net" directory under /sys/devices
# Outputs:   informational messages on stdout
#######################################
function set_smp_affinity_vfio(){
    local dev=$1
    local dir=$2

    # Nothing is bound when there are at most two CPUs.
    if [ ${cpu_nums} -le 2 ]; then
        echo "[INFO] don't need set affinity for net interrupt!!"
        return
    fi

    # First choice: vectors named after the interface, such as eth0-xx, xgbe0-xx
    queue_num="${dev}-.*"
    irq_nums=$(grep "${queue_num}" /proc/interrupts | awk -F: '{print $1}')

    # Fallback: completion vectors tagged with the parent directory name of $2.
    if [ -z "${irq_nums}" ]; then
        bdf_num=${dir%/*}
        bdf_num=${bdf_num##*/}
        queue_num="mlx5_comp.*${bdf_num}|mlx6_comp.*${bdf_num}"
        irq_nums=$(grep -iE "${queue_num}" /proc/interrupts | awk -F: '{print $1}')
    fi

    if [ -n "${irq_nums}" ]; then
        set_irq_affinity "${irq_nums}"
        return
    fi
    echo "There is no interrupts for NIC found. Please check ... ..."
}
#set multiqueue
#######################################
# Raise a device's combined channel count to its pre-set maximum.
# `ethtool -l` prints a "Combined:" line twice: first under the pre-set
# maximums, then under the current settings.
# Arguments:
#   $1 - network interface name
# Outputs:   ethtool output and [INFO]/[WARN] lines to syslog via logger
#######################################
function set_multiqueue() {
    local dev=$1
    local combined pre_set cur_set

    # Query once; first value is the maximum, last value the current setting.
    combined=$(ethtool -l "${dev}" | awk '/Combined/ {print $2}')
    pre_set=${combined%%$'\n'*}
    cur_set=${combined##*$'\n'}

    # Missing or non-numeric values (e.g. "n/a") cannot be compared; do not
    # report multiqueue as enabled in that case.
    if ! [[ "$pre_set" =~ ^[0-9]+$ && "$cur_set" =~ ^[0-9]+$ ]]; then
        echo "[WARN] cannot read channel numbers for $dev (max='${pre_set}', current='${cur_set}'), multiqueue is left unchanged." | logger -i -t 'virt-net-init'
        return
    fi

    #virtio-net will enable multiqueue by default
    if [ "${pre_set}" -gt "${cur_set}" ]; then
        ethtool -L "${dev}" combined "${pre_set}" 2>&1 | logger -i -t 'virt-net-init'
        # PIPESTATUS[0] is ethtool's own exit status, not logger's.
        if [ "${PIPESTATUS[0]}" -ne 0 ]; then
            echo "[WARN] set multiqueue[chnum = ${cur_set}] for $dev failed. Ignore this if multiqueue is already set or chnum = 1." | logger -i -t 'virt-net-init'
        fi
    else
        echo "[INFO] multiqueue[chnum= ${cur_set}] for $dev is enabled." | logger -i -t 'virt-net-init'
    fi
}
#######################################
# Entry point: tune every non-virtual network device found under /sys/devices.
#   - Devices whose maximum combined channel count is > 1 get multiqueue
#     enabled and their interrupts bound to CPUs (virtio or other driver).
#   - All other devices get RPS/RFS and XPS; afterwards
#     rps_sock_flow_entries is set to (number of rx queues) * rps_flow_cnt.
# Globals:   rps_flow_cnt (read)
# Arguments: none
# Outputs:   progress messages to syslog via logger
#######################################
function main(){
    local nic_cnt attempt dir_list dir_count dir dev
    local queue_nums queues rx_dir rps_sock_flow_entries
    local total_queues=0

    build_cpu_topo

    # Wait (up to 5 tries, 0.5s apart) until as many net directories exist as
    # lspci reports Ethernet devices.
    nic_cnt=$(lspci | grep -ci ethernet)
    for (( attempt = 0; attempt < 5; attempt++ )); do
        dir_list=$(find /sys/devices/ -type d | grep "/net$" | grep -v "virtual")
        dir_count=$(wc -w <<< "$dir_list")
        if (( dir_count == nic_cnt )); then
            break
        fi
        sleep 0.5
    done
    echo "[INFO] wait $attempt times,dir_list=$dir_list" | logger -i -t 'virt-net-init'

    # Word splitting is intentional: dir_list holds one path per line.
    # shellcheck disable=SC2086
    for dir in $dir_list; do
        dev=$(ls "$dir")
        if [[ -z "$dev" ]]; then
            echo "[WARN] net device isn't exist." | logger -i -t 'virt-net-init'
            continue
        fi

        #check if multiqueue is available
        # First "Combined" line = pre-set maximum. stderr is discarded (not
        # closed) so ethtool can still write its diagnostics somewhere.
        queue_nums=$(ethtool -l "$dev" 2>/dev/null | awk '/Combined/ {print $2; exit}')
        if [[ "$queue_nums" =~ ^[0-9]+$ ]] && [ "$queue_nums" -gt 1 ]; then
            #set multiqueue and affinity
            #echo "set multiqueue for ${dev},and rps/rfs don't need to set."
            set_multiqueue "${dev}"
            # -q: only the exit status is needed, do not echo the driver line.
            if ethtool -i "$dev" 2>/dev/null | grep -q "virtio_net"; then
                echo "[INFO] dev:$dev is virtio type,queue_nums=$queue_nums" | logger -i -t 'virt-net-init'
                set_smp_affinity_virtio "${dev}"
            else
                echo "[INFO] dev:$dev is vfio type,queue_nums=$queue_nums" | logger -i -t 'virt-net-init'
                set_smp_affinity_vfio "${dev}" "${dir}"
            fi
        else
            #set rps and rfs
            set_rps_and_rfs "${dev}"
            queues=0
            for rx_dir in /sys/class/net/"$dev"/queues/rx-*; do
                [[ -e "$rx_dir" ]] && queues=$(( queues + 1 ))
            done
            total_queues=$(( total_queues + queues ))
            set_xps "${dev}"
        fi
    done

    if (( total_queues != 0 )); then
        rps_sock_flow_entries=$(( total_queues * rps_flow_cnt ))
        echo "${rps_sock_flow_entries}" > /proc/sys/net/core/rps_sock_flow_entries
        echo "[INFO] set ${rps_sock_flow_entries} for rfs." | logger -i -t 'virt-net-init'
    fi
}
# By default IRQs are bound per queue direction (input and output vectors are
# handled as separate passes in set_smp_affinity_virtio).
by_queue=true
# By default IRQs are bound following the cpu processors.
# NOTE(review): by_proc is set here but not read by any function visible in
# this file -- confirm whether it is still needed.
by_proc=true
# "$@" keeps every argument intact: no word splitting, no glob expansion.
for arg in "$@"; do
    echo "[INFO] input arg:$arg"
    case "$arg" in
        core*)
            by_proc=false; shift ;;
        direct*)
            by_queue=false; shift ;;
        proc*)
            by_proc=true; shift ;;
        queue*)
            by_queue=true; shift ;;
        *)
            ;;
    esac
done
main