I am using a 10G GbE PHY link connected to a switch, and my iperf3 tests gave the following results:
| 测试方向/内容 | 测试协议 | 测试时间 | 发送者速率 | 接收者速率 | 发送者抖动 | 接收者抖动 | 发送者丢包率 | 接收者丢包率 | 测试命令 |
|---|---|---|---|---|---|---|---|---|---|
| PC1->PC2 | 标准 TCP 上行 | 30s | 4.80 Gbits/sec | 4.80 Gbits/sec | / | / | / | / | iperf3 -c 192.168.3.198 -t 30 |
| PC1<-PC2 | 标准 TCP 下行 | 30s | 8.24 Gbits/sec | 8.24 Gbits/sec | / | / | / | / | iperf3 -c 192.168.3.198 -t 30 -R |
| PC1<->PC2 | 双向同时测试(TCP) | 30s | 上行 TX-C(客户端→服务器)3.70 Gbits/sec | 3.70 Gbits/sec | / | / | / | / | iperf3 -c 192.168.3.198 -t 30 --bidir |
| | | | 下行 RX-C(服务器→客户端)6.57 Gbits/sec | 6.57 Gbits/sec | / | | | | |
| PC1->PC2 | 标准 TCP 上行(4并行) | 30s | 8.82 Gbits/sec | 8.82 Gbits/sec | / | / | / | / | iperf3 -c 192.168.3.198 -t 30 -P 4 |
| PC1<-PC2 | 标准 TCP 下行(4并行) | 30s | 7.97 Gbits/sec | 7.97 Gbits/sec | / | / | / | / | iperf3 -c 192.168.3.198 -t 30 -P 4 -R |
| PC1<->PC2 | 双向同时测试(TCP)(4并行) | 30s | 上行 TX-C(客户端→服务器)4.47 Gbits/sec | 4.47 Gbits/sec | / | / | / | / | iperf3 -c 192.168.3.198 -t 30 --bidir -P 4 |
| | | | 下行 RX-C(服务器→客户端)6.82 Gbits/sec | 6.81 Gbits/sec | / | | | | |
| PC1->PC2 | UDP 10Gbps(万兆网络压力测试) | 30s | 2.19 Gbits/sec | 2.17 Gbits/sec | 0.000 ms | 0.014 ms | 0/5666977 (0%) | 39307/5666977 (0.69%) | iperf3 -c 192.168.3.198 -t 30 -u -b 10G |
| PC1<-PC2 | UDP 10Gbps 反向(测下行抖动丢包) | 30s | 1.90 Gbits/sec | 1.90 Gbits/sec | 0.000 ms | 0.006 ms | 0/4929630 (0%) | 306/4929630 (0.0062%) | iperf3 -c 192.168.3.198 -t 30 -u -b 10G -R |
| PC1->PC2 | 小包-模拟语音业务(1000字节) | 30s | 1.53 Gbits/sec | 1.32 Gbits/sec | 0.000 ms | 0.007 ms | 0/5740043 (0%) | 807743/5740043 (14%) | iperf3 -c 192.168.3.198 -t 30 -u -b 10G -l 1000 |
| PC1->PC2 | 大包-模拟文件传输(8K字节,4路并行) | 30s | 8.95 Gbits/sec | 8.94 Gbits/sec | / | / | / | / | iperf3 -c 192.168.3.198 -t 30 -l 8K -P 4 |
| PC1->PC2 | 标准MTU包(1470字节,接近以太网最大) | 30s | 2.30 Gbits/sec | 2.27 Gbits/sec | 0.000 ms | 0.004 ms | 0/5871060 (0%) | 92703/5871060 (1.6%) | iperf3 -c 192.168.3.198 -t 30 -u -b 10G -l 1470 |
| PC1->PC2 | 5分钟稳定性(4路并行) | 300s | 7.90 Gbits/sec | 7.90 Gbits/sec | / | / | / | / | iperf3 -c 192.168.3.198 -t 300 -P 4 |
So I then tested UDP with multiple parallel streams, each pinned to its own CPU core, using this script:
#!/bin/bash
# ============================================================
# Parallel multi-stream UDP test script (Linux client)
# Dependencies: iperf3, taskset, python3 (python3 parses iperf3's JSON output)
#
# Every scalar setting below keeps its default unless a variable of the same
# name is already set in the environment, e.g.
#   SERVER_IP=10.0.0.2 NUM_STREAMS=4 bash <this-script>
# ============================================================
# ------------------------------------------------------------
# Configuration - adjust as needed
# ------------------------------------------------------------
SERVER_IP="${SERVER_IP:-192.168.3.198}"          # IP address of the peer (Windows) machine
BASE_PORT="${BASE_PORT:-5201}"                   # first server port; must match the server-side script
NUM_STREAMS="${NUM_STREAMS:-10}"                 # number of parallel streams (same as on the server side)
DURATION="${DURATION:-30}"                       # test duration of each stream, in seconds
UDP_BANDWIDTH="${UDP_BANDWIDTH:-1G}"             # target bandwidth per stream, e.g. 1G / 500M / 2G
UDP_PKT_LEN="${UDP_PKT_LEN:-1400}"               # UDP packet size in bytes; aligning with MTU-28 is recommended
CPORT_BASE="${CPORT_BASE:-10001}"                # first client source port; each stream uses the next one
RESULT_DIR="${RESULT_DIR:-/tmp/iperf3_results}"  # directory where the results are stored
# CPU affinity configuration
# List the CPU core numbers to use; there should be at least NUM_STREAMS of
# them, otherwise several streams end up sharing a core.
# Example: CPU_LIST=(0 2 4 6 8 10 1 3 5 7) uses 10 cores
CPU_LIST=(0 1 2 3 4 5 6 7 8 9 10 11)
# ------------------------------------------------------------
# Initialisation: make sure the result directory exists and remove
# the per-stream logs and summary left over from a previous run
# ------------------------------------------------------------
mkdir -p "$RESULT_DIR"
rm -f "$RESULT_DIR"/stream_*.log "$RESULT_DIR"/summary.txt
# Banner describing the run that is about to start (one argument per line)
printf '%s\n' \
  "============================================================" \
  " iperf3 UDP 多流并行测试" \
  " 目标服务器 : ${SERVER_IP}" \
  " 流数量 : ${NUM_STREAMS}" \
  " 每流带宽 : ${UDP_BANDWIDTH}" \
  " 测试时长 : ${DURATION}s" \
  " 包长度 : ${UDP_PKT_LEN} bytes" \
  " 起始端口 : ${BASE_PORT}" \
  "============================================================" \
  ""
# ------------------------------------------------------------
# Warn, without aborting, when more streams are requested than
# CPU_LIST has cores: some streams will then share a core
# ------------------------------------------------------------
if [ "$NUM_STREAMS" -gt ${#CPU_LIST[@]} ]; then
  printf '%s\n' \
    "[WARNING] CPU_LIST 中核心数 (${#CPU_LIST[@]}) < NUM_STREAMS ($NUM_STREAMS)" \
    " 部分流将复用同一个核心"
fi
# ------------------------------------------------------------
# Launch every iperf3 client instance in the background
# ------------------------------------------------------------
# Stream i connects to server port BASE_PORT+i from source port CPORT_BASE+i
# and is pinned with taskset to CPU_LIST[i % number-of-cores], so cores are
# reused round-robin when there are fewer cores than streams.
if (( NUM_STREAMS > 0 && ${#CPU_LIST[@]} == 0 )); then
  # With an empty list the modulo below would divide by zero.
  echo "[ERROR] CPU_LIST 为空,无法为各条流分配 CPU 核心" >&2
  exit 1
fi
PIDS=()
for (( i=0; i<NUM_STREAMS; i++ )); do
  PORT=$(( BASE_PORT + i ))
  CPORT=$(( CPORT_BASE + i ))
  CPU_IDX=$(( i % ${#CPU_LIST[@]} ))
  CPU="${CPU_LIST[$CPU_IDX]}"
  LOG_FILE="${RESULT_DIR}/stream_${i}.log"
  # Keep stderr out of the JSON file: any diagnostic text from taskset or
  # iperf3 mixed into it would make the log unparseable as JSON.
  ERR_FILE="${RESULT_DIR}/stream_${i}.stderr.log"
  echo "[Stream $i] port=${PORT} cport=${CPORT} cpu=${CPU} log=${LOG_FILE}"
  taskset -c "$CPU" iperf3 \
    -c "$SERVER_IP" \
    -p "$PORT" \
    --cport "$CPORT" \
    -u \
    -b "$UDP_BANDWIDTH" \
    -l "$UDP_PKT_LEN" \
    -t "$DURATION" \
    --json \
    > "$LOG_FILE" 2> "$ERR_FILE" &
  PIDS+=("$!")
done
echo ""
echo "全部 ${NUM_STREAMS} 条流已启动,等待测试完成(约 ${DURATION}s)..."
echo ""
# ------------------------------------------------------------
# Wait for every instance to finish
# ------------------------------------------------------------
# PIDS is filled in stream order, so the array index is the stream number.
for idx in "${!PIDS[@]}"; do
  wait "${PIDS[$idx]}"
  rc=$?
  if (( rc != 0 )); then
    # Report the failure right away instead of discarding the exit status.
    echo "[WARNING] Stream ${idx} 的 iperf3 退出码为 ${rc},请查看 ${RESULT_DIR}/stream_${idx}.stderr.log" >&2
  fi
done
echo "============================================================"
echo " 测试完成,汇总结果:"
echo "============================================================"
# ------------------------------------------------------------
# Summarise the results
# ------------------------------------------------------------

#######################################
# Extract the end-of-test totals from one iperf3 --json log.
# The file is parsed once and its path is passed as an argument rather than
# being pasted into the Python source.
# Arguments: $1 - path to the log file
# Outputs:   "<bits_per_second> <lost_packets> <packets>" as integers taken
#            from end.sum (missing keys count as 0); "0 -1 0" when the file
#            cannot be read or parsed.
#######################################
parse_stream_log() {
  python3 -c '
import json, sys
try:
    with open(sys.argv[1]) as fh:
        end_sum = json.load(fh).get("end", {}).get("sum", {})
    bps = int(end_sum.get("bits_per_second", 0))
    lost = int(end_sum.get("lost_packets", 0))
    sent = int(end_sum.get("packets", 0))
    print(bps, lost, sent)
except Exception:
    print(0, -1, 0)
' "$1" 2>/dev/null || echo "0 -1 0"
}

#######################################
# Format a bit rate as Gbps with three decimals.
# Arguments: $1 - integer bits per second
# Outputs:   e.g. "0.281 Gbps"; "N/A" when formatting fails
#######################################
fmt_gbps() {
  python3 -c 'import sys; print(f"{int(sys.argv[1]) / 1e9:.3f} Gbps")' "$1" \
    2>/dev/null || echo "N/A"
}

#######################################
# Format lost/sent as a percentage with two decimals.
# Arguments: $1 - lost packets, $2 - sent packets (non-zero)
# Outputs:   e.g. "0.57%"; "N/A" when formatting fails
#######################################
fmt_pct() {
  python3 -c 'import sys; print(f"{int(sys.argv[1]) / int(sys.argv[2]) * 100:.2f}%")' \
    "$1" "$2" 2>/dev/null || echo "N/A"
}

TOTAL_BPS=0
TOTAL_LOST=0
TOTAL_SENT=0
FAIL_COUNT=0
# Header: shown on the terminal and used to start a fresh summary.txt
{
  echo "============================================================"
  echo " iperf3 UDP 多流并行测试结果汇总"
  echo " 时间: $(date)"
  echo " 服务器: ${SERVER_IP} 流数: ${NUM_STREAMS} 时长: ${DURATION}s"
  echo " 每流带宽: ${UDP_BANDWIDTH} 包长: ${UDP_PKT_LEN}B"
  echo "------------------------------------------------------------"
  printf " %-8s %-12s %-12s %-10s %-10s %-10s\n" \
    "Stream" "Port" "CPU" "Throughput" "Lost" "Lost%"
  echo "------------------------------------------------------------"
} | tee "$RESULT_DIR/summary.txt"
# One row per stream. This loop stays in the current shell (only the printf
# is piped to tee) so that the running totals survive for the TOTAL row.
for (( i=0; i<NUM_STREAMS; i++ )); do
  PORT=$(( BASE_PORT + i ))
  CPU_IDX=$(( i % ${#CPU_LIST[@]} ))
  CPU="${CPU_LIST[$CPU_IDX]}"
  LOG_FILE="${RESULT_DIR}/stream_${i}.log"
  if [[ ! -f "$LOG_FILE" ]]; then
    printf " %-8s %-12s %-12s %-10s\n" "$i" "$PORT" "cpu${CPU}" "MISSING" \
      | tee -a "$RESULT_DIR/summary.txt"
    FAIL_COUNT=$(( FAIL_COUNT + 1 ))
    continue
  fi
  # Pull end.sum.bits_per_second / lost_packets / packets out of the JSON
  read -r BPS LOST SENT <<< "$(parse_stream_log "$LOG_FILE")"
  # Fall back to the "unparseable" sentinels if nothing was read
  : "${BPS:=0}" "${LOST:=-1}" "${SENT:=0}"
  # A stream only counts as successful when it reports a positive bit rate
  if (( BPS > 0 )); then
    GBPS=$(fmt_gbps "$BPS")
    if (( SENT > 0 && LOST >= 0 )); then
      LOST_PCT=$(fmt_pct "$LOST" "$SENT")
    else
      LOST_PCT="N/A"
    fi
    TOTAL_BPS=$(( TOTAL_BPS + BPS ))
    TOTAL_LOST=$(( TOTAL_LOST + LOST ))
    TOTAL_SENT=$(( TOTAL_SENT + SENT ))
  else
    GBPS="ERROR"
    LOST_PCT="N/A"
    FAIL_COUNT=$(( FAIL_COUNT + 1 ))
  fi
  printf " %-8s %-12s %-12s %-10s %-10s %-10s\n" \
    "$i" "$PORT" "cpu${CPU}" "$GBPS" "$LOST" "$LOST_PCT" \
    | tee -a "$RESULT_DIR/summary.txt"
done
# ------------------------------------------------------------
# Print the totals row
# ------------------------------------------------------------
{
  echo "------------------------------------------------------------"
  TOTAL_GBPS=$(fmt_gbps "$TOTAL_BPS")
  if (( TOTAL_SENT > 0 )); then
    TOTAL_LOST_PCT=$(fmt_pct "$TOTAL_LOST" "$TOTAL_SENT")
  else
    TOTAL_LOST_PCT="N/A"
  fi
  printf " %-8s %-12s %-12s %-10s %-10s %-10s\n" \
    "TOTAL" "" "" "$TOTAL_GBPS" "$TOTAL_LOST" "$TOTAL_LOST_PCT"
  echo "------------------------------------------------------------"
  if (( FAIL_COUNT > 0 )); then
    echo " [!] ${FAIL_COUNT} 条流出现错误,请检查对应 log 文件"
  fi
  echo " 详细日志目录: $RESULT_DIR"
  echo "============================================================"
} | tee -a "$RESULT_DIR/summary.txt"
Each UDP stream was pinned to its own CPU core, but the result is:
============================================================
iperf3 UDP 多流并行测试结果汇总
时间: Thu Feb 20 08:25:51 AM EST 2025
服务器: 192.168.3.198 流数: 10 时长: 30s
每流带宽: 1G 包长: 1400B
------------------------------------------------------------
Stream Port CPU Throughput Lost Lost%
------------------------------------------------------------
0 5201 cpu0 0.045 Gbps 697 0.57%
1 5202 cpu1 0.248 Gbps 320 0.05%
2 5203 cpu2 0.281 Gbps 250 0.03%
3 5204 cpu3 0.281 Gbps 10 0.00%
4 5205 cpu4 0.280 Gbps 79 0.01%
5 5206 cpu5 0.281 Gbps 223 0.03%
6 5207 cpu6 0.281 Gbps 26 0.00%
7 5208 cpu7 0.280 Gbps 19 0.00%
8 5209 cpu8 0.281 Gbps 35 0.00%
9 5210 cpu9 0.280 Gbps 29 0.00%
------------------------------------------------------------
TOTAL 2.538 Gbps 1688 0.02%
------------------------------------------------------------
详细日志目录: /tmp/iperf3_results
============================================================
I also noticed the following in the driver:
/**
 * @brief Pick the transmit queue whose configured priority matches the packet
 *
 * Algorithm:
 * 1) Take the packet priority from skb->priority, or from the VLAN tag
 *    priority when the skb carries a VLAN tag.
 * 2) Walk osi_core->mtl_queues and return the index of the first entry whose
 *    pdata->txq_prio value equals that priority.
 * 3) Return index 0 when no queue matches.
 *
 * @param[in] dev: Network device pointer
 * @param[in] skb: sk_buff pointer, buffer data to send
 * @param[in] sb_dev: second net_device pointer from the callback signature;
 *                    not referenced by this function
 *
 * @retval "transmit queue index"
 */
static unsigned short ether_select_queue(struct net_device *dev,
					 struct sk_buff *skb,
					 struct net_device *sb_dev)
{
	struct ether_priv_data *pdata = netdev_priv(dev);
	struct osi_core_priv_data *osi_core = pdata->osi_core;
	unsigned int prio;
	unsigned int idx;

	/* A VLAN tag, when present, takes precedence over skb->priority */
	prio = skb_vlan_tag_present(skb) ? skb_vlan_tag_get_prio(skb)
					 : skb->priority;

	for (idx = 0; idx < osi_core->num_mtl_queues; idx++) {
		/* mtl_queues[] maps the array index to the queue number
		 * that txq_prio[] is indexed by
		 */
		unsigned int hw_queue = osi_core->mtl_queues[idx];

		if (pdata->txq_prio[hw_queue] == prio)
			return (unsigned short)idx;
	}

	/* No queue is configured for this priority: use array index 0 */
	return 0;
}
Here the selected TX queue is always 0. Does this code limit the UDP throughput? And is TCP able to reach about 8.7 Gbit/s only because of TSO?