netdev_max_backlog
/proc/sys/net/core
sysctl -w net.core.rmem_max=16777216   # max recv - thought this would save me
sysctl -w net.core.wmem_max=16777216   # max send - narrator: it did not
netstat -s | grep -i drop
netdev_max_backlog
/proc/net/softnet_stat
net.core.rmem_max = 134217728        # 128MB ceiling (recv)
net.core.wmem_max = 134217728        # 128MB ceiling (send)
net.core.rmem_default = 16777216     # 16MB starting point
net.core.wmem_default = 16777216     # lets auto-tuning grow from here

# Per-socket TCP tuning: min, default, max in bytes
net.ipv4.tcp_rmem = 4096 87380 134217728   # recv can grow to 128MB
net.ipv4.tcp_wmem = 4096 65536 134217728   # send grows too

# CRITICAL: This is in 4KB pages, NOT bytes
net.ipv4.tcp_mem = 6291456 8388608 12582912   # ~24GB total across all sockets

net.core.netdev_max_backlog = 250000   # the hero that saved us
net.core.default_qdisc = fq              # fair queuing for pacing
net.ipv4.tcp_congestion_control = bbr    # goodbye CUBIC
ethtool -g eth0   # check current and max ring sizes

# What I saw:
# Pre-set maximums:
#   RX: 4096   ← NIC supports this
# Current hardware settings:
#   RX: 512    ← are you kidding me
ethtool -G eth0 rx 4096 tx 4096   # bump to hardware max
ethtool -c eth0   # see current coalescing settings

# What worked for bulk replication on our 10G boxes:
ethtool -C eth0 rx-usecs 128 rx-frames 64   # wait 128µs or 64 packets

# What you'd use for twitchy latency (we used something in between):
ethtool -C eth0 rx-usecs 0 rx-frames 1      # interrupt immediately
rx-usecs 10 rx-frames 8
netdev_max_backlog
ethtool -l eth0               # how many RSS queues are active
ethtool -L eth0 combined 16   # match your CPU count
cat /proc/interrupts | grep eth0   # see which CPUs handle which queues
/proc/interrupts
ethtool -g eth0
net.core.netdev_max_backlog
/proc/net/softnet_stat
ethtool -l/-L
/proc/interrupts
netdev_max_backlog
/proc/net/softnet_stat

- Zero packet loss under production bursts (was losing 15–20%)
- 10Gbps sustained (was averaging 7.2Gbps)
- WAN transfers: 3.5Gbps → 5.8Gbps with BBR
- P99 latency: 2.1ms → 180µs
- CPU cost per packet dropped 15% (fewer interrupts = more useful work)

- Check your RX ring: ethtool -g eth0 and bump it if you're still at 512.
- Check your backlog and drops: net.core.netdev_max_backlog + /proc/net/softnet_stat.
- Check RSS: ethtool -l/-L and /proc/interrupts to make sure all cores are actually getting traffic.

- 🚀 Follow The Speed Engineer for more Rust, Go and high-performance engineering stories.
- 💡 Like this article? Follow for daily speed-engineering benchmarks and tactics.
- ⚡ Stay ahead in Rust and Go — follow for a fresh article every morning & night.