Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ defmt = ["dep:defmt", "heapless/defmt"]

"packetmeta-id" = []

# Enables segmentation offload support.
"segmentation-offload" = []

"async" = []

# Automatically reply on an ICMP echo request
Expand Down
28 changes: 20 additions & 8 deletions src/iface/interface/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -770,13 +770,11 @@ impl Interface {
})
}
#[cfg(feature = "socket-tcp")]
Socket::Tcp(socket) => socket.dispatch(&mut self.inner, |inner, (ip, tcp)| {
respond(
inner,
PacketMeta::default(),
Packet::new(ip, IpPayload::Tcp(tcp)),
)
}),
Socket::Tcp(socket) => {
socket.dispatch(&mut self.inner, |inner, meta, (ip, tcp)| {
respond(inner, meta, Packet::new(ip, IpPayload::Tcp(tcp)))
})
}
#[cfg(feature = "socket-dhcpv4")]
Socket::Dhcpv4(socket) => {
socket.dispatch(&mut self.inner, |inner, (ip, udp, dhcp)| {
Expand Down Expand Up @@ -833,6 +831,12 @@ impl InterfaceInner {
self.caps.checksum.clone()
}

/// Returns a copy of the segmentation offload capabilities stored in `self.caps`.
///
/// Only available when the `segmentation-offload` feature is enabled.
#[cfg(feature = "segmentation-offload")]
#[allow(unused)] // unused depending on which sockets are enabled
pub(crate) fn segmentation_caps(&self) -> crate::phy::SegmentationCapabilities {
// Cloned so the caller gets an owned value instead of a borrow of `self`.
self.caps.segmentation.clone()
}

#[allow(unused)] // unused depending on which sockets are enabled
pub(crate) fn ip_mtu(&self) -> usize {
self.caps.ip_mtu()
Expand Down Expand Up @@ -1273,7 +1277,15 @@ impl InterfaceInner {
#[cfg(feature = "proto-ipv4")]
IpRepr::Ipv4(repr) => {
// If we have an IPv4 packet, then we need to check if we need to fragment it.
if total_ip_len > self.caps.ip_mtu() {
let should_fragment = total_ip_len > self.caps.ip_mtu();

// If the metadata includes a target segment size (i.e. `segmentation_offload_size` is
// `Some`), the packet will be segmented by the device and fragmentation on our side
// is not necessary.
#[cfg(feature = "segmentation-offload")]
let should_fragment = should_fragment && meta.segmentation_offload_size.is_none();

if should_fragment {
#[cfg(feature = "proto-ipv4-fragmentation")]
{
net_debug!("start fragmentation");
Expand Down
35 changes: 34 additions & 1 deletion src/phy/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ impl<'a> phy::TxToken for StmPhyTxToken<'a> {
)]

use crate::time::Instant;
#[cfg(feature = "segmentation-offload")]
use core::num::{NonZeroU16, NonZeroUsize};

#[cfg(all(
any(feature = "phy-raw_socket", feature = "phy-tuntap_interface"),
Expand Down Expand Up @@ -147,7 +149,7 @@ pub const IPV4_FRAGMENT_PAYLOAD_ALIGNMENT: usize = 8;
/// struct becomes zero-sized, which allows the compiler to optimize it out as if
/// the packet metadata mechanism didn't exist at all.
///
/// Currently only UDP sockets allow setting/retrieving packet metadata. The metadata
/// Currently only TCP and UDP sockets allow setting/retrieving packet metadata. The metadata
/// for packets emitted with other sockets will be all default values.
///
/// This struct is marked as `#[non_exhaustive]`. This means it is not possible to
Expand All @@ -168,6 +170,8 @@ pub const IPV4_FRAGMENT_PAYLOAD_ALIGNMENT: usize = 8;
pub struct PacketMeta {
/// Numeric identifier attached to the packet (only with the `packetmeta-id` feature).
// NOTE(review): how this id is assigned and consumed is not visible here — confirm
// before expanding this doc.
#[cfg(feature = "packetmeta-id")]
pub id: u32,
/// Target segment size for segmentation offload, or `None` if the packet is not meant
/// to be segmented by the device (only with the `segmentation-offload` feature).
///
/// The TCP socket sets this to the maximum TCP payload of a single segment when it
/// emits a payload larger than one segment. When it is `Some`, the interface does not
/// fragment an IPv4 packet that exceeds the IP MTU, because the device is expected to
/// segment it.
#[cfg(feature = "segmentation-offload")]
pub segmentation_offload_size: Option<NonZeroU16>,
}

/// A description of checksum behavior for a particular protocol.
Expand Down Expand Up @@ -233,6 +237,28 @@ impl ChecksumCapabilities {
}
}

/// A description of the segmentation offload capabilities of a device.
///
/// Each field holds the maximum buffer size for a particular protocol or protocol
/// pair that can be offloaded to the device for segmentation, or [`None`] if
/// segmentation offload is not supported for it.
///
/// For Ethernet devices, this size includes the Ethernet header (14 octets), but
/// *not* the Ethernet FCS (4 octets).
///
/// If the device supports unsegmented IP packets with (depending on the IP
/// version, total or payload) lengths greater than [`u16::MAX`], it should not
/// rely on the length field in the IP header, as the actual length cannot be
/// represented there. The length field will be set to 0 instead.
#[derive(Debug, Clone, Default)]
#[cfg_attr(feature = "defmt", derive(defmt::Format))]
#[non_exhaustive]
#[cfg(feature = "segmentation-offload")]
pub struct SegmentationCapabilities {
/// Maximum buffer size that can be offloaded for TCP over IPv4, or `None` if unsupported.
#[cfg(all(feature = "socket-tcp", feature = "proto-ipv4"))]
pub tcpv4: Option<NonZeroUsize>,
/// Maximum buffer size that can be offloaded for TCP over IPv6, or `None` if unsupported.
#[cfg(all(feature = "socket-tcp", feature = "proto-ipv6"))]
pub tcpv6: Option<NonZeroUsize>,
}

/// A description of device capabilities.
///
/// Higher-level protocols may achieve higher throughput or lower latency if they consider
Expand Down Expand Up @@ -276,6 +302,13 @@ pub struct DeviceCapabilities {
/// If the network device is capable of verifying or computing checksums for some protocols,
/// it can request that the stack not do so in software to improve performance.
pub checksum: ChecksumCapabilities,

#[cfg(feature = "segmentation-offload")]
/// Segmentation offload capabilities.
///
/// If the network device is capable of segmenting packets for some protocols,
/// it can request that the stack not do so in software to improve performance.
pub segmentation: SegmentationCapabilities,
}

impl DeviceCapabilities {
Expand Down
31 changes: 24 additions & 7 deletions src/phy/pcap_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ pub trait PcapSink {
self.write_u16(4); // minor version
self.write_u32(0); // timezone (= UTC)
self.write_u32(0); // accuracy (not used)
self.write_u32(65535); // maximum packet length
self.write_u32(self.max_packet_size()); // maximum packet length
self.write_u32(link_type.into()); // link-layer header type
}

Expand All @@ -71,24 +71,41 @@ pub trait PcapSink {
/// See also the note for [global_header](#method.global_header).
///
/// # Panics
/// This function panics if `length` is greater than 65535.
/// This function panics if `length` is greater than [u32::MAX].
fn packet_header(&mut self, timestamp: Instant, length: usize) {
assert!(length <= 65535);
let original_length = length.try_into().unwrap();

self.write_u32(timestamp.secs() as u32); // timestamp seconds
self.write_u32(timestamp.micros() as u32); // timestamp microseconds
self.write_u32(length as u32); // captured length
self.write_u32(length as u32); // original length
self.write_u32(self.max_packet_size().min(original_length)); // captured length
self.write_u32(original_length);
}

/// Write the libpcap packet header followed by packet data into the sink.
///
/// The default implementation truncates packets that are larger than [Self::max_packet_size].
///
/// See also the note for [global_header](#method.global_header).
fn packet(&mut self, timestamp: Instant, packet: &[u8]) {
self.packet_header(timestamp, packet.len());
self.write(packet);
let packet_len = packet.len();
let max_packet_size = usize::try_from(self.max_packet_size()).unwrap();

self.packet_header(timestamp, packet_len);
self.write(&packet[..max_packet_size.min(packet_len)]);
self.flush();
}

/// Return the maximum size, in octets, for captured packets.
///
/// This value is written as the maximum packet length in the global header and
/// bounds the captured length recorded in each packet header.
///
/// The captures of packets larger than this size will be truncated by default. Excessively
/// large values may cause the software reading the captures to allocate unnecessarily large
/// buffers.
fn max_packet_size(&self) -> u32 {
// Use the default value used by [libpcap] and [Wireshark]: 262144 octets (256 KiB).
// [Wireshark]: https://gitlab.com/wireshark/wireshark/-/blob/v3.5.0/wiretap/wtap.h#L334
// [libpcap]: https://github.com/the-tcpdump-group/libpcap/blob/libpcap-1.6.0-bp/pcap-int.h#L106
262144
}
}

#[cfg(feature = "std")]
Expand Down
63 changes: 55 additions & 8 deletions src/socket/tcp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use core::fmt::Display;
use core::task::Waker;
use core::{fmt, mem};

use crate::phy::PacketMeta;
#[cfg(feature = "async")]
use crate::socket::WakerRegistration;
use crate::socket::{Context, PollAt};
Expand Down Expand Up @@ -2351,7 +2352,7 @@ impl<'a> Socket<'a> {

pub(crate) fn dispatch<F, E>(&mut self, cx: &mut Context, emit: F) -> Result<(), E>
where
F: FnOnce(&mut Context, (IpRepr, TcpRepr)) -> Result<(), E>,
F: FnOnce(&mut Context, PacketMeta, (IpRepr, TcpRepr)) -> Result<(), E>,
{
if self.tuple.is_none() {
return Ok(());
Expand Down Expand Up @@ -2478,6 +2479,15 @@ impl<'a> Socket<'a> {

let mut is_zero_window_probe = false;

#[cfg_attr(
not(feature = "segmentation-offload"),
expect(
unused_mut,
reason = "The default is not mutated if the segmentation offload feature is not enabled."
)
)]
let mut packet_meta = PacketMeta::default();

match self.state {
// We transmit an RST in the CLOSED state. If we ended up in the CLOSED state
// with a specified endpoint, it means that the socket was aborted.
Expand Down Expand Up @@ -2536,17 +2546,54 @@ impl<'a> Socket<'a> {
is_zero_window_probe = true;
}

// Maximum size we're allowed to send. This can be limited by 3 factors:
// Maximum size we're allowed to send can be limited by 3 factors:
// 1. remote window
// 2. MSS the remote is willing to accept, probably determined by their MTU
// 3. MSS we can send, determined by our MTU.
let size = win_limit
.min(self.remote_mss)
//
// If the device supports segmentation offload, the segmentation needed
// to comply with the latter two will be handled by the device based on the
// metadata we provide.

let segment_size = self
.remote_mss
.min(cx.ip_mtu() - ip_repr.header_len() - TCP_HEADER_LEN);

#[cfg(not(feature = "segmentation-offload"))]
let device_limit = segment_size;

#[cfg(feature = "segmentation-offload")]
let device_limit = {
let segmentation_caps = cx.segmentation_caps();
match ip_repr.version() {
#[cfg(feature = "proto-ipv4")]
crate::wire::IpVersion::Ipv4 => segmentation_caps.tcpv4,
#[cfg(feature = "proto-ipv6")]
crate::wire::IpVersion::Ipv6 => segmentation_caps.tcpv6,
}
.map(|buf_size| {
#[cfg(feature = "medium-ethernet")]
let ip_mtu = buf_size.get() - crate::wire::ETHERNET_HEADER_LEN;
#[cfg(not(feature = "medium-ethernet"))]
let ip_mtu = buf_size.get();
ip_mtu - ip_repr.header_len() - TCP_HEADER_LEN
})
.unwrap_or(segment_size)
};

let size = win_limit.min(device_limit);

let offset = self.remote_last_seq - self.local_seq_no;
repr.payload = self.tx_buffer.get_allocated(offset, size);

#[cfg(feature = "segmentation-offload")]
if repr.payload.len() > segment_size {
packet_meta.segmentation_offload_size =
core::num::NonZeroU16::try_from(u16::try_from(segment_size).unwrap())
.unwrap()
.into();
}

// If we've sent everything we had in the buffer, follow it with the PSH or FIN
// flags, depending on whether the transmit half of the connection is open.
if offset + repr.payload.len() == self.tx_buffer.len() {
Expand Down Expand Up @@ -2616,7 +2663,7 @@ impl<'a> Socket<'a> {
// to not waste time waiting for the retransmit timer on packets that we know
// for sure will not be successfully transmitted.
ip_repr.set_payload_len(repr.buffer_len());
emit(cx, (ip_repr, repr))?;
emit(cx, packet_meta, (ip_repr, repr))?;

// We've sent something, whether useful data or a keep-alive packet, so rewind
// the keep-alive timer.
Expand Down Expand Up @@ -2909,7 +2956,7 @@ mod test {
let mut sent = 0;
let result = socket
.socket
.dispatch(&mut socket.cx, |_, (ip_repr, tcp_repr)| {
.dispatch(&mut socket.cx, |_, _, (ip_repr, tcp_repr)| {
assert_eq!(ip_repr.next_header(), IpProtocol::Tcp);
assert_eq!(ip_repr.src_addr(), LOCAL_ADDR.into());
assert_eq!(ip_repr.dst_addr(), REMOTE_ADDR.into());
Expand All @@ -2930,7 +2977,7 @@ mod test {
socket.cx.set_now(timestamp);

let mut fail = false;
let result: Result<(), ()> = socket.socket.dispatch(&mut socket.cx, |_, _| {
let result: Result<(), ()> = socket.socket.dispatch(&mut socket.cx, |_, _, _| {
fail = true;
Ok(())
});
Expand Down Expand Up @@ -7994,7 +8041,7 @@ mod test {

s.set_hop_limit(Some(0x2a));
assert_eq!(
s.socket.dispatch(&mut s.cx, |_, (ip_repr, _)| {
s.socket.dispatch(&mut s.cx, |_, _, (ip_repr, _)| {
assert_eq!(ip_repr.hop_limit(), 0x2a);
Ok::<_, ()>(())
}),
Expand Down
9 changes: 9 additions & 0 deletions src/wire/ipv4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,16 @@ impl Repr {
packet.set_header_len(field::DST_ADDR.end as u8);
packet.set_dscp(0);
packet.set_ecn(0);
#[cfg(not(feature = "segmentation-offload"))]
let total_len = packet.header_len() as u16 + self.payload_len as u16;
#[cfg(feature = "segmentation-offload")]
// If, because of segmentation offload, the length of the buffer exceeds what can be
// represented in the length field of the IP header, we fall back to 0. It will be
// filled in by the device during segmentation anyway.
let total_len = u16::try_from(self.payload_len)
.ok()
.and_then(|payload_len: u16| payload_len.checked_add(packet.header_len() as u16))
.unwrap_or(0);
packet.set_total_len(total_len);
packet.set_ident(0);
packet.clear_flags();
Expand Down
9 changes: 8 additions & 1 deletion src/wire/ipv6.rs
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,14 @@ impl Repr {
packet.set_version(6);
packet.set_traffic_class(0);
packet.set_flow_label(0);
packet.set_payload_len(self.payload_len as u16);
#[cfg(not(feature = "segmentation-offload"))]
let payload_len = self.payload_len as u16;
#[cfg(feature = "segmentation-offload")]
// If, because of segmentation offload, the length of the buffer exceeds what can be
// represented in the length field of the IP header, we fall back to 0. It will be
// filled in by the device during segmentation anyway.
let payload_len = u16::try_from(self.payload_len).unwrap_or(0);
packet.set_payload_len(payload_len);
packet.set_hop_limit(self.hop_limit);
packet.set_next_header(self.next_header);
packet.set_src_addr(self.src_addr);
Expand Down
Loading