/*
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * NOTE(review): the header names of the #include directives below are
 * missing in this copy of the file (only the bare directive survived).
 * They must be restored from the original source before this compiles.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * List of the error symbols of this file together with their
 * human-readable strings.  The list is expanded twice below with two
 * different definitions of _(): once to build the string table and once
 * to build the matching enum, so both stay in the same order.
 */
#define foreach_gso_error \
  _ (NO_BUFFERS, "no buffers to segment GSO") \
  _ (UNHANDLED_TYPE, "unhandled gso type")

/* Error strings, one per entry of foreach_gso_error, in list order. */
static char *gso_error_strings[] = {
#define _(sym, string) string,
  foreach_gso_error
#undef _
};

/*
 * Error codes GSO_ERROR_<sym>, one per entry of foreach_gso_error.
 * GSO_N_ERROR is the last enumerator and therefore equals the number of
 * error codes.
 */
typedef enum
{
#define _(sym, str) GSO_ERROR_##sym,
  foreach_gso_error
#undef _
    GSO_N_ERROR,
} gso_error_t;

/* Next-node indices; GSO_N_NEXT is the number of entries. */
typedef enum
{
  GSO_NEXT_DROP,
  GSO_N_NEXT,
} gso_next_t;

/*
 * Per-packet trace record printed by format_gso_trace().
 *  flags          tested against VNET_BUFFER_F_GSO by the formatter
 *  gso_size       printed as "gso_sz"
 *  gso_l4_hdr_sz  printed as "gso_l4_hdr_sz"
 *  gho            header offsets, printed with format_generic_header_offset
 */
typedef struct
{
  u32 flags;
  u16 gso_size;
  u8 gso_l4_hdr_sz;
  generic_header_offset_t gho;
} gso_trace_t;

/**
 * Append a textual rendering of a gso_trace_t to the string s.
 *
 * The va_list is consumed in the order: vlib_main_t *, vlib_node_t *
 * (both read but unused), gso_trace_t *.
 *
 * When the VNET_BUFFER_F_GSO bit is set in the trace flags the GSO size
 * and L4 header size are printed, otherwise the text "non-gso buffer";
 * in both cases the header offsets follow on the next line.
 *
 * Returns the value returned by format().
 */
static u8 *
format_gso_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  gso_trace_t *t = va_arg (*args, gso_trace_t *);

  if (t->flags & VNET_BUFFER_F_GSO)
    {
      s = format (s, "gso_sz %d gso_l4_hdr_sz %d\n%U",
                  t->gso_size, t->gso_l4_hdr_sz,
                  format_generic_header_offset, &t->gho);
    }
  else
    {
      s = format (s, "non-gso buffer\n%U",
                  format_generic_header_offset, &t->gho);
    }

  return s;
}

/**
 * Rewrite the outer IP header of every buffer in ptd->split_buffers.
 *
 * For each buffer, depending on gho->gho_flags:
 *  - GHO_F_OUTER_IP4: the IPv4 length field is set to
 *    current_length - outer_l3_hdr_offset and the header checksum is
 *    refreshed from ip4_header_checksum().
 *  - GHO_F_OUTER_IP6: the IPv6 payload_length field is set to
 *    current_length - outer_l4_hdr_offset.
 *
 * sb0 is not referenced by the body.
 *
 * Returns gho->outer_hdr_sz accumulated once per buffer, in a u16.
 */
static_always_inline u16
tso_segment_ipip_tunnel_fixup (vlib_main_t * vm,
                               vnet_interface_per_thread_data_t * ptd,
                               vlib_buffer_t * sb0,
                               generic_header_offset_t * gho)
{
  u16 n_tx_bufs = vec_len (ptd->split_buffers);
  u16 i = 0, n_tx_bytes = 0;

  while (i < n_tx_bufs)
    {
      vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]);

      /* both views point at the same outer L3 offset; only the one
       * selected by gho_flags below is written through */
      ip4_header_t *ip4 =
        (ip4_header_t *) (vlib_buffer_get_current (b0) +
                          gho->outer_l3_hdr_offset);
      ip6_header_t *ip6 =
        (ip6_header_t *) (vlib_buffer_get_current (b0) +
                          gho->outer_l3_hdr_offset);

      if (gho->gho_flags & GHO_F_OUTER_IP4)
        {
          /* length must be written before the checksum is taken */
          ip4->length =
            clib_host_to_net_u16 (b0->current_length -
                                  gho->outer_l3_hdr_offset);
          ip4->checksum = ip4_header_checksum (ip4);
        }
      else if (gho->gho_flags & GHO_F_OUTER_IP6)
        {
          ip6->payload_length =
            clib_host_to_net_u16 (b0->current_length -
                                  gho->outer_l4_hdr_offset);
        }

      n_tx_bytes += gho->outer_hdr_sz;
      i++;
    }
  return n_tx_bytes;
}

/**
 * Rewrite the outer IP header, and the outer UDP header when present,
 * of a single buffer b.
 *
 * The outer IP length/payload_length is updated the same way as in
 * tso_segment_ipip_tunnel_fixup().  The IP protocol is read from the
 * outer header selected by gho->gho_flags; if neither GHO_F_OUTER_IP4
 * nor GHO_F_OUTER_IP6 is set, proto stays 0 and the UDP part is skipped.
 *
 * When the protocol is IP_PROTOCOL_UDP, the UDP length is set to
 * current_length - outer_l4_hdr_offset, the UDP checksum is zeroed and
 * then recomputed, and VNET_BUFFER_OFFLOAD_F_UDP_CKSUM is cleared on b.
 */
static_always_inline void
tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t * vm, vlib_buffer_t * b,
                                        generic_header_offset_t * gho)
{
  u8 proto = 0;
  ip4_header_t *ip4 = 0;
  ip6_header_t *ip6 = 0;
  udp_header_t *udp = 0;

  ip4 =
    (ip4_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
  ip6 =
    (ip6_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
  udp =
    (udp_header_t *) (vlib_buffer_get_current (b) + gho->outer_l4_hdr_offset);

  if (gho->gho_flags & GHO_F_OUTER_IP4)
    {
      proto = ip4->protocol;
      ip4->length =
        clib_host_to_net_u16 (b->current_length - gho->outer_l3_hdr_offset);
      ip4->checksum = ip4_header_checksum (ip4);
    }
  else if (gho->gho_flags & GHO_F_OUTER_IP6)
    {
      proto = ip6->protocol;
      ip6->payload_length =
        clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
    }
  if (proto == IP_PROTOCOL_UDP)
    {
      /* only passed by address to the IPv6 checksum routine; its value
       * is not read afterwards */
      int bogus;
      udp->length =
        clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
      /* checksum field is zeroed before the checksum is recomputed */
      udp->checksum = 0;
      if (gho->gho_flags & GHO_F_OUTER_IP6)
        {
          udp->checksum =
            ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
        }
      else if (gho->gho_flags & GHO_F_OUTER_IP4)
        {
          udp->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip4);
        }
      /* FIXME: it should be OUTER_UDP_CKSUM */
      vnet_buffer_offload_flags_clear (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
    }
}

/**
 * Apply tso_segment_vxlan_tunnel_headers_fixup() to every buffer in
 * ptd->split_buffers.
 *
 * sb0 is not referenced by the body.
 *
 * Returns gho->outer_hdr_sz accumulated once per buffer, in a u16.
 */
static_always_inline u16
tso_segment_vxlan_tunnel_fixup (vlib_main_t * vm,
                                vnet_interface_per_thread_data_t * ptd,
                                vlib_buffer_t * sb0,
                                generic_header_offset_t * gho)
{
  u16 n_tx_bufs = vec_len (ptd->split_buffers);
  u16 i = 0, n_tx_bytes = 0;

  while (i < n_tx_bufs)
    {
      vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]);

      tso_segment_vxlan_tunnel_headers_fixup (vm, b0, gho);
      n_tx_bytes += gho->outer_hdr_sz;
      i++;
    }
  return n_tx_bytes;
}

/**
 * Work out how many buffers are needed to hold the segments of a packet
 * and allocate them into ptd->split_buffers.
 *
 * @param n_bytes_b0       total packet length used for the computation
 * @param l234_sz          header size copied into every segment
 * @param gso_size         upper bound on payload bytes per segment
 * @param first_data_size  payload bytes placed in the first segment
 * @param gho              only gho->l2_hdr_offset is read here
 *
 * b0 is not referenced by the body.
 *
 * Returns the number of buffers allocated, or 0 when fewer than required
 * could be allocated (the partial allocation is freed first).
 */
static_always_inline u16
tso_alloc_tx_bufs (vlib_main_t * vm,
                   vnet_interface_per_thread_data_t * ptd,
                   vlib_buffer_t * b0, u32 n_bytes_b0, u16 l234_sz,
                   u16 gso_size, u16 first_data_size,
                   generic_header_offset_t * gho)
{
  u16 n_alloc, size;
  u16 first_packet_length = l234_sz + first_data_size;

  /*
   * size is the amount of data per segmented buffer except the 1st
   * segmented buffer.
   * l2_hdr_offset is an offset == current_data of vlib_buffer_t.
   * l234_sz is hdr_sz from l2_hdr_offset.
   */
  size =
    clib_min (gso_size, vlib_buffer_get_default_data_size (vm) - l234_sz
              - gho->l2_hdr_offset);

  /*
   * First segmented buffer length is calculated separately.
   * As it may contain less data than gso_size (when gso_size is
   * greater than current_length of 1st buffer from GSO chained
   * buffers) and/or size calculated above.
   */
  u16 n_bufs = 1;

  /*
   * Total packet length minus first packet length including l234 header.
   * rounded-up division
   *
   * NOTE(review): size is the divisor below; nothing in this function
   * rejects size == 0 (e.g. gso_size == 0) - confirm callers rule it out.
   */
  ASSERT (n_bytes_b0 > first_packet_length);
  n_bufs += ((n_bytes_b0 - first_packet_length + (size - 1)) / size);

  /* make sure the vector has at least n_bufs slots before allocating */
  vec_validate (ptd->split_buffers, n_bufs - 1);

  n_alloc = vlib_buffer_alloc (vm, ptd->split_buffers, n_bufs);
  if (n_alloc < n_bufs)
    {
      /* all-or-nothing: give back whatever was obtained */
      vlib_buffer_free (vm, ptd->split_buffers, n_alloc);
      return 0;
    }
  return n_alloc;
}

/**
 * Initialise buffer nb0 from template buffer b0.
 *
 * Copies current_data, flow_id, error, current_config_index, the opaque
 * area and trace_handle from b0; sets current_length to length, flags to
 * VLIB_BUFFER_TOTAL_LENGTH_VALID | flags and
 * total_length_not_including_first_buffer to 0; then copies length bytes
 * starting at the current data position of b0 to that of nb0.
 */
static_always_inline void
tso_init_buf_from_template_base (vlib_buffer_t * nb0, vlib_buffer_t * b0,
                                 u32 flags, u16 length)
{
  /* copying objects from cacheline 0 */
  nb0->current_data = b0->current_data;
  nb0->current_length = length;
  nb0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | flags;
  nb0->flow_id = b0->flow_id;
  nb0->error = b0->error;
  nb0->current_config_index = b0->current_config_index;
  clib_memcpy_fast (&nb0->opaque, &b0->opaque, sizeof (nb0->opaque));

  /* copying objects from cacheline 1 */
  nb0->trace_handle = b0->trace_handle;
  nb0->total_length_not_including_first_buffer = 0;

  /* copying data */
  clib_memcpy_fast (vlib_buffer_get_current (nb0),
                    vlib_buffer_get_current (b0), length);
}

/**
 * Initialise nb0 from template b0 with template_data_sz bytes of data
 * and prepare it to receive payload.
 *
 * @param p_dst_ptr     out: address just past the copied template bytes
 * @param p_dst_left    out: the smaller of gso_size and the default
 *                      buffer data size minus
 *                      (template_data_sz + nb0->current_data)
 * @param next_tcp_seq  host-order value; converted with
 *                      clib_host_to_net_u32() and stored in the TCP
 *                      header located at gho->l4_hdr_offset in nb0
 */
static_always_inline void
tso_init_buf_from_template (vlib_main_t * vm, vlib_buffer_t * nb0,
                            vlib_buffer_t * b0, u16 template_data_sz,
                            u16 gso_size, u8 ** p_dst_ptr, u16 * p_dst_left,
                            u32 next_tcp_seq, u32 flags,
                            generic_header_offset_t * gho)
{
  tso_init_buf_from_template_base (nb0, b0, flags, template_data_sz);

  *p_dst_left =
    clib_min (gso_size,
              vlib_buffer_get_default_data_size (vm) - (template_data_sz +
                                                        nb0->current_data));
  *p_dst_ptr = vlib_buffer_get_current (nb0) + template_data_sz;

  tcp_header_t *tcp =
    (tcp_header_t *) (vlib_buffer_get_current (nb0) + gho->l4_hdr_offset);
  tcp->seq_number = clib_host_to_net_u32 (next_tcp_seq);
}

/**
 * Finalise the headers of one segmented buffer b0.
 *
 * Writes tcp_flags into the TCP header, then:
 *  - is_ip6 != 0: sets the IPv6 payload_length to
 *    current_length - l4_hdr_offset; if GHO_F_TCP is set, zeroes and
 *    recomputes the TCP checksum and clears
 *    VNET_BUFFER_OFFLOAD_F_TCP_CKSUM.
 *  - is_ip6 == 0: sets the IPv4 length to
 *    current_length - l3_hdr_offset; refreshes the IPv4 header checksum
 *    if GHO_F_IP4 is set; zeroes and recomputes the TCP checksum if
 *    GHO_F_TCP is set; clears both the IP and TCP checksum offload flags
 *    regardless of those two bits.
 *
 * Finally, when is_l2 is 0 and GHO_F_TUNNEL is not set, the TX adjacency
 * of b0 is looked up and, if it is a midchain adjacency with a fixup
 * function installed, that function is invoked on b0.
 */
static_always_inline void
tso_fixup_segmented_buf (vlib_main_t * vm, vlib_buffer_t * b0, u8 tcp_flags,
                         int is_l2, int is_ip6, generic_header_offset_t * gho)
{
  /* ip4 and ip6 alias the same L3 offset; is_ip6 picks which is used */
  ip4_header_t *ip4 =
    (ip4_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset);
  ip6_header_t *ip6 =
    (ip6_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset);
  tcp_header_t *tcp =
    (tcp_header_t *) (vlib_buffer_get_current (b0) + gho->l4_hdr_offset);

  tcp->flags = tcp_flags;

  if (is_ip6)
    {
      ip6->payload_length =
        clib_host_to_net_u16 (b0->current_length - gho->l4_hdr_offset);
      if (gho->gho_flags & GHO_F_TCP)
        {
          int bogus = 0;
          /* checksum field is zeroed before the checksum is recomputed */
          tcp->checksum = 0;
          tcp->checksum =
            ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6, &bogus);
          vnet_buffer_offload_flags_clear (b0,
                                           VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
        }
    }
  else
    {
      /* length is written first so the checksums below cover it */
      ip4->length =
        clib_host_to_net_u16 (b0->current_length - gho->l3_hdr_offset);
      if (gho->gho_flags & GHO_F_IP4)
        ip4->checksum = ip4_header_checksum (ip4);
      if (gho->gho_flags & GHO_F_TCP)
        {
          tcp->checksum = 0;
          tcp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip4);
        }
      vnet_buffer_offload_flags_clear (b0, (VNET_BUFFER_OFFLOAD_F_IP_CKSUM |
                                            VNET_BUFFER_OFFLOAD_F_TCP_CKSUM));
    }

  if (!is_l2 && ((gho->gho_flags & GHO_F_TUNNEL) == 0))
    {
      u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];

      ip_adjacency_t *adj0 = adj_get (adj_index0);

      if (adj0->lookup_next_index == IP_LOOKUP_NEXT_MIDCHAIN &&
          adj0->sub_type.midchain.fixup_func)
        /* calls e.g. ipip44_fixup */
        adj0->sub_type.midchain.fixup_func
          (vm, adj0, b0, adj0->sub_type.midchain.fixup_data);
    }
}

/**
 * Allocate the necessary number of ptd->split_buffers,
 * and segment the possibly chained buffer(s) from b0 into
 * there.
 *
 * Return the cumulative number of bytes sent or zero
 * if allocation failed.
*/ static_always_inline u32 tso_segment_buffer (vlib_main_t * vm, vnet_interface_per_thread_data_t * ptd, u32 sbi0, vlib_buffer_t * sb0, generic_header_offset_t * gho, u32 n_bytes_b0, int is_l2, int is_ip6) { u32 n_tx_bytes = 0; u16 gso_size = vnet_buffer2 (sb0)->gso_size; u8 save_tcp_flags = 0; u8 tcp_flags_no_fin_psh = 0; u32 next_tcp_seq = 0; tcp_header_t *tcp = (tcp_header_t *) (vlib_buffer_get_current (sb0) + gho->l4_hdr_offset); next_tcp_seq = clib_net_to_host_u32 (tcp->seq_number); /* store original flags for last packet and reset FIN and PSH */ save_tcp_flags = tcp->flags; tcp_flags_no_fin_psh = tcp->flags & ~(TCP_FLAG_FIN | TCP_FLAG_PSH); tcp->checksum = 0; u32 default_bflags = sb0->flags & ~(VNET_BUFFER_F_GSO | VLIB_BUFFER_NEXT_PRESENT); u16 l234_sz = gho->hdr_sz; int first_data_size = clib_min (gso_size, sb0->current_length - l234_sz); next_tcp_seq += first_data_size; if (PREDICT_FALSE (!tso_alloc_tx_bufs (vm, ptd, sb0, n_bytes_b0, l234_sz, gso_size, first_data_size, gho))) return 0; vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[0]); tso_init_buf_from_template_base (b0, sb0, default_bflags, l234_sz + first_data_size); u32 total_src_left = n_bytes_b0 - l234_sz - first_data_size; if (total_src_left) { /* Need to copy more segments */ u8 *src_ptr, *dst_ptr; u16 src_left, dst_left; /* current source buffer */ vlib_buffer_t *csb0 = sb0; u32 csbi0 = sbi0; /* current dest buffer */ vlib_buffer_t *cdb0; u16 dbi = 1; /* the buffer [0] is b0 */ src_ptr = vlib_buffer_get_current (sb0) + l234_sz + first_data_size; src_left = sb0->current_length - l234_sz - first_data_size; tso_fixup_segmented_buf (vm, b0, tcp_flags_no_fin_psh, is_l2, is_ip6, gho); /* grab a second buffer and prepare the lo
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __VIRTIO_STD_H__
#define __VIRTIO_STD_H__

#define foreach_virtio_net_features      \
  _ (VIRTIO_NET_F_CSUM, 0)      /* Host handles pkts w/ partial csum */ \
  _ (VIRTIO_NET_F_GUEST_CSUM, 1) /* Guest handles pkts w/ partial csum */ \
  _ (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, 2) /* Dynamic offload configuration. */ \
  _ (VIRTIO_NET_F_MTU, 3)       /* Initial MTU advice. */ \
  _ (VIRTIO_NET_F_MAC, 5)       /* Host has given MAC address. */ \
  _ (VIRTIO_NET_F_GSO, 6)       /* Host handles pkts w/ any GSO. */ \
  _ (VIRTIO_NET_F_GUEST_TSO4, 7)        /* Guest can handle TSOv4 in. */ \
  _ (VIRTIO_NET_F_GUEST_TSO6, 8)        /* Guest can handle TSOv6 in. */ \
  _ (VIRTIO_NET_F_GUEST_ECN, 9) /* Guest can handle TSO[6] w/ ECN in. */ \
  _ (VIRTIO_NET_F_GUEST_UFO, 10)        /* Guest can handle UFO in. */ \
  _ (VIRTIO_NET_F_HOST_TSO4, 11)        /* Host can handle TSOv4 in. */ \
  _ (VIRTIO_NET_F_HOST_TSO6, 12)        /* Host can handle TSOv6 in. */ \
  _ (VIRTIO_NET_F_HOST_ECN, 13) /* Host can handle TSO[6] w/ ECN in. */ \
  _ (VIRTIO_NET_F_HOST_UFO, 14) /* Host can handle UFO in. */ \
  _ (VIRTIO_NET_F_MRG_RXBUF, 15)        /* Host can merge receive buffers. */ \
  _ (VIRTIO_NET_F_STATUS, 16)   /* virtio_net_config.status available */ \
  _ (VIRTIO_NET_F_CTRL_VQ, 17)  /* Control channel available */ \
  _ (VIRTIO_NET_F_CTRL_RX, 18)  /* Control channel RX mode support */ \
  _ (VIRTIO_NET_F_CTRL_VLAN, 19)        /* Control channel VLAN filtering */ \
  _ (VIRTIO_NET_F_CTRL_RX_EXTRA, 20)    /* Extra RX mode control support */ \
  _ (VIRTIO_NET_F_GUEST_ANNOUNCE, 21)   /* Guest can announce device on the network */ \
  _ (VIRTIO_NET_F_MQ, 22)               /* Device supports Receive Flow Steering */ \
  _ (VIRTIO_NET_F_CTRL_MAC_ADDR, 23)    /* Set MAC address */ \
  _ (VIRTIO_F_NOTIFY_ON_EMPTY, 24) \
  _ (VHOST_F_LOG_ALL, 26)      /* Log all write descriptors */ \
  _ (VIRTIO_F_ANY_LAYOUT, 27)  /* Can the device handle any descriptor layout */ \
  _ (VIRTIO_RING_F_INDIRECT_DESC, 28)   /* Support indirect buffer descriptors */ \
  _ (VIRTIO_RING_F_EVENT_IDX, 29)       /* The Guest publishes the used index for which it expects an interrupt \
 * at the end of the avail ring. Host should ignore the avail->flags field. */ \
/* The Host publishes the avail index for which it expects a kick \
 * at the end of the used ring. Guest should ignore the used->flags field. */ \
  _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \
  _ (VIRTIO_F_VERSION_1, 32)  /* v1.0 compliant. */           \
  _ (VIRTIO_F_IOMMU_PLATFORM, 33) \
  _ (VIRTIO_F_RING_PACKED, 34) \
  _ (VIRTIO_F_IN_ORDER, 35)  /* all buffers are used by the device in the */ \
                         /* same order in which they have been made available */ \