diff options
Diffstat (limited to 'src/plugins/avf/output.c')
-rw-r--r-- | src/plugins/avf/output.c | 57 |
1 file changed, 53 insertions, 4 deletions
diff --git a/src/plugins/avf/output.c b/src/plugins/avf/output.c index 4cc9d5a49c1..daa86ae86b2 100644 --- a/src/plugins/avf/output.c +++ b/src/plugins/avf/output.c @@ -19,6 +19,7 @@ #include <vlib/unix/unix.h> #include <vlib/pci/pci.h> #include <vppinfra/ring.h> +#include <vppinfra/vector/ip_csum.h> #include <vnet/ethernet/ethernet.h> #include <vnet/ip/ip4_packet.h> @@ -110,7 +111,7 @@ avf_tx_prepare_cksum (vlib_buffer_t * b, u8 is_tso) is_tso ? 0 : clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - (l4_hdr_offset - l3_hdr_offset)); - sum = ~ip_csum (&psh, sizeof (psh)); + sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh)); } else { @@ -119,11 +120,9 @@ avf_tx_prepare_cksum (vlib_buffer_t * b, u8 is_tso) psh.dst = ip6->dst_address; psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol); psh.l4len = is_tso ? 0 : ip6->payload_length; - sum = ~ip_csum (&psh, sizeof (psh)); + sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh)); } - /* ip_csum does a byte swap for some reason... */ - sum = clib_net_to_host_u16 (sum); if (is_tcp) tcp->checksum = sum; else @@ -231,7 +230,11 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq, { const u64 cmd_eop = AVF_TXD_CMD_EOP; u16 n_free_desc, n_desc_left, n_packets_left = n_packets; +#if defined CLIB_HAVE_VEC512 + vlib_buffer_t *b[8]; +#else vlib_buffer_t *b[4]; +#endif avf_tx_desc_t *d = txq->tmp_descs; u32 *tb = txq->tmp_bufs; @@ -242,11 +245,30 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq, while (n_packets_left && n_desc_left) { +#if defined CLIB_HAVE_VEC512 + u32 flags; + u64x8 or_flags_vec512; + u64x8 flags_mask_vec512; +#else u32 flags, or_flags; +#endif +#if defined CLIB_HAVE_VEC512 + if (n_packets_left < 8 || n_desc_left < 8) +#else if (n_packets_left < 8 || n_desc_left < 4) +#endif goto one_by_one; +#if defined CLIB_HAVE_VEC512 + u64x8 base_ptr = u64x8_splat (vm->buffer_main->buffer_mem_start); + u32x8 buf_indices = u32x8_load_unaligned (buffers); + + *(u64x8 *) &b 
= base_ptr + u64x8_from_u32x8 ( + buf_indices << CLIB_LOG2_CACHE_LINE_BYTES); + + or_flags_vec512 = u64x8_i64gather (u64x8_load_unaligned (b), 0, 1); +#else vlib_prefetch_buffer_with_index (vm, buffers[4], LOAD); vlib_prefetch_buffer_with_index (vm, buffers[5], LOAD); vlib_prefetch_buffer_with_index (vm, buffers[6], LOAD); @@ -258,12 +280,37 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq, b[3] = vlib_get_buffer (vm, buffers[3]); or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags; +#endif +#if defined CLIB_HAVE_VEC512 + flags_mask_vec512 = u64x8_splat ( + VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD | VNET_BUFFER_F_GSO); + if (PREDICT_FALSE ( + !u64x8_is_all_zero (or_flags_vec512 & flags_mask_vec512))) +#else if (PREDICT_FALSE (or_flags & (VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD | VNET_BUFFER_F_GSO))) +#endif goto one_by_one; +#if defined CLIB_HAVE_VEC512 + vlib_buffer_copy_indices (tb, buffers, 8); + avf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma); + avf_tx_fill_data_desc (vm, d + 1, b[1], cmd_eop, use_va_dma); + avf_tx_fill_data_desc (vm, d + 2, b[2], cmd_eop, use_va_dma); + avf_tx_fill_data_desc (vm, d + 3, b[3], cmd_eop, use_va_dma); + avf_tx_fill_data_desc (vm, d + 4, b[4], cmd_eop, use_va_dma); + avf_tx_fill_data_desc (vm, d + 5, b[5], cmd_eop, use_va_dma); + avf_tx_fill_data_desc (vm, d + 6, b[6], cmd_eop, use_va_dma); + avf_tx_fill_data_desc (vm, d + 7, b[7], cmd_eop, use_va_dma); + + buffers += 8; + n_packets_left -= 8; + n_desc_left -= 8; + d += 8; + tb += 8; +#else vlib_buffer_copy_indices (tb, buffers, 4); avf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma); @@ -276,6 +323,8 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq, n_desc_left -= 4; d += 4; tb += 4; +#endif + continue; one_by_one: |