From e58041f242bf4bd120ecc9619b88348d80b94c17 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 18 Jan 2019 19:56:09 +0100 Subject: deprecate clib_memcpy64_x4 Storing the buffer template in a local variable seems to be the better option. Change-Id: I1a2fdd68cb956f99a5b36d2cd810fc623e089bcf Signed-off-by: Damjan Marion --- src/plugins/avf/input.c | 24 +++++++++++++++--------- src/plugins/dpdk/buffer.c | 17 +++++++++-------- src/plugins/dpdk/device/node.c | 31 +++++++++++++++---------------- src/plugins/memif/memif.c | 5 +---- src/plugins/memif/node.c | 27 +++++++++++++++++---------- 5 files changed, 57 insertions(+), 47 deletions(-) (limited to 'src/plugins') diff --git a/src/plugins/avf/input.c b/src/plugins/avf/input.c index 8072e94346b..b784bf731c1 100644 --- a/src/plugins/avf/input.c +++ b/src/plugins/avf/input.c @@ -146,7 +146,7 @@ avf_rx_attach_tail (vlib_main_t * vm, vlib_buffer_t * bt, vlib_buffer_t * b, b->next_buffer = t->buffers[i]; b->flags |= VLIB_BUFFER_NEXT_PRESENT; b = vlib_get_buffer (vm, b->next_buffer); - clib_memcpy_fast (b, bt, sizeof (vlib_buffer_t)); + vlib_buffer_copy_template (b, bt); tlnifb += b->current_length = qw1 >> AVF_RXD_LEN_SHIFT; i++; } @@ -161,12 +161,15 @@ avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node, avf_per_thread_data_t * ptd, u32 n_left, int maybe_multiseg) { - vlib_buffer_t *bt = &ptd->buffer_template; + vlib_buffer_t bt; vlib_buffer_t **b = ptd->bufs; u64 *qw1 = ptd->qw1s; avf_rx_tail_t *tail = ptd->tails; uword n_rx_bytes = 0; + /* copy template into local variable - will save per packet load */ + vlib_buffer_copy_template (&bt, &ptd->buffer_template); + while (n_left >= 4) { if (n_left >= 12) @@ -177,7 +180,10 @@ avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_prefetch_buffer_header (b[11], LOAD); } - clib_memcpy64_x4 (b[0], b[1], b[2], b[3], bt); + vlib_buffer_copy_template (b[0], &bt); + vlib_buffer_copy_template (b[1], &bt); + vlib_buffer_copy_template (b[2], &bt); + 
vlib_buffer_copy_template (b[3], &bt); n_rx_bytes += b[0]->current_length = qw1[0] >> AVF_RXD_LEN_SHIFT; n_rx_bytes += b[1]->current_length = qw1[1] >> AVF_RXD_LEN_SHIFT; @@ -186,10 +192,10 @@ avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node, if (maybe_multiseg) { - n_rx_bytes += avf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0); - n_rx_bytes += avf_rx_attach_tail (vm, bt, b[1], qw1[1], tail + 1); - n_rx_bytes += avf_rx_attach_tail (vm, bt, b[2], qw1[2], tail + 2); - n_rx_bytes += avf_rx_attach_tail (vm, bt, b[3], qw1[3], tail + 3); + n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[0], qw1[0], tail + 0); + n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[1], qw1[1], tail + 1); + n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[2], qw1[2], tail + 2); + n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[3], qw1[3], tail + 3); } VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); @@ -205,12 +211,12 @@ avf_process_rx_burst (vlib_main_t * vm, vlib_node_runtime_t * node, } while (n_left) { - clib_memcpy_fast (b[0], bt, sizeof (vlib_buffer_t)); + vlib_buffer_copy_template (b[0], &bt); n_rx_bytes += b[0]->current_length = qw1[0] >> AVF_RXD_LEN_SHIFT; if (maybe_multiseg) - n_rx_bytes += avf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0); + n_rx_bytes += avf_rx_attach_tail (vm, &bt, b[0], qw1[0], tail + 0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c index ee63f76b0d4..7093b01162c 100644 --- a/src/plugins/dpdk/buffer.c +++ b/src/plugins/dpdk/buffer.c @@ -241,14 +241,15 @@ CLIB_MULTIARCH_FN (dpdk_buffer_fill_free_list) (vlib_main_t * vm, no_prefetch: vlib_get_buffer_indices_with_offset (vm, (void **) mb, bi, 8, sizeof (struct rte_mbuf)); - clib_memcpy64_x4 (vlib_buffer_from_rte_mbuf (mb[0]), - vlib_buffer_from_rte_mbuf (mb[1]), - vlib_buffer_from_rte_mbuf (mb[2]), - vlib_buffer_from_rte_mbuf (mb[3]), &bt); - clib_memcpy64_x4 (vlib_buffer_from_rte_mbuf (mb[4]), - vlib_buffer_from_rte_mbuf (mb[5]), - 
vlib_buffer_from_rte_mbuf (mb[6]), - vlib_buffer_from_rte_mbuf (mb[7]), &bt); + + vlib_buffer_copy_template (vlib_buffer_from_rte_mbuf (mb[0]), &bt); + vlib_buffer_copy_template (vlib_buffer_from_rte_mbuf (mb[1]), &bt); + vlib_buffer_copy_template (vlib_buffer_from_rte_mbuf (mb[2]), &bt); + vlib_buffer_copy_template (vlib_buffer_from_rte_mbuf (mb[3]), &bt); + vlib_buffer_copy_template (vlib_buffer_from_rte_mbuf (mb[4]), &bt); + vlib_buffer_copy_template (vlib_buffer_from_rte_mbuf (mb[5]), &bt); + vlib_buffer_copy_template (vlib_buffer_from_rte_mbuf (mb[6]), &bt); + vlib_buffer_copy_template (vlib_buffer_from_rte_mbuf (mb[7]), &bt); n_left -= 8; mb += 8; diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index 194c359dbac..250ded5048c 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -40,7 +40,7 @@ STATIC_ASSERT ((PKT_RX_IP_CKSUM_BAD | PKT_RX_FDIR) < static_always_inline uword dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, - struct rte_mbuf *mb, vlib_buffer_free_list_t * fl) + struct rte_mbuf *mb, vlib_buffer_t * bt) { u8 nb_seg = 1; struct rte_mbuf *mb_seg = 0; @@ -59,10 +59,7 @@ dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, ASSERT (mb_seg != 0); b_seg = vlib_buffer_from_rte_mbuf (mb_seg); - vlib_buffer_init_for_free_list (b_seg, fl); - - ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - ASSERT (b_seg->current_data == 0); + vlib_buffer_copy_template (b_seg, bt); /* * The driver (e.g. 
virtio) may not put the packet data at the start @@ -167,17 +164,16 @@ dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd, { u32 n_left = n_rx_packets; vlib_buffer_t *b[4]; - vlib_buffer_free_list_t *fl; struct rte_mbuf **mb = ptd->mbufs; uword n_bytes = 0; u8 *flags, or_flags = 0; - - if (maybe_multiseg) - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + vlib_buffer_t bt; mb = ptd->mbufs; flags = ptd->flags; + /* copy template into local variable - will save per packet load */ + vlib_buffer_copy_template (&bt, &ptd->buffer_template); while (n_left >= 8) { dpdk_prefetch_buffer_x4 (mb + 4); @@ -187,7 +183,10 @@ dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd, b[2] = vlib_buffer_from_rte_mbuf (mb[2]); b[3] = vlib_buffer_from_rte_mbuf (mb[3]); - clib_memcpy64_x4 (b[0], b[1], b[2], b[3], &ptd->buffer_template); + vlib_buffer_copy_template (b[0], &bt); + vlib_buffer_copy_template (b[1], &bt); + vlib_buffer_copy_template (b[2], &bt); + vlib_buffer_copy_template (b[3], &bt); dpdk_prefetch_mbuf_x4 (mb + 4); @@ -208,10 +207,10 @@ dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd, if (maybe_multiseg) { - n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl); - n_bytes += dpdk_process_subseq_segs (vm, b[1], mb[1], fl); - n_bytes += dpdk_process_subseq_segs (vm, b[2], mb[2], fl); - n_bytes += dpdk_process_subseq_segs (vm, b[3], mb[3], fl); + n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], &bt); + n_bytes += dpdk_process_subseq_segs (vm, b[1], mb[1], &bt); + n_bytes += dpdk_process_subseq_segs (vm, b[2], mb[2], &bt); + n_bytes += dpdk_process_subseq_segs (vm, b[3], mb[3], &bt); } VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); @@ -227,7 +226,7 @@ dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd, while (n_left) { b[0] = vlib_buffer_from_rte_mbuf (mb[0]); - clib_memcpy_fast (b[0], &ptd->buffer_template, 64); + vlib_buffer_copy_template (b[0], &bt); or_flags |= 
dpdk_ol_flags_extract (mb, flags, 1); flags += 1; @@ -235,7 +234,7 @@ dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd, n_bytes += b[0]->current_length = mb[0]->data_len; if (maybe_multiseg) - n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], fl); + n_bytes += dpdk_process_subseq_segs (vm, b[0], mb[0], &bt); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); /* next */ diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index f976f16dec8..3171ba22f7d 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -848,19 +848,16 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) if (mm->per_thread_data == 0) { int i; - vlib_buffer_free_list_t *fl; vec_validate_aligned (mm->per_thread_data, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); - fl = - vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); for (i = 0; i < tm->n_vlib_mains; i++) { memif_per_thread_data_t *ptd = vec_elt_at_index (mm->per_thread_data, i); vlib_buffer_t *bt = &ptd->buffer_template; - vlib_buffer_init_for_free_list (bt, fl); + clib_memset (bt, 0, sizeof (vlib_buffer_t)); bt->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; bt->total_length_not_including_first_buffer = 0; vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0; diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index 3cb79541c17..490c60356db 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -180,7 +180,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, memif_main_t *mm = &memif_main; memif_ring_t *ring; memif_queue_t *mq; - u16 buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + u16 buffer_size = VLIB_BUFFER_DATA_SIZE; uword n_trace = vlib_get_trace_count (vm, node); u16 nexts[MEMIF_RX_VECTOR_SZ], *next = nexts; u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi; @@ -190,7 +190,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 thread_index = 
vm->thread_index; memif_per_thread_data_t *ptd = vec_elt_at_index (mm->per_thread_data, thread_index); - vlib_buffer_t *bt = &ptd->buffer_template; + vlib_buffer_t bt; u16 cur_slot, last_slot, ring_size, n_slots, mask; i16 start_offset; u16 n_buffers = 0, n_alloc; @@ -338,10 +338,11 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } /* prepare buffer template and next indices */ - vnet_buffer (bt)->sw_if_index[VLIB_RX] = mif->sw_if_index; - vnet_buffer (bt)->feature_arc_index = 0; - bt->current_data = start_offset; - bt->current_config_index = 0; + vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] = + mif->sw_if_index; + vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0; + ptd->buffer_template.current_data = start_offset; + ptd->buffer_template.current_config_index = 0; if (mode == MEMIF_INTERFACE_MODE_ETHERNET) { @@ -350,7 +351,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, next_index = mif->per_interface_next_index; else vnet_feature_start_device_input_x1 (mif->sw_if_index, &next_index, - bt); + &ptd->buffer_template); vlib_get_new_next_frame (vm, node, next_index, to_next_bufs, n_left_to_next); @@ -374,6 +375,9 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, po = ptd->packet_ops; bi = to_next_bufs; + /* copy template into local variable - will save per packet load */ + vlib_buffer_copy_template (&bt, &ptd->buffer_template); + while (n_from >= 8) { b0 = vlib_get_buffer (vm, po[4].first_buffer_vec_index); @@ -402,7 +406,10 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, b2 = vlib_get_buffer (vm, bi[2]); b3 = vlib_get_buffer (vm, bi[3]); - clib_memcpy64_x4 (b0, b1, b2, b3, bt); + vlib_buffer_copy_template (b0, &bt); + vlib_buffer_copy_template (b1, &bt); + vlib_buffer_copy_template (b2, &bt); + vlib_buffer_copy_template (b3, &bt); b0->current_length = po[0].packet_len; n_rx_bytes += b0->current_length; @@ -439,7 +446,7 @@ 
memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, fbvi[0] = po[0].first_buffer_vec_index; bi[0] = ptd->buffers[fbvi[0]]; b0 = vlib_get_buffer (vm, bi[0]); - clib_memcpy_fast (b0, bt, 64); + vlib_buffer_copy_template (b0, &bt); b0->current_length = po->packet_len; n_rx_bytes += b0->current_length; @@ -559,7 +566,7 @@ memif_device_input_zc_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* asume that somebody will want to add ethernet header on the packet so start with IP header at offset 14 */ start_offset = (mode == MEMIF_INTERFACE_MODE_IP) ? 14 : 0; - buffer_length = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES - start_offset; + buffer_length = VLIB_BUFFER_DATA_SIZE - start_offset; cur_slot = mq->last_tail; last_slot = ring->tail; -- cgit 1.2.3-korg