From 99e97518e5fca260cb4e410838cc0c1ef70ac42d Mon Sep 17 00:00:00 2001 From: Michal Mazur Date: Fri, 11 Aug 2017 21:11:12 +0200 Subject: Add initial support for ODP buffers. Based on patch from Sreejith Surendran Nair. Change-Id: I32ff73871ce0439378a7e3f0f9a93ac169e770cb Signed-off-by: Michal Mazur Signed-off-by: Sreejith Surendran Nair Signed-off-by: Sachin Saxena --- src/plugins/odp.am | 3 +- src/plugins/odp/buffer.c | 119 +++++++++++++++++++++++++ src/plugins/odp/device.c | 82 ++++++++++------- src/plugins/odp/node.c | 206 +++++++++++++++++++------------------------ src/plugins/odp/odp_packet.c | 21 ++++- src/plugins/odp/odp_packet.h | 14 ++- 6 files changed, 290 insertions(+), 155 deletions(-) create mode 100644 src/plugins/odp/buffer.c (limited to 'src') diff --git a/src/plugins/odp.am b/src/plugins/odp.am index 5de071ee..40316f44 100644 --- a/src/plugins/odp.am +++ b/src/plugins/odp.am @@ -18,7 +18,8 @@ odp_plugin_la_LDFLAGS = $(AM_LDFLAGS) $(ODP_LIBS) odp_plugin_la_SOURCES = odp/cli.c \ odp/node.c \ odp/odp_packet.c \ - odp/device.c + odp/device.c \ + odp/buffer.c noinst_HEADERS += odp/odp_packet.h diff --git a/src/plugins/odp/buffer.c b/src/plugins/odp/buffer.c new file mode 100644 index 00000000..b347bbae --- /dev/null +++ b/src/plugins/odp/buffer.c @@ -0,0 +1,119 @@ +/* Copyright (c) 2017, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/* + * Allocate/free ODP buffers. + */ + +#include +#include +#include + +/* Allocate a given number of buffers into given array. + Returns number actually allocated which will be either zero or + number requested. 
*/ +u32 +odp_packet_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + odp_packet_main_t *om = &odp_packet_main; + u32 len = SHM_PKT_BUF_SIZE, total = 0; + odp_packet_t pkt; + + do + { + pkt = odp_packet_alloc (om->pool, len); + if (pkt == ODP_PACKET_INVALID) + break; + + buffers[total] = vlib_get_buffer_index (vm, odp_packet_user_area (pkt)); + ((vlib_buffer_t *)odp_packet_user_area (pkt))->l2_priv_data = (void *)pkt; + } + while (++total < n_buffers); + + return total; +} + + +static_always_inline void +odp_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, + u32 follow_next) +{ + odp_packet_t pkt; + u32 count = 0, bi; + vlib_buffer_t *buffer; + + do + { + bi = buffers[count]; + do + { + buffer = vlib_get_buffer (vm, bi); + pkt = odp_packet_from_vlib_buffer (buffer); + odp_packet_free (pkt); + if (follow_next == 0) + break; + bi = buffer->next_buffer; + } + while (buffer->flags & VLIB_BUFFER_NEXT_PRESENT); + count++; + } + while (count < n_buffers); +} + +static void +odp_packet_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + odp_buffer_free_inline (vm, buffers, n_buffers, 1); +} + +static void +odp_packet_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, + u32 n_buffers) +{ + odp_buffer_free_inline (vm, buffers, n_buffers, 0); +} + +static void +odp_packet_template_init (vlib_main_t * vm, + void *vt, + void *packet_data, + uword n_packet_data_bytes, + uword min_n_buffers_each_physmem_alloc, u8 * name) +{ + vlib_packet_template_t *t = (vlib_packet_template_t *) vt; + + vlib_worker_thread_barrier_sync (vm); + memset (t, 0, sizeof (t[0])); + + vec_add (t->packet_data, packet_data, n_packet_data_bytes); + + vlib_worker_thread_barrier_release (vm); +} + + +static vlib_buffer_callbacks_t odp_callbacks = { + .vlib_buffer_alloc_cb = &odp_packet_buffer_alloc, + .vlib_buffer_free_cb = &odp_packet_buffer_free, + .vlib_buffer_free_no_next_cb = &odp_packet_buffer_free_no_next, + .vlib_packet_template_init_cb = 
&odp_packet_template_init, +}; + +static clib_error_t * +odp_buffer_init (vlib_main_t * vm) +{ + vlib_buffer_cb_register (vm, &odp_callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (odp_buffer_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/odp/device.c b/src/plugins/odp/device.c index cf7493c6..da8f4833 100755 --- a/src/plugins/odp/device.c +++ b/src/plugins/odp/device.c @@ -37,7 +37,7 @@ format_odp_packet_device_name (u8 * s, va_list * args) { u32 i = va_arg (*args, u32); odp_packet_main_t *om = &odp_packet_main; - odp_packet_if_t *oif = vec_elt_at_index (om->interfaces, i); + odp_packet_if_t *oif = pool_elt_at_index (om->interfaces, i); s = format (s, "odp-%s", oif->host_if_name); return s; @@ -69,7 +69,9 @@ odp_packet_interface_tx (vlib_main_t * vm, odp_packet_if_t *oif = pool_elt_at_index (om->interfaces, rd->dev_instance); odp_pktout_queue_t pktout; odp_packet_t pkt_tbl[VLIB_FRAME_SIZE]; - u32 sent = 0, count = 0; + u32 sent, count = 0; + vlib_buffer_t *b0; + u32 bi; if (PREDICT_FALSE (oif->lockp != 0)) { @@ -84,50 +86,65 @@ odp_packet_interface_tx (vlib_main_t * vm, while (n_left > 0) { - u32 len; - vlib_buffer_t *b0; + odp_packet_t pkt; + int ret, diff; + + bi = buffers[0]; n_left--; - u32 bi = buffers[0]; buffers++; + next_present: do { b0 = vlib_get_buffer (vm, bi); - len = b0->current_length; - pkt_tbl[count] = odp_packet_alloc (om->pool, len); - - if (pkt_tbl[count] == ODP_PACKET_INVALID) - { - clib_warning ("odp packet alloc failed"); - } - - clib_memcpy ((u8 *) (odp_packet_data (pkt_tbl[count])), - vlib_buffer_get_current (b0), len); + pkt = odp_packet_from_vlib_buffer (b0); + + diff = (uintptr_t) (b0->data + b0->current_data) - + (uintptr_t) odp_packet_data (pkt); + if (diff > 0) + odp_packet_pull_head (pkt, diff); + else if (diff < 0) + odp_packet_push_head (pkt, -diff); + diff = b0->current_length - odp_packet_len (pkt); + if (diff > 0) + 
odp_packet_push_tail (pkt, diff); + else if (diff < 0) + odp_packet_pull_tail (pkt, -diff); + pkt_tbl[count] = pkt; count++; + bi = b0->next_buffer; } - while ((bi = b0->next_buffer) && (count < VLIB_FRAME_SIZE)); - } + while ((b0->flags & VLIB_BUFFER_NEXT_PRESENT) + && (count < VLIB_FRAME_SIZE)); - CLIB_MEMORY_BARRIER (); + if ((n_left > 0) && (count < VLIB_FRAME_SIZE)) + continue; - sent = odp_pktout_send (pktout, pkt_tbl, count); - sent = sent > 0 ? sent : 0; - - if (odp_unlikely (sent < count)) - { - do + sent = 0; + while (count > 0) { - odp_packet_free (pkt_tbl[sent]); + ret = odp_pktout_send (pktout, &pkt_tbl[sent], count); + if (odp_unlikely (ret <= 0)) + { + /* Drop one packet and try again */ + odp_packet_free (pkt_tbl[sent]); + count--; + sent++; + } + else + { + count -= ret; + sent += ret; + } } - while (++sent < count); + if (b0->flags & VLIB_BUFFER_NEXT_PRESENT) + goto next_present; } if (PREDICT_FALSE (oif->lockp != 0)) *oif->lockp = 0; - vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); - - return frame->n_vectors; + return (frame->n_vectors - n_left); } static void @@ -144,10 +161,9 @@ odp_packet_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, return; } - oif->per_interface_next_index = - vlib_node_add_next (vlib_get_main (), odp_packet_input_node.index, - node_index); - + oif->per_interface_next_index = vlib_node_add_next (vlib_get_main (), + odp_packet_input_node. 
+ index, node_index); } static void diff --git a/src/plugins/odp/node.c b/src/plugins/odp/node.c index d03480c1..a1ebce0c 100755 --- a/src/plugins/odp/node.c +++ b/src/plugins/odp/node.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #define foreach_odp_packet_input_error @@ -49,30 +48,11 @@ format_odp_packet_input_trace (u8 * s, va_list * args) return s; } -always_inline void -buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi); - vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi); - - /* update first buffer */ - first_b->total_length_not_including_first_buffer += b->current_length; - - /* update previous buffer */ - prev_b->next_buffer = bi; - prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT; - - /* update current buffer */ - b->next_buffer = 0; - - -} - -always_inline int +int odp_packet_queue_mode (odp_pktio_t pktio, u32 mode, odp_packet_t pkt_tbl[]) { - u32 num_evts = 0, num_pkts = 0, i = 0; + u32 num_evts = 0, num_pkts = 0; + int i; odp_queue_t inq; odp_event_t evt_tbl[VLIB_FRAME_SIZE]; u64 sched_wait = odp_schedule_wait_time (ODP_TIME_MSEC_IN_NS * 100); @@ -91,11 +71,18 @@ odp_packet_queue_mode (odp_pktio_t pktio, u32 mode, odp_packet_t pkt_tbl[]) return -1; } - if (inq != ODP_QUEUE_INVALID) - num_evts = odp_queue_deq_multi (inq, evt_tbl, VLIB_FRAME_SIZE); - else - num_evts = - odp_schedule_multi (NULL, sched_wait, evt_tbl, VLIB_FRAME_SIZE); + while (num_evts < VLIB_FRAME_SIZE) + { + if (inq != ODP_QUEUE_INVALID) + i = odp_queue_deq_multi (inq, &evt_tbl[num_evts], + VLIB_FRAME_SIZE - num_evts); + else + i = odp_schedule_multi (NULL, sched_wait, &evt_tbl[num_evts], + VLIB_FRAME_SIZE - num_evts); + if (i <= 0) + break; + num_evts += i; + } /* convert events to packets, discarding any non-packet events */ for (i = 0; i < num_evts; ++i) @@ -107,14 +94,14 @@ odp_packet_queue_mode (odp_pktio_t pktio, u32 mode, odp_packet_t 
pkt_tbl[]) } return num_pkts; - } -always_inline int +int odp_packet_burst_mode (odp_pktio_t pktio, odp_pktin_queue_t pktin, odp_packet_t pkt_tbl[]) { - u32 num_pkts; + u32 num_pkts = 0; + int ret; if (odp_pktin_queue (pktio, &pktin, 1) != 1) { @@ -122,10 +109,50 @@ odp_packet_burst_mode (odp_pktio_t pktio, odp_pktin_queue_t pktin, return -1; } - num_pkts = odp_pktin_recv (pktin, pkt_tbl, VLIB_FRAME_SIZE); + while (num_pkts < VLIB_FRAME_SIZE) + { + ret = odp_pktin_recv (pktin, &pkt_tbl[num_pkts], + VLIB_FRAME_SIZE - num_pkts); + if (ret <= 0) + break; + num_pkts += ret; + } return num_pkts; +} + +always_inline int +vlib_buffer_is_ip4 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b); + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); +} + +always_inline int +vlib_buffer_is_ip6 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b); + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); +} +always_inline int +vlib_buffer_is_mpls (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b); + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS)); +} + +always_inline u32 +odp_rx_next_from_etype (void *mb, vlib_buffer_t * b0) +{ + if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0))) + return VNET_DEVICE_INPUT_NEXT_IP4_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0))) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; + else + return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; } always_inline uword @@ -134,32 +161,12 @@ odp_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, { u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; uword n_trace = vlib_get_trace_count (vm, node); - odp_packet_main_t *om = &odp_packet_main; u32 n_rx_packets = 0; u32 n_rx_bytes = 0; u32 *to_next = 0; - u32 n_free_bufs; - u32 
thread_index = vlib_get_thread_index (); odp_pktin_queue_t pktin = { 0 }; - odp_packet_t pkt, pkt_tbl[VLIB_FRAME_SIZE]; + odp_packet_t pkt_tbl[VLIB_FRAME_SIZE]; u32 pkts = 0, pkts_ok = 0; - u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - - if (oif->per_interface_next_index != ~0) - next_index = oif->per_interface_next_index; - - n_free_bufs = vec_len (om->rx_buffers[thread_index]); - if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) - { - vec_validate (om->rx_buffers[thread_index], - VLIB_FRAME_SIZE + n_free_bufs - 1); - n_free_bufs += - vlib_buffer_alloc (vm, &om->rx_buffers[thread_index][n_free_bufs], - VLIB_FRAME_SIZE); - _vec_len (om->rx_buffers[thread_index]) = n_free_bufs; - - } if ((oif->mode == (APPL_MODE_PKT_QUEUE)) || (oif->mode == (APPL_MODE_PKT_SCHED))) @@ -178,84 +185,52 @@ odp_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, pkts_ok = drop_err_pkts (pkt_tbl, pkts); vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while ((i < pkts_ok) && (n_left_to_next) && (n_free_bufs)) + while ((i < pkts_ok) && (n_left_to_next)) { - vlib_buffer_t *first_b0 = 0; - u32 offset = 0; - u32 bi0 = 0, first_bi0 = 0, prev_bi0; - uint8_t *data_buf; - pkt = pkt_tbl[i]; - u32 data_len = odp_packet_len (pkt); - data_buf = malloc (data_len); - memset (data_buf, 0, data_len); - odp_packet_copy_to_mem (pkt, 0, data_len, data_buf); - - while (data_len && n_free_bufs) - { - vlib_buffer_t *b0; - /* grab free buffer */ - u32 last_empty_buffer = - vec_len (om->rx_buffers[thread_index]) - 1; - prev_bi0 = bi0; - bi0 = om->rx_buffers[thread_index][last_empty_buffer]; - b0 = vlib_get_buffer (vm, bi0); - _vec_len (om->rx_buffers[thread_index]) = last_empty_buffer; - n_free_bufs--; - /* copy data */ - u32 bytes_to_copy = - data_len > n_buffer_bytes ? 
n_buffer_bytes : data_len; - b0->current_data = 0; - clib_memcpy (vlib_buffer_get_current (b0), - (u8 *) data_buf + offset, bytes_to_copy); - - /* fill buffer header */ - b0->current_length = bytes_to_copy; - - if (offset == 0) - { - b0->total_length_not_including_first_buffer = 0; - b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; - vnet_buffer (b0)->sw_if_index[VLIB_RX] = oif->sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - first_bi0 = bi0; - first_b0 = vlib_get_buffer (vm, first_bi0); - } - else - { - buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0); - } - - offset += bytes_to_copy; - data_len -= bytes_to_copy; - } + u32 bi0 = 0; + vlib_buffer_t *b0; + + b0 = (vlib_buffer_t *) odp_packet_user_area (pkt_tbl[i]); + bi0 = vlib_get_buffer_index (vm, b0); + b0->l2_priv_data = pkt_tbl[i]; + + b0->current_length = odp_packet_len (pkt_tbl[i]); + b0->current_data = 0; + b0->total_length_not_including_first_buffer = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = oif->sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + if (PREDICT_FALSE (oif->per_interface_next_index != ~0)) + next0 = oif->per_interface_next_index; + else + next0 = odp_rx_next_from_etype (pkt_tbl[i], b0); + + vlib_buffer_advance (b0, device_input_next_node_advance[next0]); + /* trace */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); if (PREDICT_FALSE (n_trace > 0)) { odp_packet_input_trace_t *tr; - vlib_trace_buffer (vm, node, next0, first_b0, 0); + vlib_trace_buffer (vm, node, next0, b0, 0); vlib_set_trace_count (vm, node, --n_trace); - tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); + tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->next_index = next0; tr->hw_if_index = oif->hw_if_index; } - /* redirect if feature path enabled */ - vnet_feature_start_device_input_x1 (oif->sw_if_index, &next0, - first_b0); + n_left_to_next--; + to_next[0] = bi0; + to_next += 1; /* enque 
and take next packet */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, first_bi0, next0); + n_left_to_next, bi0, next0); /* next packet */ n_rx_packets++; - n_rx_bytes += odp_packet_len (pkt); - to_next[0] = first_bi0; - to_next += 1; - n_left_to_next--; - free (data_buf); - odp_packet_free (pkt_tbl[i]); + n_rx_bytes += odp_packet_len (pkt_tbl[i]); i++; } @@ -270,7 +245,6 @@ odp_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, n_rx_packets, n_rx_bytes); return n_rx_packets; - } static uword diff --git a/src/plugins/odp/odp_packet.c b/src/plugins/odp/odp_packet.c index d2088142..b7e5a8fc 100755 --- a/src/plugins/odp/odp_packet.c +++ b/src/plugins/odp/odp_packet.c @@ -286,15 +286,19 @@ odp_packet_init (vlib_main_t * vm) odp_packet_main_t *om = &odp_packet_main; vlib_thread_main_t *tm = vlib_get_thread_main (); vlib_thread_registration_t *tr; + vlib_physmem_main_t *vpm = &vm->physmem_main; uword *p; odp_platform_init_t platform_params; odp_pool_param_t params; + odp_pool_capability_t capa; memset (om, 0, sizeof (odp_packet_main_t)); om->input_cpu_first_index = 0; om->input_cpu_count = 1; om->if_count = 0; + memset (&platform_params, 0, sizeof (platform_params)); + platform_params.memory = 100; if (odp_init_global (&om->instance, NULL, &platform_params)) clib_warning ("Error:ODP global init failed"); @@ -305,18 +309,27 @@ odp_packet_init (vlib_main_t * vm) odp_term_global (om->instance); } + + odp_pool_capability (&capa); + if (capa.pkt.min_headroom != VLIB_BUFFER_PRE_DATA_SIZE) + { + return clib_error_return (0, + "Packet Headroom for VPP and ODP must be equal"); + } + /* Create packet pool */ odp_pool_param_init (&params); params.pkt.seg_len = SHM_PKT_POOL_BUF_SIZE; params.pkt.len = SHM_PKT_POOL_BUF_SIZE; params.type = ODP_POOL_PACKET; params.pkt.num = SHM_PKT_POOL_NB_PKTS; + params.pkt.uarea_size = sizeof (vlib_buffer_t) - VLIB_BUFFER_PRE_DATA_SIZE; om->pool = odp_pool_create (SHM_PKT_POOL_NAME, &params); if (om->pool
== ODP_POOL_INVALID) { - clib_warning ("Error: packet pool create failed"); + return clib_error_return (0, "Packet pool create failed"); } /* find out which cpus will be used for input */ @@ -331,8 +344,10 @@ odp_packet_init (vlib_main_t * vm) mhash_init_vec_string (&om->if_index_by_host_if_name, sizeof (uword)); - vec_validate_aligned (om->rx_buffers, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); + vpm->virtual.start = params.pool_start; + vpm->virtual.end = params.pool_end; + vpm->virtual.size = params.pool_size; + return 0; } diff --git a/src/plugins/odp/odp_packet.h b/src/plugins/odp/odp_packet.h index 32c31f8f..c3906a49 100755 --- a/src/plugins/odp/odp_packet.h +++ b/src/plugins/odp/odp_packet.h @@ -6,6 +6,7 @@ #include +#define SHM_PKT_BUF_SIZE 1598 #define SHM_PKT_POOL_BUF_SIZE 1856 #define SHM_PKT_POOL_NB_PKTS 10240 #define SHM_PKT_POOL_NAME "packet_pool" @@ -33,8 +34,6 @@ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); odp_packet_if_t *interfaces; - /* rx buffer cache */ - u32 **rx_buffers; u32 input_cpu_first_index; u32 input_cpu_count; /* hash of host interface names */ @@ -54,6 +53,17 @@ u32 odp_packet_delete_if (vlib_main_t * vm, u8 * host_if_name); u32 drop_err_pkts (odp_packet_t pkt_tbl[], u32 len); +always_inline odp_packet_t +odp_packet_from_vlib_buffer (vlib_buffer_t * b) +{ + odp_packet_t packet; + packet = (odp_packet_t)(b->l2_priv_data); + if (packet == NULL) + clib_error("ODP packet pointer was not set properly!\n"); + + return packet; +} + /* * fd.io coding-style-patch-verification: ON * -- cgit 1.2.3-korg