diff options
-rw-r--r-- | vlib/vlib/threads.c | 4 | ||||
-rw-r--r-- | vlib/vlib/threads.h | 22 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/cli.c | 380 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/device.c | 314 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/dpdk.h | 95 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/init.c | 9 | ||||
-rw-r--r-- | vnet/vnet/devices/dpdk/node.c | 228 |
7 files changed, 41 insertions, 1011 deletions
diff --git a/vlib/vlib/threads.c b/vlib/vlib/threads.c index 70d4019a1fd..fb84c676cb3 100644 --- a/vlib/vlib/threads.c +++ b/vlib/vlib/threads.c @@ -287,10 +287,6 @@ vlib_thread_init (vlib_main_t * vm) vec_validate_aligned (vlib_worker_threads, first_index - 1, CLIB_CACHE_LINE_BYTES); - - tm->efd.enabled = VLIB_EFD_DISABLED; - tm->efd.queue_hi_thresh = ((VLIB_EFD_DEF_WORKER_HI_THRESH_PCT * - FRAME_QUEUE_NELTS) / 100); return 0; } diff --git a/vlib/vlib/threads.h b/vlib/vlib/threads.h index c2db86442aa..fd09c31181c 100644 --- a/vlib/vlib/threads.h +++ b/vlib/vlib/threads.h @@ -119,7 +119,6 @@ typedef struct u64 enqueue_ticks; u64 enqueue_vectors; u32 enqueue_full_events; - u32 enqueue_efd_discards; /* dequeue side */ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); @@ -249,25 +248,6 @@ do { \ vec_free (__vlib_mains); \ } while (0); - -/* Early-Fast-Discard (EFD) */ -#define VLIB_EFD_DISABLED 0 -#define VLIB_EFD_DISCARD_ENABLED (1 << 0) -#define VLIB_EFD_MONITOR_ENABLED (1 << 1) - -#define VLIB_EFD_DEF_WORKER_HI_THRESH_PCT 90 - -/* EFD worker thread settings */ -typedef struct vlib_efd_t -{ - u16 enabled; - u16 queue_hi_thresh; - u8 ip_prec_bitmap; - u8 mpls_exp_bitmap; - u8 vlan_cos_bitmap; - u8 pad; -} vlib_efd_t; - #define foreach_sched_policy \ _(SCHED_OTHER, OTHER, "other") \ _(SCHED_BATCH, BATCH, "batch") \ @@ -328,8 +308,6 @@ typedef struct /* Bitmap of available CPU sockets (NUMA nodes) */ uword *cpu_socket_bitmap; - vlib_efd_t efd; - /* Worker handoff queues */ vlib_frame_queue_main_t *frame_queue_mains; diff --git a/vnet/vnet/devices/dpdk/cli.c b/vnet/vnet/devices/dpdk/cli.c index 5e53a98beae..a9f91ec5a6d 100644 --- a/vnet/vnet/devices/dpdk/cli.c +++ b/vnet/vnet/devices/dpdk/cli.c @@ -256,386 +256,6 @@ VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = { }; /* *INDENT-ON* */ -static void -show_dpdk_device_stats (vlib_main_t * vm, dpdk_device_t * xd) -{ - vlib_cli_output (vm, - "device_index %d\n" - " last_burst_sz %d\n" - " max_burst_sz %d\n" - " full_frames_cnt %u\n" - " consec_full_frames_cnt %u\n" - " congestion_cnt %d\n" - " last_poll_time %llu\n" - " max_poll_delay %llu\n" - " discard_cnt %u\n" - " total_packet_cnt %u\n", - xd->device_index, - xd->efd_agent.last_burst_sz, - xd->efd_agent.max_burst_sz, - xd->efd_agent.full_frames_cnt, - xd->efd_agent.consec_full_frames_cnt, - xd->efd_agent.congestion_cnt, - xd->efd_agent.last_poll_time, - xd->efd_agent.max_poll_delay, - xd->efd_agent.discard_cnt, xd->efd_agent.total_packet_cnt); - - u32 device_queue_sz = rte_eth_rx_queue_count (xd->device_index, - 0 /* queue_id */ ); - vlib_cli_output (vm, " device_queue_sz %u\n", device_queue_sz); -} - -static void -show_efd_config (vlib_main_t * vm) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - - vlib_cli_output (vm, - "dpdk: (0x%04x) enabled:%d monitor:%d drop_all:%d\n" - " dpdk_queue_hi_thresh %d\n" - " consec_full_frames_hi_thresh %d\n" - "---------\n" - "worker: (0x%04x) enabled:%d monitor:%d\n" - " worker_queue_hi_thresh %d\n", - dm->efd.enabled, - ((dm->efd.enabled & DPDK_EFD_DISCARD_ENABLED) ? 1 : 0), - ((dm->efd.enabled & DPDK_EFD_MONITOR_ENABLED) ? 1 : 0), - ((dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED) ? 1 : 0), - dm->efd.queue_hi_thresh, - dm->efd.consec_full_frames_hi_thresh, - tm->efd.enabled, - ((tm->efd.enabled & VLIB_EFD_DISCARD_ENABLED) ? 1 : 0), - ((dm->efd.enabled & VLIB_EFD_MONITOR_ENABLED) ? 1 : 0), - tm->efd.queue_hi_thresh); - vlib_cli_output (vm, - "---------\n" - "ip_prec_bitmap 0x%02x\n" - "mpls_exp_bitmap 0x%02x\n" - "vlan_cos_bitmap 0x%02x\n", - tm->efd.ip_prec_bitmap, - tm->efd.mpls_exp_bitmap, tm->efd.vlan_cos_bitmap); -} - -static clib_error_t * -show_efd (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - - if (unformat (input, "config")) - { - show_efd_config (vm); - } - else if (unformat (input, "dpdk")) - { - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - u32 device_id = ~0; - - (void) unformat (input, "device %d", &device_id); - /* *INDENT-OFF* */ - vec_foreach (xd, dm->devices) - { - if ((xd->device_index == device_id) || (device_id == ~0)) - { - show_dpdk_device_stats(vm, xd); - } - } - /* *INDENT-ON* */ - } - else if (unformat (input, "worker")) - { - vlib_thread_main_t *tm = vlib_get_thread_main (); - vlib_thread_registration_t *tr; - u32 num_workers = 0; - u32 first_worker_index = 0; - uword *p; - - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - ASSERT (p); - tr = (vlib_thread_registration_t *) p[0]; - if (tr) - { - num_workers = tr->count; - first_worker_index = tr->first_index; - } - - vlib_cli_output (vm, - "num_workers %d\n" - "first_worker_index %d\n", - num_workers, first_worker_index); - - } - else if (unformat (input, "help")) - { - vlib_cli_output (vm, "Usage: show efd config | " - "dpdk [device <id>] | worker\n"); - } - else - { - show_efd_config (vm); - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_efd_command, static) = { - .path = "show efd", - .short_help = "Show efd [device <id>] | [config]", - .function = show_efd, -}; -/* *INDENT-ON* */ - -static clib_error_t * -clear_efd (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - - /* *INDENT-OFF* */ - vec_foreach (xd, dm->devices) - { - xd->efd_agent.last_burst_sz = 0; - xd->efd_agent.max_burst_sz = 0; - xd->efd_agent.full_frames_cnt = 0; - xd->efd_agent.consec_full_frames_cnt = 0; - xd->efd_agent.congestion_cnt = 0; - xd->efd_agent.last_poll_time = 0; - xd->efd_agent.max_poll_delay = 0; - xd->efd_agent.discard_cnt = 0; - xd->efd_agent.total_packet_cnt = 0; - } - /* *INDENT-ON* */ - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (clear_efd_command,static) = { - .path = "clear efd", - .short_help = "Clear early-fast-discard counters", - .function = clear_efd, -}; -/* *INDENT-ON* */ - -static clib_error_t * -parse_op_and_prec (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd, - char *prec_type, u8 * prec_bitmap) -{ - clib_error_t *error = NULL; - u8 op = 0; - u8 prec = 0; - - if (unformat (input, "ge")) - { - op = EFD_OPERATION_GREATER_OR_EQUAL; - } - else if (unformat (input, "lt")) - { - op = EFD_OPERATION_LESS_THAN; - } - else if (unformat (input, "help")) - { - vlib_cli_output (vm, "enter operation [ge | lt] and precedence <0-7>)"); - return (error); - } - else - { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - - if (unformat (input, "%u", &prec)) - { - if (prec > 7) - { - return clib_error_return (0, "precedence %d is out of range <0-7>", - prec); - } - } - else - { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - - set_efd_bitmap (prec_bitmap, prec, op); - - vlib_cli_output (vm, - "EFD will be set for %s precedence %s%u%s.", - prec_type, - (op == EFD_OPERATION_LESS_THAN) ? "less than " : "", - prec, - (op == - EFD_OPERATION_GREATER_OR_EQUAL) ? " and greater" : ""); - - return (error); -} - - -static clib_error_t * -set_efd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) -{ - dpdk_main_t *dm = &dpdk_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - clib_error_t *error = NULL; - vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, dpdk_input_node.index); - - if (unformat (input, "enable")) - { - if (unformat (input, "dpdk")) - { - dm->efd.enabled |= DPDK_EFD_DISCARD_ENABLED; - } - else if (unformat (input, "worker")) - { - tm->efd.enabled |= VLIB_EFD_DISCARD_ENABLED; - } - else if (unformat (input, "monitor")) - { - dm->efd.enabled |= DPDK_EFD_MONITOR_ENABLED; - tm->efd.enabled |= VLIB_EFD_MONITOR_ENABLED; - } - else if (unformat (input, "drop_all")) - { - dm->efd.enabled |= DPDK_EFD_DROPALL_ENABLED; - } - else if (unformat (input, "default")) - { - dm->efd.enabled = (DPDK_EFD_DISCARD_ENABLED | - DPDK_EFD_MONITOR_ENABLED); - tm->efd.enabled = (VLIB_EFD_DISCARD_ENABLED | - VLIB_EFD_MONITOR_ENABLED); - } - else - { - return clib_error_return (0, "Usage: set efd enable [dpdk | " - "worker | monitor | drop_all | default]"); - } - } - else if (unformat (input, "disable")) - { - if (unformat (input, "dpdk")) - { - dm->efd.enabled &= ~DPDK_EFD_DISCARD_ENABLED; - } - else if (unformat (input, "worker")) - { - tm->efd.enabled &= ~VLIB_EFD_DISCARD_ENABLED; - } - else if (unformat (input, "monitor")) - { - dm->efd.enabled &= ~DPDK_EFD_MONITOR_ENABLED; - tm->efd.enabled &= ~VLIB_EFD_MONITOR_ENABLED; - } - else if (unformat (input, "drop_all")) - { - dm->efd.enabled &= ~DPDK_EFD_DROPALL_ENABLED; - } - else if (unformat (input, "all")) - { - dm->efd.enabled = 0; - tm->efd.enabled = 0; - } - else - { - return clib_error_return (0, "Usage: set efd disable [dpdk | " - "worker | monitor | drop_all | all]"); - } - } - else if (unformat (input, "worker_queue_hi_thresh")) - { - u32 mark; - if (unformat (input, "%u", &mark)) - { - tm->efd.queue_hi_thresh = mark; - } - else - { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - } - else if (unformat (input, "dpdk_device_hi_thresh")) - { - u32 thresh; - if (unformat (input, "%u", &thresh)) - { - dm->efd.queue_hi_thresh = thresh; - } - else - { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - } - else if (unformat (input, "consec_full_frames_hi_thresh")) - { - u32 thresh; - if (unformat (input, "%u", &thresh)) - { - dm->efd.consec_full_frames_hi_thresh = thresh; - } - else - { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - } - else if (unformat (input, "ip-prec")) - { - return (parse_op_and_prec (vm, input, cmd, - "ip", &tm->efd.ip_prec_bitmap)); - } - else if (unformat (input, "mpls-exp")) - { - return (parse_op_and_prec (vm, input, cmd, - "mpls", &tm->efd.mpls_exp_bitmap)); - } - else if (unformat (input, "vlan-cos")) - { - return (parse_op_and_prec (vm, input, cmd, - "vlan", &tm->efd.vlan_cos_bitmap)); - } - else if (unformat (input, "help")) - { - vlib_cli_output (vm, - "Usage:\n" - " set efd enable <dpdk | worker | monitor | drop_all | default> |\n" - " set efd disable <dpdk | worker | monitor | drop_all | all> |\n" - " set efd <ip-prec | mpls-exp | vlan-cos> <ge | lt> <0-7>\n" - " set efd worker_queue_hi_thresh <0-32> |\n" - " set efd dpdk_device_hi_thresh <0-%d> |\n" - " set efd consec_full_frames_hi_thresh <count> |\n", - DPDK_NB_RX_DESC_10GE); - } - else - { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - - if (dm->efd.enabled) - rt->function = dpdk_input_efd_multiarch_select (); - else if (dm->use_rss) - rt->function = dpdk_input_rss_multiarch_select (); - else - rt->function = dpdk_input_multiarch_select (); - - return error; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_efd,static) = { - .path = "set efd", - .short_help = "set early-fast-discard commands", - .function = set_efd, -}; -/* *INDENT-ON* */ - static clib_error_t * set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) diff --git a/vnet/vnet/devices/dpdk/device.c b/vnet/vnet/devices/dpdk/device.c index 411380633e7..d69ee3e453b 100644 --- a/vnet/vnet/devices/dpdk/device.c +++ b/vnet/vnet/devices/dpdk/device.c @@ -254,13 +254,6 @@ dpdk_tx_trace_buffer (dpdk_main_t * dm, * on the tx_vector. If all packets are transmitted (the normal case), the * function returns 0. * - * The tx_burst function may not be able to transmit all packets because the - * dpdk ring is full. If a flowcontrol callback function has been configured - * then the function simply returns. If no callback has been configured, the - * function will retry calling tx_burst with the remaining packets. This will - * continue until all packets are transmitted or tx_burst indicates no packets - * could be transmitted. (The caller can drop the remaining packets.) - * * The function assumes there is at least one packet on the tx_vector. */ static_always_inline @@ -297,21 +290,9 @@ static_always_inline * calls due to a ring wrap. */ ASSERT (n_packets < xd->nb_tx_desc); + ASSERT (ring->tx_tail == 0); - /* - * If there is no flowcontrol callback, there is only temporary buffering - * on the tx_vector and so the tail should always be 0. - */ - ASSERT (dm->flowcontrol_callback || ring->tx_tail == 0); - - /* - * If there is a flowcontrol callback, don't retry any incomplete tx_bursts. - * Apply backpressure instead. If there is no callback, keep retrying until - * a tx_burst sends no packets. n_retry of 255 essentially means no retry - * limit. - */ - n_retry = dm->flowcontrol_callback ? 0 : 255; - + n_retry = 16; queue_id = vm->cpu_index; do @@ -331,78 +312,25 @@ static_always_inline queue_id = (queue_id + 1) % xd->tx_q_used; } - if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */ + if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */ { - if (PREDICT_TRUE (tx_head > tx_tail)) - { - /* no wrap, transmit in one burst */ - dpdk_device_hqos_per_worker_thread_t *hqos = - &xd->hqos_wt[vm->cpu_index]; - - dpdk_hqos_metadata_set (hqos, - &tx_vector[tx_tail], tx_head - tx_tail); - rv = rte_ring_sp_enqueue_burst (hqos->swq, - (void **) &tx_vector[tx_tail], - (uint16_t) (tx_head - tx_tail)); - } - else - { - /* - * This can only happen if there is a flowcontrol callback. - * We need to split the transmit into two calls: one for - * the packets up to the wrap point, and one to continue - * at the start of the ring. - * Transmit pkts up to the wrap point. - */ - dpdk_device_hqos_per_worker_thread_t *hqos = - &xd->hqos_wt[vm->cpu_index]; - - dpdk_hqos_metadata_set (hqos, - &tx_vector[tx_tail], - xd->nb_tx_desc - tx_tail); - rv = rte_ring_sp_enqueue_burst (hqos->swq, - (void **) &tx_vector[tx_tail], - (uint16_t) (xd->nb_tx_desc - - tx_tail)); - /* - * If we transmitted everything we wanted, then allow 1 retry - * so we can try to transmit the rest. If we didn't transmit - * everything, stop now. - */ - n_retry = (rv == xd->nb_tx_desc - tx_tail) ? 1 : 0; - } + /* no wrap, transmit in one burst */ + dpdk_device_hqos_per_worker_thread_t *hqos = + &xd->hqos_wt[vm->cpu_index]; + + dpdk_hqos_metadata_set (hqos, + &tx_vector[tx_tail], tx_head - tx_tail); + rv = rte_ring_sp_enqueue_burst (hqos->swq, + (void **) &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); } else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) { - if (PREDICT_TRUE (tx_head > tx_tail)) - { - /* no wrap, transmit in one burst */ - rv = rte_eth_tx_burst (xd->device_index, - (uint16_t) queue_id, - &tx_vector[tx_tail], - (uint16_t) (tx_head - tx_tail)); - } - else - { - /* - * This can only happen if there is a flowcontrol callback. - * We need to split the transmit into two calls: one for - * the packets up to the wrap point, and one to continue - * at the start of the ring. - * Transmit pkts up to the wrap point. - */ - rv = rte_eth_tx_burst (xd->device_index, - (uint16_t) queue_id, - &tx_vector[tx_tail], - (uint16_t) (xd->nb_tx_desc - tx_tail)); - - /* - * If we transmitted everything we wanted, then allow 1 retry - * so we can try to transmit the rest. If we didn't transmit - * everything, stop now. - */ - n_retry = (rv == xd->nb_tx_desc - tx_tail) ? 1 : 0; - } + /* no wrap, transmit in one burst */ + rv = rte_eth_tx_burst (xd->device_index, + (uint16_t) queue_id, + &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); } else { @@ -436,58 +364,11 @@ static_always_inline return n_packets; } - -/* - * This function transmits any packets on the interface's tx_vector and returns - * the number of packets untransmitted on the tx_vector. If the tx_vector is - * empty the function simply returns 0. - * - * It is intended to be called by a traffic manager which has flowed-off an - * interface to see if the interface can be flowed-on again. - */ -u32 -dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - int queue_id; - struct rte_mbuf **tx_vector; - tx_ring_hdr_t *ring; - - /* param is dev_instance and not hw_if_index to save another lookup */ - xd = vec_elt_at_index (dm->devices, dev_instance); - - queue_id = vm->cpu_index; - tx_vector = xd->tx_vectors[queue_id]; - - /* If no packets on the ring, don't bother calling tx function */ - ring = vec_header (tx_vector, sizeof (*ring)); - if (ring->tx_head == ring->tx_tail) - { - return 0; - } - - return tx_burst_vector_internal (vm, xd, tx_vector); -} - /* * Transmits the packets on the frame to the interface associated with the * node. It first copies packets on the frame to a tx_vector containing the * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal * which calls the dpdk tx_burst function. - * - * The tx_vector is treated slightly differently depending on whether or - * not a flowcontrol callback function has been configured. If there is no - * callback, the tx_vector is a temporary array of rte_mbuf packet pointers. - * Its entries are written and consumed before the function exits. - * - * If there is a callback then the transmit is being invoked in the presence - * of a traffic manager. Here the tx_vector is treated like a ring of rte_mbuf - * pointers. If not all packets can be transmitted, the untransmitted packets - * stay on the tx_vector until the next call. The callback allows the traffic - * manager to flow-off dequeues to the interface. The companion function - * dpdk_interface_tx_vector() allows the traffic manager to detect when - * it should flow-on the interface again. */ static uword dpdk_interface_tx (vlib_main_t * vm, @@ -745,46 +626,30 @@ dpdk_interface_tx (vlib_main_t * vm, */ tx_pkts = n_on_ring - n_packets; - if (PREDICT_FALSE (dm->flowcontrol_callback != 0)) - { - if (PREDICT_FALSE (n_packets)) - { - /* Callback may want to enable flowcontrol */ - dm->flowcontrol_callback (vm, xd->vlib_hw_if_index, - ring->tx_head - ring->tx_tail); - } - else - { - /* Reset head/tail to avoid unnecessary wrap */ - ring->tx_head = 0; - ring->tx_tail = 0; - } - } - else - { - /* If there is no callback then drop any non-transmitted packets */ - if (PREDICT_FALSE (n_packets)) - { - vlib_simple_counter_main_t *cm; - vnet_main_t *vnm = vnet_get_main (); + { + /* If there is no callback then drop any non-transmitted packets */ + if (PREDICT_FALSE (n_packets)) + { + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_TX_ERROR); + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - n_packets); + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + n_packets); - vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, - n_packets); + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, + n_packets); - while (n_packets--) - rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); - } + while (n_packets--) + rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); + } - /* Reset head/tail to avoid unnecessary wrap */ - ring->tx_head = 0; - ring->tx_tail = 0; - } + /* Reset head/tail to avoid unnecessary wrap */ + ring->tx_head = 0; + ring->tx_tail = 0; + } /* Recycle replicated buffers */ if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu]))) @@ -981,22 +846,8 @@ VNET_DEVICE_CLASS (dpdk_device_class) = { VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx) /* *INDENT-ON* */ -void -dpdk_set_flowcontrol_callback (vlib_main_t * vm, - dpdk_flowcontrol_callback_t callback) -{ - dpdk_main.flowcontrol_callback = callback; -} - #define UP_DOWN_FLAG_EVENT 1 - -u32 -dpdk_get_admin_up_down_in_progress (void) -{ - return dpdk_main.admin_up_down_in_progress; -} - uword admin_up_down_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) @@ -1050,99 +901,6 @@ VLIB_REGISTER_NODE (admin_up_down_process_node,static) = { /* *INDENT-ON* */ /* - * Asynchronously invoke vnet_sw_interface_set_flags via the admin_up_down - * process. Useful for avoiding long blocking delays (>150ms) in the dpdk - * drivers. - * WARNING: when posting this event, no other interface-related calls should - * be made (e.g. vnet_create_sw_interface()) while the event is being - * processed (admin_up_down_in_progress). This is required in order to avoid - * race conditions in manipulating interface data structures. - */ -void -post_sw_interface_set_flags (vlib_main_t * vm, u32 sw_if_index, u32 flags) -{ - uword *d = vlib_process_signal_event_data - (vm, admin_up_down_process_node.index, - UP_DOWN_FLAG_EVENT, 2, sizeof (u32)); - d[0] = sw_if_index; - d[1] = flags; -} - -/* - * Return a copy of the DPDK port stats in dest. - */ -clib_error_t * -dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats *dest) -{ - dpdk_main_t *dm = &dpdk_main; - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - - if (!dest) - { - return clib_error_return (0, "Missing or NULL argument"); - } - if (!xd) - { - return clib_error_return (0, - "Unable to get DPDK device from HW interface"); - } - - dpdk_update_counters (xd, vlib_time_now (dm->vlib_main)); - - clib_memcpy (dest, &xd->stats, sizeof (xd->stats)); - return (0); -} - -/* - * Return the number of dpdk mbufs - */ -u32 -dpdk_num_mbufs (void) -{ - dpdk_main_t *dm = &dpdk_main; - - return dm->conf->num_mbufs; -} - -/* - * Return the pmd type for a given hardware interface - */ -dpdk_pmd_t -dpdk_get_pmd_type (vnet_hw_interface_t * hi) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - - assert (hi); - - xd = vec_elt_at_index (dm->devices, hi->dev_instance); - - assert (xd); - - return xd->pmd; -} - -/* - * Return the cpu socket for a given hardware interface - */ -i8 -dpdk_get_cpu_socket (vnet_hw_interface_t * hi) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - - assert (hi); - - xd = vec_elt_at_index (dm->devices, hi->dev_instance); - - assert (xd); - - return xd->cpu_socket; -} - -/* * fd.io coding-style-patch-verification: ON * * Local Variables: diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h index 1d2b4b7655e..ad973323359 100644 --- a/vnet/vnet/devices/dpdk/dpdk.h +++ b/vnet/vnet/devices/dpdk/dpdk.h @@ -124,30 +124,6 @@ typedef enum VNET_DPDK_PORT_TYPE_UNKNOWN, } dpdk_port_type_t; -typedef struct -{ - f64 deadline; - vlib_frame_t *frame; -} dpdk_frame_t; - -#define DPDK_EFD_MAX_DISCARD_RATE 10 - -typedef struct -{ - u16 last_burst_sz; - u16 max_burst_sz; - u32 full_frames_cnt; - u32 consec_full_frames_cnt; - u32 congestion_cnt; - u64 last_poll_time; - u64 max_poll_delay; - u32 discard_cnt; - u32 total_packet_cnt; -} dpdk_efd_agent_t; - -typedef void (*dpdk_flowcontrol_callback_t) (vlib_main_t * vm, - u32 hw_if_index, u32 n_packets); - /* * The header for the tx_vector in dpdk_device_t. * Head and tail are indexes into the tx_vector and are of type @@ -253,8 +229,6 @@ typedef struct struct rte_eth_xstat *last_cleared_xstats; f64 time_last_stats_update; dpdk_port_type_t port_type; - - dpdk_efd_agent_t efd_agent; } dpdk_device_t; #define DPDK_STATS_POLL_INTERVAL (10.0) @@ -285,23 +259,6 @@ typedef struct u16 queue_id; } dpdk_device_and_queue_t; -/* Early-Fast-Discard (EFD) */ -#define DPDK_EFD_DISABLED 0 -#define DPDK_EFD_DISCARD_ENABLED (1 << 0) -#define DPDK_EFD_MONITOR_ENABLED (1 << 1) -#define DPDK_EFD_DROPALL_ENABLED (1 << 2) - -#define DPDK_EFD_DEFAULT_DEVICE_QUEUE_HI_THRESH_PCT 90 -#define DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH 6 - -typedef struct dpdk_efd_t -{ - u16 enabled; - u16 queue_hi_thresh; - u16 consec_full_frames_hi_thresh; - u16 pad; -} dpdk_efd_t; - #ifndef DPDK_HQOS_DBG_BYPASS #define DPDK_HQOS_DBG_BYPASS 0 #endif @@ -413,9 +370,6 @@ typedef struct /* buffer flags template, configurable to enable/disable tcp / udp cksum */ u32 buffer_flags_template; - /* flow control callback. If 0 then flow control is disabled */ - dpdk_flowcontrol_callback_t flowcontrol_callback; - /* vlib buffer free list, must be same size as an rte_mbuf */ u32 vlib_buffer_free_list_index; @@ -441,9 +395,6 @@ typedef struct uword *vu_sw_if_index_by_sock_fd; u32 *vu_inactive_interfaces_device_index; - /* efd (early-fast-discard) settings */ - dpdk_efd_t efd; - /* * flag indicating that a posted admin up/down * (via post_sw_interface_set_flags) is in progress @@ -506,13 +457,8 @@ void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd); clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); -void dpdk_set_flowcontrol_callback (vlib_main_t * vm, - dpdk_flowcontrol_callback_t callback); - u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance); -void set_efd_bitmap (u8 * bitmap, u32 value, u32 op); - struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b); struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); @@ -524,11 +470,7 @@ struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \ _(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \ _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \ - _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error") \ - _(IPV4_EFD_DROP_PKTS, "IPV4 Early Fast Discard rx drops") \ - _(IPV6_EFD_DROP_PKTS, "IPV6 Early Fast Discard rx drops") \ - _(MPLS_EFD_DROP_PKTS, "MPLS Early Fast Discard rx drops") \ - _(VLAN_EFD_DROP_PKTS, "VLAN Early Fast Discard rx drops") + _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error") typedef enum { @@ -538,27 +480,11 @@ typedef enum DPDK_N_ERROR, } dpdk_error_t; -/* - * Increment EFD drop counter - */ -static_always_inline void -increment_efd_drop_counter (vlib_main_t * vm, u32 counter_index, u32 count) -{ - vlib_node_t *my_n; - - my_n = vlib_get_node (vm, dpdk_input_node.index); - vm->error_main.counters[my_n->error_heap_index + counter_index] += count; -} - int dpdk_set_stat_poll_interval (f64 interval); int dpdk_set_link_state_poll_interval (f64 interval); void dpdk_update_link_state (dpdk_device_t * xd, f64 now); void dpdk_device_lock_init (dpdk_device_t * xd); void dpdk_device_lock_free (dpdk_device_t * xd); -void dpdk_efd_update_counters (dpdk_device_t * xd, u32 n_buffers, - u16 enabled); -u32 is_efd_discardable (vlib_thread_main_t * tm, vlib_buffer_t * b0, - struct rte_mbuf *mb); static inline u64 vnet_get_aggregate_rx_packets (void) @@ -580,27 +506,8 @@ void dpdk_rx_trace (dpdk_main_t * dm, #define EFD_OPERATION_LESS_THAN 0 #define EFD_OPERATION_GREATER_OR_EQUAL 1 -void efd_config (u32 enabled, - u32 ip_prec, u32 ip_op, - u32 mpls_exp, u32 mpls_op, u32 vlan_cos, u32 vlan_op); - -void post_sw_interface_set_flags (vlib_main_t * vm, u32 sw_if_index, - u32 flags); - -u32 dpdk_get_admin_up_down_in_progress (void); - -u32 dpdk_num_mbufs (void); - -dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t * hi); - -i8 dpdk_get_cpu_socket (vnet_hw_interface_t * hi); - void *dpdk_input_multiarch_select (); void *dpdk_input_rss_multiarch_select (); -void *dpdk_input_efd_multiarch_select (); - -clib_error_t *dpdk_get_hw_interface_stats (u32 hw_if_index, - struct rte_eth_stats *dest); format_function_t format_dpdk_device_name; format_function_t format_dpdk_device; diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c index ee22b9a2753..e014506b9e8 100644 --- a/vnet/vnet/devices/dpdk/init.c +++ b/vnet/vnet/devices/dpdk/init.c @@ -1650,7 +1650,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) vlib_process_wait_for_event_or_clock (vm, min_wait); - if (dpdk_get_admin_up_down_in_progress ()) + if (dm->admin_up_down_in_progress) /* skip the poll if an admin up down is in progress (on any interface) */ continue; @@ -1739,13 +1739,6 @@ dpdk_init (vlib_main_t * vm) /* $$$ use n_thread_stacks since it's known-good at this point */ vec_validate (dm->recycle, tm->n_thread_stacks - 1); - /* initialize EFD (early fast discard) default settings */ - dm->efd.enabled = DPDK_EFD_DISABLED; - dm->efd.queue_hi_thresh = ((DPDK_EFD_DEFAULT_DEVICE_QUEUE_HI_THRESH_PCT * - DPDK_NB_RX_DESC_10GE) / 100); - dm->efd.consec_full_frames_hi_thresh = - DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH; - /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ dm->buffer_flags_template = (VLIB_BUFFER_TOTAL_LENGTH_VALID diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c index 02c311863fc..578a040bbbc 100644 --- a/vnet/vnet/devices/dpdk/node.c +++ b/vnet/vnet/devices/dpdk/node.c @@ -192,94 +192,6 @@ dpdk_rx_trace (dpdk_main_t * dm, } } -/* - * dpdk_efd_update_counters() - * Update EFD (early-fast-discard) counters - */ -void -dpdk_efd_update_counters (dpdk_device_t * xd, u32 n_buffers, u16 enabled) -{ - if (enabled & DPDK_EFD_MONITOR_ENABLED) - { - u64 now = clib_cpu_time_now (); - if (xd->efd_agent.last_poll_time > 0) - { - u64 elapsed_time = (now - xd->efd_agent.last_poll_time); - if (elapsed_time > xd->efd_agent.max_poll_delay) - xd->efd_agent.max_poll_delay = elapsed_time; - } - xd->efd_agent.last_poll_time = now; - } - - xd->efd_agent.total_packet_cnt += n_buffers; - xd->efd_agent.last_burst_sz = n_buffers; - - if (n_buffers > xd->efd_agent.max_burst_sz) - xd->efd_agent.max_burst_sz = n_buffers; - - if (PREDICT_FALSE (n_buffers == VLIB_FRAME_SIZE)) - { - xd->efd_agent.full_frames_cnt++; - xd->efd_agent.consec_full_frames_cnt++; - } - else - { - xd->efd_agent.consec_full_frames_cnt = 0; - } -} - -/* is_efd_discardable() - * returns non zero DPDK error if packet meets early-fast-discard criteria, - * zero otherwise - */ -u32 -is_efd_discardable (vlib_thread_main_t * tm, - vlib_buffer_t * b0, struct rte_mbuf *mb) -{ - ethernet_header_t *eh = (ethernet_header_t *) b0->data; - - if (eh->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)) - { - ip4_header_t *ipv4 = - (ip4_header_t *) & (b0->data[sizeof (ethernet_header_t)]); - u8 pkt_prec = (ipv4->tos >> 5); - - return (tm->efd.ip_prec_bitmap & (1 << pkt_prec) ? - DPDK_ERROR_IPV4_EFD_DROP_PKTS : DPDK_ERROR_NONE); - } - else if (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_IP6)) - { - ip6_header_t *ipv6 = - (ip6_header_t *) & (b0->data[sizeof (ethernet_header_t)]); - u8 pkt_tclass = - ((ipv6->ip_version_traffic_class_and_flow_label >> 20) & 0xff); - - return (tm->efd.ip_prec_bitmap & (1 << pkt_tclass) ? - DPDK_ERROR_IPV6_EFD_DROP_PKTS : DPDK_ERROR_NONE); - } - else if (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_MPLS_UNICAST)) - { - mpls_unicast_header_t *mpls = - (mpls_unicast_header_t *) & (b0->data[sizeof (ethernet_header_t)]); - u8 pkt_exp = ((mpls->label_exp_s_ttl >> 9) & 0x07); - - return (tm->efd.mpls_exp_bitmap & (1 << pkt_exp) ? - DPDK_ERROR_MPLS_EFD_DROP_PKTS : DPDK_ERROR_NONE); - } - else if ((eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_VLAN)) || - (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_DOT1AD))) - { - ethernet_vlan_header_t *vlan = - (ethernet_vlan_header_t *) & (b0->data[sizeof (ethernet_header_t)]); - u8 pkt_cos = ((vlan->priority_cfi_and_id >> 13) & 0x07); - - return (tm->efd.vlan_cos_bitmap & (1 << pkt_cos) ? - DPDK_ERROR_VLAN_EFD_DROP_PKTS : DPDK_ERROR_NONE); - } - - return DPDK_ERROR_NONE; -} - static inline u32 dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id) { @@ -321,7 +233,7 @@ static inline u32 dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, vlib_node_runtime_t * node, - u32 cpu_index, u16 queue_id, int use_efd) + u32 cpu_index, u16 queue_id) { u32 n_buffers; u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; @@ -331,7 +243,6 @@ dpdk_device_input (dpdk_main_t * dm, uword n_rx_bytes = 0; u32 n_trace, trace_cnt __attribute__ ((unused)); vlib_buffer_free_list_t *fl; - u8 efd_discard_burst = 0; u32 buffer_flags_template; if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) @@ -341,13 +252,6 @@ dpdk_device_input (dpdk_main_t * dm, if (n_buffers == 0) { - /* check if EFD (dpdk) is enabled */ - if (PREDICT_FALSE (use_efd && dm->efd.enabled)) - { - /* reset a few stats */ - xd->efd_agent.last_poll_time = 0; - xd->efd_agent.last_burst_sz = 0; - } return 0; } @@ -358,44 +262,6 @@ dpdk_device_input (dpdk_main_t * dm, fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - /* Check for congestion if EFD (Early-Fast-Discard) is enabled - * in any mode (e.g. dpdk, monitor, or drop_all) - */ - if (PREDICT_FALSE (use_efd && dm->efd.enabled)) - { - /* update EFD counters */ - dpdk_efd_update_counters (xd, n_buffers, dm->efd.enabled); - - if (PREDICT_FALSE (dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED)) - { - /* discard all received packets */ - for (mb_index = 0; mb_index < n_buffers; mb_index++) - rte_pktmbuf_free (xd->rx_vectors[queue_id][mb_index]); - - xd->efd_agent.discard_cnt += n_buffers; - increment_efd_drop_counter (vm, - DPDK_ERROR_VLAN_EFD_DROP_PKTS, - n_buffers); - - return 0; - } - - if (PREDICT_FALSE (xd->efd_agent.consec_full_frames_cnt >= - dm->efd.consec_full_frames_hi_thresh)) - { - u32 device_queue_sz = rte_eth_rx_queue_count (xd->device_index, - queue_id); - if (device_queue_sz >= dm->efd.queue_hi_thresh) - { - /* dpdk device queue has reached the critical threshold */ - xd->efd_agent.congestion_cnt++; - - /* apply EFD to packets from the burst */ - efd_discard_burst = 1; - } - } - } - mb_index = 0; while (n_buffers > 0) @@ -404,7 +270,6 @@ dpdk_device_input (dpdk_main_t * dm, u8 error0; u32 l3_offset0; vlib_buffer_t *b0, *b_seg, *b_chain = 0; - u32 cntr_type; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); @@ -426,22 +291,6 @@ dpdk_device_input (dpdk_main_t * dm, b0 = vlib_buffer_from_rte_mbuf (mb); - /* check whether EFD is looking for packets to discard */ - if (PREDICT_FALSE (efd_discard_burst)) - { - vlib_thread_main_t *tm = vlib_get_thread_main (); - - if (PREDICT_TRUE (cntr_type = is_efd_discardable (tm, b0, mb))) - { - rte_pktmbuf_free (mb); - xd->efd_agent.discard_cnt++; - increment_efd_drop_counter (vm, cntr_type, 1); - n_buffers--; - mb_index++; - continue; - } - } - /* Prefetch one next segment if it exists. */ if (PREDICT_FALSE (mb->nb_segs > 1)) { @@ -642,7 +491,7 @@ dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) { xd = vec_elt_at_index(dm->devices, dq->device); ASSERT(dq->queue_id == 0); - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0, 0); + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0); } /* *INDENT-ON* */ @@ -668,33 +517,7 @@ dpdk_input_rss (vlib_main_t * vm, vec_foreach (dq, dm->devices_by_cpu[cpu_index]) { xd = vec_elt_at_index(dm->devices, dq->device); - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 0); - } - /* *INDENT-ON* */ - - poll_rate_limit (dm); - - return n_rx_packets; -} - -uword -dpdk_input_efd (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * f) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - uword n_rx_packets = 0; - dpdk_device_and_queue_t *dq; - u32 cpu_index = os_get_cpu_number (); - - /* - * Poll all devices on this cpu for input/interrupts. - */ - /* *INDENT-OFF* */ - vec_foreach (dq, dm->devices_by_cpu[cpu_index]) - { - xd = vec_elt_at_index(dm->devices, dq->device); - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 1); + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id); } /* *INDENT-ON* */ @@ -724,53 +547,8 @@ VLIB_REGISTER_NODE (dpdk_input_node) = { /* handle dpdk_input_rss alternative function */ VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input) VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_rss) -VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_efd) /* this macro defines dpdk_input_rss_multiarch_select() */ CLIB_MULTIARCH_SELECT_FN(dpdk_input); CLIB_MULTIARCH_SELECT_FN(dpdk_input_rss); -CLIB_MULTIARCH_SELECT_FN(dpdk_input_efd); -/* - * set_efd_bitmap() - * Based on the operation type, set lower/upper bits for the given index value - */ -void -set_efd_bitmap (u8 * bitmap, u32 value, u32 op) -{ - int ix; - - *bitmap = 0; - for (ix = 0; ix < 8; ix++) - { - if (((op == EFD_OPERATION_LESS_THAN) && (ix < value)) || - ((op == EFD_OPERATION_GREATER_OR_EQUAL) && (ix >= value))) - { - (*bitmap) |= (1 << ix); - } - } -} - -void -efd_config (u32 enabled, - u32 ip_prec, u32 ip_op, - u32 mpls_exp, u32 mpls_op, u32 vlan_cos, u32 vlan_op) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - - if (enabled) - { - tm->efd.enabled |= VLIB_EFD_DISCARD_ENABLED; - dm->efd.enabled |= DPDK_EFD_DISCARD_ENABLED; - } - else - { - tm->efd.enabled &= ~VLIB_EFD_DISCARD_ENABLED; - dm->efd.enabled &= ~DPDK_EFD_DISCARD_ENABLED; - } - - set_efd_bitmap (&tm->efd.ip_prec_bitmap, ip_prec, ip_op); - set_efd_bitmap (&tm->efd.mpls_exp_bitmap, mpls_exp, mpls_op); - set_efd_bitmap (&tm->efd.vlan_cos_bitmap, vlan_cos, vlan_op); -} |