summary | refs | log | tree | commit | diff | stats
path: root/vnet
diff options
context:
space:
mode:
Diffstat (limited to 'vnet')
-rw-r--r-- vnet/vnet/devices/dpdk/cli.c | 380
-rw-r--r-- vnet/vnet/devices/dpdk/device.c | 314
-rw-r--r-- vnet/vnet/devices/dpdk/dpdk.h | 95
-rw-r--r-- vnet/vnet/devices/dpdk/init.c | 9
-rw-r--r-- vnet/vnet/devices/dpdk/node.c | 228
5 files changed, 41 insertions, 985 deletions
diff --git a/vnet/vnet/devices/dpdk/cli.c b/vnet/vnet/devices/dpdk/cli.c
index 5e53a98beae..a9f91ec5a6d 100644
--- a/vnet/vnet/devices/dpdk/cli.c
+++ b/vnet/vnet/devices/dpdk/cli.c
@@ -256,386 +256,6 @@ VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = {
};
/* *INDENT-ON* */
-static void
-show_dpdk_device_stats (vlib_main_t * vm, dpdk_device_t * xd)
-{
- vlib_cli_output (vm,
- "device_index %d\n"
- " last_burst_sz %d\n"
- " max_burst_sz %d\n"
- " full_frames_cnt %u\n"
- " consec_full_frames_cnt %u\n"
- " congestion_cnt %d\n"
- " last_poll_time %llu\n"
- " max_poll_delay %llu\n"
- " discard_cnt %u\n"
- " total_packet_cnt %u\n",
- xd->device_index,
- xd->efd_agent.last_burst_sz,
- xd->efd_agent.max_burst_sz,
- xd->efd_agent.full_frames_cnt,
- xd->efd_agent.consec_full_frames_cnt,
- xd->efd_agent.congestion_cnt,
- xd->efd_agent.last_poll_time,
- xd->efd_agent.max_poll_delay,
- xd->efd_agent.discard_cnt, xd->efd_agent.total_packet_cnt);
-
- u32 device_queue_sz = rte_eth_rx_queue_count (xd->device_index,
- 0 /* queue_id */ );
- vlib_cli_output (vm, " device_queue_sz %u\n", device_queue_sz);
-}
-
-static void
-show_efd_config (vlib_main_t * vm)
-{
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- dpdk_main_t *dm = &dpdk_main;
-
- vlib_cli_output (vm,
- "dpdk: (0x%04x) enabled:%d monitor:%d drop_all:%d\n"
- " dpdk_queue_hi_thresh %d\n"
- " consec_full_frames_hi_thresh %d\n"
- "---------\n"
- "worker: (0x%04x) enabled:%d monitor:%d\n"
- " worker_queue_hi_thresh %d\n",
- dm->efd.enabled,
- ((dm->efd.enabled & DPDK_EFD_DISCARD_ENABLED) ? 1 : 0),
- ((dm->efd.enabled & DPDK_EFD_MONITOR_ENABLED) ? 1 : 0),
- ((dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED) ? 1 : 0),
- dm->efd.queue_hi_thresh,
- dm->efd.consec_full_frames_hi_thresh,
- tm->efd.enabled,
- ((tm->efd.enabled & VLIB_EFD_DISCARD_ENABLED) ? 1 : 0),
- ((dm->efd.enabled & VLIB_EFD_MONITOR_ENABLED) ? 1 : 0),
- tm->efd.queue_hi_thresh);
- vlib_cli_output (vm,
- "---------\n"
- "ip_prec_bitmap 0x%02x\n"
- "mpls_exp_bitmap 0x%02x\n"
- "vlan_cos_bitmap 0x%02x\n",
- tm->efd.ip_prec_bitmap,
- tm->efd.mpls_exp_bitmap, tm->efd.vlan_cos_bitmap);
-}
-
-static clib_error_t *
-show_efd (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
-
- if (unformat (input, "config"))
- {
- show_efd_config (vm);
- }
- else if (unformat (input, "dpdk"))
- {
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd;
- u32 device_id = ~0;
-
- (void) unformat (input, "device %d", &device_id);
- /* *INDENT-OFF* */
- vec_foreach (xd, dm->devices)
- {
- if ((xd->device_index == device_id) || (device_id == ~0))
- {
- show_dpdk_device_stats(vm, xd);
- }
- }
- /* *INDENT-ON* */
- }
- else if (unformat (input, "worker"))
- {
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- vlib_thread_registration_t *tr;
- u32 num_workers = 0;
- u32 first_worker_index = 0;
- uword *p;
-
- p = hash_get_mem (tm->thread_registrations_by_name, "workers");
- ASSERT (p);
- tr = (vlib_thread_registration_t *) p[0];
- if (tr)
- {
- num_workers = tr->count;
- first_worker_index = tr->first_index;
- }
-
- vlib_cli_output (vm,
- "num_workers %d\n"
- "first_worker_index %d\n",
- num_workers, first_worker_index);
-
- }
- else if (unformat (input, "help"))
- {
- vlib_cli_output (vm, "Usage: show efd config | "
- "dpdk [device <id>] | worker\n");
- }
- else
- {
- show_efd_config (vm);
- }
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_efd_command, static) = {
- .path = "show efd",
- .short_help = "Show efd [device <id>] | [config]",
- .function = show_efd,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-clear_efd (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd;
-
- /* *INDENT-OFF* */
- vec_foreach (xd, dm->devices)
- {
- xd->efd_agent.last_burst_sz = 0;
- xd->efd_agent.max_burst_sz = 0;
- xd->efd_agent.full_frames_cnt = 0;
- xd->efd_agent.consec_full_frames_cnt = 0;
- xd->efd_agent.congestion_cnt = 0;
- xd->efd_agent.last_poll_time = 0;
- xd->efd_agent.max_poll_delay = 0;
- xd->efd_agent.discard_cnt = 0;
- xd->efd_agent.total_packet_cnt = 0;
- }
- /* *INDENT-ON* */
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (clear_efd_command,static) = {
- .path = "clear efd",
- .short_help = "Clear early-fast-discard counters",
- .function = clear_efd,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-parse_op_and_prec (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd,
- char *prec_type, u8 * prec_bitmap)
-{
- clib_error_t *error = NULL;
- u8 op = 0;
- u8 prec = 0;
-
- if (unformat (input, "ge"))
- {
- op = EFD_OPERATION_GREATER_OR_EQUAL;
- }
- else if (unformat (input, "lt"))
- {
- op = EFD_OPERATION_LESS_THAN;
- }
- else if (unformat (input, "help"))
- {
- vlib_cli_output (vm, "enter operation [ge | lt] and precedence <0-7>)");
- return (error);
- }
- else
- {
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
-
- if (unformat (input, "%u", &prec))
- {
- if (prec > 7)
- {
- return clib_error_return (0, "precedence %d is out of range <0-7>",
- prec);
- }
- }
- else
- {
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
-
- set_efd_bitmap (prec_bitmap, prec, op);
-
- vlib_cli_output (vm,
- "EFD will be set for %s precedence %s%u%s.",
- prec_type,
- (op == EFD_OPERATION_LESS_THAN) ? "less than " : "",
- prec,
- (op ==
- EFD_OPERATION_GREATER_OR_EQUAL) ? " and greater" : "");
-
- return (error);
-}
-
-
-static clib_error_t *
-set_efd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- dpdk_main_t *dm = &dpdk_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- clib_error_t *error = NULL;
- vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, dpdk_input_node.index);
-
- if (unformat (input, "enable"))
- {
- if (unformat (input, "dpdk"))
- {
- dm->efd.enabled |= DPDK_EFD_DISCARD_ENABLED;
- }
- else if (unformat (input, "worker"))
- {
- tm->efd.enabled |= VLIB_EFD_DISCARD_ENABLED;
- }
- else if (unformat (input, "monitor"))
- {
- dm->efd.enabled |= DPDK_EFD_MONITOR_ENABLED;
- tm->efd.enabled |= VLIB_EFD_MONITOR_ENABLED;
- }
- else if (unformat (input, "drop_all"))
- {
- dm->efd.enabled |= DPDK_EFD_DROPALL_ENABLED;
- }
- else if (unformat (input, "default"))
- {
- dm->efd.enabled = (DPDK_EFD_DISCARD_ENABLED |
- DPDK_EFD_MONITOR_ENABLED);
- tm->efd.enabled = (VLIB_EFD_DISCARD_ENABLED |
- VLIB_EFD_MONITOR_ENABLED);
- }
- else
- {
- return clib_error_return (0, "Usage: set efd enable [dpdk | "
- "worker | monitor | drop_all | default]");
- }
- }
- else if (unformat (input, "disable"))
- {
- if (unformat (input, "dpdk"))
- {
- dm->efd.enabled &= ~DPDK_EFD_DISCARD_ENABLED;
- }
- else if (unformat (input, "worker"))
- {
- tm->efd.enabled &= ~VLIB_EFD_DISCARD_ENABLED;
- }
- else if (unformat (input, "monitor"))
- {
- dm->efd.enabled &= ~DPDK_EFD_MONITOR_ENABLED;
- tm->efd.enabled &= ~VLIB_EFD_MONITOR_ENABLED;
- }
- else if (unformat (input, "drop_all"))
- {
- dm->efd.enabled &= ~DPDK_EFD_DROPALL_ENABLED;
- }
- else if (unformat (input, "all"))
- {
- dm->efd.enabled = 0;
- tm->efd.enabled = 0;
- }
- else
- {
- return clib_error_return (0, "Usage: set efd disable [dpdk | "
- "worker | monitor | drop_all | all]");
- }
- }
- else if (unformat (input, "worker_queue_hi_thresh"))
- {
- u32 mark;
- if (unformat (input, "%u", &mark))
- {
- tm->efd.queue_hi_thresh = mark;
- }
- else
- {
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
- }
- else if (unformat (input, "dpdk_device_hi_thresh"))
- {
- u32 thresh;
- if (unformat (input, "%u", &thresh))
- {
- dm->efd.queue_hi_thresh = thresh;
- }
- else
- {
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
- }
- else if (unformat (input, "consec_full_frames_hi_thresh"))
- {
- u32 thresh;
- if (unformat (input, "%u", &thresh))
- {
- dm->efd.consec_full_frames_hi_thresh = thresh;
- }
- else
- {
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
- }
- else if (unformat (input, "ip-prec"))
- {
- return (parse_op_and_prec (vm, input, cmd,
- "ip", &tm->efd.ip_prec_bitmap));
- }
- else if (unformat (input, "mpls-exp"))
- {
- return (parse_op_and_prec (vm, input, cmd,
- "mpls", &tm->efd.mpls_exp_bitmap));
- }
- else if (unformat (input, "vlan-cos"))
- {
- return (parse_op_and_prec (vm, input, cmd,
- "vlan", &tm->efd.vlan_cos_bitmap));
- }
- else if (unformat (input, "help"))
- {
- vlib_cli_output (vm,
- "Usage:\n"
- " set efd enable <dpdk | worker | monitor | drop_all | default> |\n"
- " set efd disable <dpdk | worker | monitor | drop_all | all> |\n"
- " set efd <ip-prec | mpls-exp | vlan-cos> <ge | lt> <0-7>\n"
- " set efd worker_queue_hi_thresh <0-32> |\n"
- " set efd dpdk_device_hi_thresh <0-%d> |\n"
- " set efd consec_full_frames_hi_thresh <count> |\n",
- DPDK_NB_RX_DESC_10GE);
- }
- else
- {
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
-
- if (dm->efd.enabled)
- rt->function = dpdk_input_efd_multiarch_select ();
- else if (dm->use_rss)
- rt->function = dpdk_input_rss_multiarch_select ();
- else
- rt->function = dpdk_input_multiarch_select ();
-
- return error;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_set_efd,static) = {
- .path = "set efd",
- .short_help = "set early-fast-discard commands",
- .function = set_efd,
-};
-/* *INDENT-ON* */
-
static clib_error_t *
set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
diff --git a/vnet/vnet/devices/dpdk/device.c b/vnet/vnet/devices/dpdk/device.c
index 411380633e7..d69ee3e453b 100644
--- a/vnet/vnet/devices/dpdk/device.c
+++ b/vnet/vnet/devices/dpdk/device.c
@@ -254,13 +254,6 @@ dpdk_tx_trace_buffer (dpdk_main_t * dm,
* on the tx_vector. If all packets are transmitted (the normal case), the
* function returns 0.
*
- * The tx_burst function may not be able to transmit all packets because the
- * dpdk ring is full. If a flowcontrol callback function has been configured
- * then the function simply returns. If no callback has been configured, the
- * function will retry calling tx_burst with the remaining packets. This will
- * continue until all packets are transmitted or tx_burst indicates no packets
- * could be transmitted. (The caller can drop the remaining packets.)
- *
* The function assumes there is at least one packet on the tx_vector.
*/
static_always_inline
@@ -297,21 +290,9 @@ static_always_inline
* calls due to a ring wrap.
*/
ASSERT (n_packets < xd->nb_tx_desc);
+ ASSERT (ring->tx_tail == 0);
- /*
- * If there is no flowcontrol callback, there is only temporary buffering
- * on the tx_vector and so the tail should always be 0.
- */
- ASSERT (dm->flowcontrol_callback || ring->tx_tail == 0);
-
- /*
- * If there is a flowcontrol callback, don't retry any incomplete tx_bursts.
- * Apply backpressure instead. If there is no callback, keep retrying until
- * a tx_burst sends no packets. n_retry of 255 essentially means no retry
- * limit.
- */
- n_retry = dm->flowcontrol_callback ? 0 : 255;
-
+ n_retry = 16;
queue_id = vm->cpu_index;
do
@@ -331,78 +312,25 @@ static_always_inline
queue_id = (queue_id + 1) % xd->tx_q_used;
}
- if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */
+ if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */
{
- if (PREDICT_TRUE (tx_head > tx_tail))
- {
- /* no wrap, transmit in one burst */
- dpdk_device_hqos_per_worker_thread_t *hqos =
- &xd->hqos_wt[vm->cpu_index];
-
- dpdk_hqos_metadata_set (hqos,
- &tx_vector[tx_tail], tx_head - tx_tail);
- rv = rte_ring_sp_enqueue_burst (hqos->swq,
- (void **) &tx_vector[tx_tail],
- (uint16_t) (tx_head - tx_tail));
- }
- else
- {
- /*
- * This can only happen if there is a flowcontrol callback.
- * We need to split the transmit into two calls: one for
- * the packets up to the wrap point, and one to continue
- * at the start of the ring.
- * Transmit pkts up to the wrap point.
- */
- dpdk_device_hqos_per_worker_thread_t *hqos =
- &xd->hqos_wt[vm->cpu_index];
-
- dpdk_hqos_metadata_set (hqos,
- &tx_vector[tx_tail],
- xd->nb_tx_desc - tx_tail);
- rv = rte_ring_sp_enqueue_burst (hqos->swq,
- (void **) &tx_vector[tx_tail],
- (uint16_t) (xd->nb_tx_desc -
- tx_tail));
- /*
- * If we transmitted everything we wanted, then allow 1 retry
- * so we can try to transmit the rest. If we didn't transmit
- * everything, stop now.
- */
- n_retry = (rv == xd->nb_tx_desc - tx_tail) ? 1 : 0;
- }
+ /* no wrap, transmit in one burst */
+ dpdk_device_hqos_per_worker_thread_t *hqos =
+ &xd->hqos_wt[vm->cpu_index];
+
+ dpdk_hqos_metadata_set (hqos,
+ &tx_vector[tx_tail], tx_head - tx_tail);
+ rv = rte_ring_sp_enqueue_burst (hqos->swq,
+ (void **) &tx_vector[tx_tail],
+ (uint16_t) (tx_head - tx_tail));
}
else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD))
{
- if (PREDICT_TRUE (tx_head > tx_tail))
- {
- /* no wrap, transmit in one burst */
- rv = rte_eth_tx_burst (xd->device_index,
- (uint16_t) queue_id,
- &tx_vector[tx_tail],
- (uint16_t) (tx_head - tx_tail));
- }
- else
- {
- /*
- * This can only happen if there is a flowcontrol callback.
- * We need to split the transmit into two calls: one for
- * the packets up to the wrap point, and one to continue
- * at the start of the ring.
- * Transmit pkts up to the wrap point.
- */
- rv = rte_eth_tx_burst (xd->device_index,
- (uint16_t) queue_id,
- &tx_vector[tx_tail],
- (uint16_t) (xd->nb_tx_desc - tx_tail));
-
- /*
- * If we transmitted everything we wanted, then allow 1 retry
- * so we can try to transmit the rest. If we didn't transmit
- * everything, stop now.
- */
- n_retry = (rv == xd->nb_tx_desc - tx_tail) ? 1 : 0;
- }
+ /* no wrap, transmit in one burst */
+ rv = rte_eth_tx_burst (xd->device_index,
+ (uint16_t) queue_id,
+ &tx_vector[tx_tail],
+ (uint16_t) (tx_head - tx_tail));
}
else
{
@@ -436,58 +364,11 @@ static_always_inline
return n_packets;
}
-
-/*
- * This function transmits any packets on the interface's tx_vector and returns
- * the number of packets untransmitted on the tx_vector. If the tx_vector is
- * empty the function simply returns 0.
- *
- * It is intended to be called by a traffic manager which has flowed-off an
- * interface to see if the interface can be flowed-on again.
- */
-u32
-dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance)
-{
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd;
- int queue_id;
- struct rte_mbuf **tx_vector;
- tx_ring_hdr_t *ring;
-
- /* param is dev_instance and not hw_if_index to save another lookup */
- xd = vec_elt_at_index (dm->devices, dev_instance);
-
- queue_id = vm->cpu_index;
- tx_vector = xd->tx_vectors[queue_id];
-
- /* If no packets on the ring, don't bother calling tx function */
- ring = vec_header (tx_vector, sizeof (*ring));
- if (ring->tx_head == ring->tx_tail)
- {
- return 0;
- }
-
- return tx_burst_vector_internal (vm, xd, tx_vector);
-}
-
/*
* Transmits the packets on the frame to the interface associated with the
* node. It first copies packets on the frame to a tx_vector containing the
* rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal
* which calls the dpdk tx_burst function.
- *
- * The tx_vector is treated slightly differently depending on whether or
- * not a flowcontrol callback function has been configured. If there is no
- * callback, the tx_vector is a temporary array of rte_mbuf packet pointers.
- * Its entries are written and consumed before the function exits.
- *
- * If there is a callback then the transmit is being invoked in the presence
- * of a traffic manager. Here the tx_vector is treated like a ring of rte_mbuf
- * pointers. If not all packets can be transmitted, the untransmitted packets
- * stay on the tx_vector until the next call. The callback allows the traffic
- * manager to flow-off dequeues to the interface. The companion function
- * dpdk_interface_tx_vector() allows the traffic manager to detect when
- * it should flow-on the interface again.
*/
static uword
dpdk_interface_tx (vlib_main_t * vm,
@@ -745,46 +626,30 @@ dpdk_interface_tx (vlib_main_t * vm,
*/
tx_pkts = n_on_ring - n_packets;
- if (PREDICT_FALSE (dm->flowcontrol_callback != 0))
- {
- if (PREDICT_FALSE (n_packets))
- {
- /* Callback may want to enable flowcontrol */
- dm->flowcontrol_callback (vm, xd->vlib_hw_if_index,
- ring->tx_head - ring->tx_tail);
- }
- else
- {
- /* Reset head/tail to avoid unnecessary wrap */
- ring->tx_head = 0;
- ring->tx_tail = 0;
- }
- }
- else
- {
- /* If there is no callback then drop any non-transmitted packets */
- if (PREDICT_FALSE (n_packets))
- {
- vlib_simple_counter_main_t *cm;
- vnet_main_t *vnm = vnet_get_main ();
+ {
+ /* Drop any packets that could not be transmitted */
+ if (PREDICT_FALSE (n_packets))
+ {
+ vlib_simple_counter_main_t *cm;
+ vnet_main_t *vnm = vnet_get_main ();
- cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
- VNET_INTERFACE_COUNTER_TX_ERROR);
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_TX_ERROR);
- vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
- n_packets);
+ vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+ n_packets);
- vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
- n_packets);
+ vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
+ n_packets);
- while (n_packets--)
- rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]);
- }
+ while (n_packets--)
+ rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]);
+ }
- /* Reset head/tail to avoid unnecessary wrap */
- ring->tx_head = 0;
- ring->tx_tail = 0;
- }
+ /* Reset head/tail to avoid unnecessary wrap */
+ ring->tx_head = 0;
+ ring->tx_tail = 0;
+ }
/* Recycle replicated buffers */
if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu])))
@@ -981,22 +846,8 @@ VNET_DEVICE_CLASS (dpdk_device_class) = {
VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx)
/* *INDENT-ON* */
-void
-dpdk_set_flowcontrol_callback (vlib_main_t * vm,
- dpdk_flowcontrol_callback_t callback)
-{
- dpdk_main.flowcontrol_callback = callback;
-}
-
#define UP_DOWN_FLAG_EVENT 1
-
-u32
-dpdk_get_admin_up_down_in_progress (void)
-{
- return dpdk_main.admin_up_down_in_progress;
-}
-
uword
admin_up_down_process (vlib_main_t * vm,
vlib_node_runtime_t * rt, vlib_frame_t * f)
@@ -1050,99 +901,6 @@ VLIB_REGISTER_NODE (admin_up_down_process_node,static) = {
/* *INDENT-ON* */
/*
- * Asynchronously invoke vnet_sw_interface_set_flags via the admin_up_down
- * process. Useful for avoiding long blocking delays (>150ms) in the dpdk
- * drivers.
- * WARNING: when posting this event, no other interface-related calls should
- * be made (e.g. vnet_create_sw_interface()) while the event is being
- * processed (admin_up_down_in_progress). This is required in order to avoid
- * race conditions in manipulating interface data structures.
- */
-void
-post_sw_interface_set_flags (vlib_main_t * vm, u32 sw_if_index, u32 flags)
-{
- uword *d = vlib_process_signal_event_data
- (vm, admin_up_down_process_node.index,
- UP_DOWN_FLAG_EVENT, 2, sizeof (u32));
- d[0] = sw_if_index;
- d[1] = flags;
-}
-
-/*
- * Return a copy of the DPDK port stats in dest.
- */
-clib_error_t *
-dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats *dest)
-{
- dpdk_main_t *dm = &dpdk_main;
- vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
-
- if (!dest)
- {
- return clib_error_return (0, "Missing or NULL argument");
- }
- if (!xd)
- {
- return clib_error_return (0,
- "Unable to get DPDK device from HW interface");
- }
-
- dpdk_update_counters (xd, vlib_time_now (dm->vlib_main));
-
- clib_memcpy (dest, &xd->stats, sizeof (xd->stats));
- return (0);
-}
-
-/*
- * Return the number of dpdk mbufs
- */
-u32
-dpdk_num_mbufs (void)
-{
- dpdk_main_t *dm = &dpdk_main;
-
- return dm->conf->num_mbufs;
-}
-
-/*
- * Return the pmd type for a given hardware interface
- */
-dpdk_pmd_t
-dpdk_get_pmd_type (vnet_hw_interface_t * hi)
-{
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd;
-
- assert (hi);
-
- xd = vec_elt_at_index (dm->devices, hi->dev_instance);
-
- assert (xd);
-
- return xd->pmd;
-}
-
-/*
- * Return the cpu socket for a given hardware interface
- */
-i8
-dpdk_get_cpu_socket (vnet_hw_interface_t * hi)
-{
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd;
-
- assert (hi);
-
- xd = vec_elt_at_index (dm->devices, hi->dev_instance);
-
- assert (xd);
-
- return xd->cpu_socket;
-}
-
-/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h
index 1d2b4b7655e..ad973323359 100644
--- a/vnet/vnet/devices/dpdk/dpdk.h
+++ b/vnet/vnet/devices/dpdk/dpdk.h
@@ -124,30 +124,6 @@ typedef enum
VNET_DPDK_PORT_TYPE_UNKNOWN,
} dpdk_port_type_t;
-typedef struct
-{
- f64 deadline;
- vlib_frame_t *frame;
-} dpdk_frame_t;
-
-#define DPDK_EFD_MAX_DISCARD_RATE 10
-
-typedef struct
-{
- u16 last_burst_sz;
- u16 max_burst_sz;
- u32 full_frames_cnt;
- u32 consec_full_frames_cnt;
- u32 congestion_cnt;
- u64 last_poll_time;
- u64 max_poll_delay;
- u32 discard_cnt;
- u32 total_packet_cnt;
-} dpdk_efd_agent_t;
-
-typedef void (*dpdk_flowcontrol_callback_t) (vlib_main_t * vm,
- u32 hw_if_index, u32 n_packets);
-
/*
* The header for the tx_vector in dpdk_device_t.
* Head and tail are indexes into the tx_vector and are of type
@@ -253,8 +229,6 @@ typedef struct
struct rte_eth_xstat *last_cleared_xstats;
f64 time_last_stats_update;
dpdk_port_type_t port_type;
-
- dpdk_efd_agent_t efd_agent;
} dpdk_device_t;
#define DPDK_STATS_POLL_INTERVAL (10.0)
@@ -285,23 +259,6 @@ typedef struct
u16 queue_id;
} dpdk_device_and_queue_t;
-/* Early-Fast-Discard (EFD) */
-#define DPDK_EFD_DISABLED 0
-#define DPDK_EFD_DISCARD_ENABLED (1 << 0)
-#define DPDK_EFD_MONITOR_ENABLED (1 << 1)
-#define DPDK_EFD_DROPALL_ENABLED (1 << 2)
-
-#define DPDK_EFD_DEFAULT_DEVICE_QUEUE_HI_THRESH_PCT 90
-#define DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH 6
-
-typedef struct dpdk_efd_t
-{
- u16 enabled;
- u16 queue_hi_thresh;
- u16 consec_full_frames_hi_thresh;
- u16 pad;
-} dpdk_efd_t;
-
#ifndef DPDK_HQOS_DBG_BYPASS
#define DPDK_HQOS_DBG_BYPASS 0
#endif
@@ -413,9 +370,6 @@ typedef struct
/* buffer flags template, configurable to enable/disable tcp / udp cksum */
u32 buffer_flags_template;
- /* flow control callback. If 0 then flow control is disabled */
- dpdk_flowcontrol_callback_t flowcontrol_callback;
-
/* vlib buffer free list, must be same size as an rte_mbuf */
u32 vlib_buffer_free_list_index;
@@ -441,9 +395,6 @@ typedef struct
uword *vu_sw_if_index_by_sock_fd;
u32 *vu_inactive_interfaces_device_index;
- /* efd (early-fast-discard) settings */
- dpdk_efd_t efd;
-
/*
* flag indicating that a posted admin up/down
* (via post_sw_interface_set_flags) is in progress
@@ -506,13 +457,8 @@ void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd);
clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd);
-void dpdk_set_flowcontrol_callback (vlib_main_t * vm,
- dpdk_flowcontrol_callback_t callback);
-
u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance);
-void set_efd_bitmap (u8 * bitmap, u32 value, u32 op);
-
struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b);
struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b);
@@ -524,11 +470,7 @@ struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b);
_(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \
_(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \
_(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \
- _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error") \
- _(IPV4_EFD_DROP_PKTS, "IPV4 Early Fast Discard rx drops") \
- _(IPV6_EFD_DROP_PKTS, "IPV6 Early Fast Discard rx drops") \
- _(MPLS_EFD_DROP_PKTS, "MPLS Early Fast Discard rx drops") \
- _(VLAN_EFD_DROP_PKTS, "VLAN Early Fast Discard rx drops")
+ _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error")
typedef enum
{
@@ -538,27 +480,11 @@ typedef enum
DPDK_N_ERROR,
} dpdk_error_t;
-/*
- * Increment EFD drop counter
- */
-static_always_inline void
-increment_efd_drop_counter (vlib_main_t * vm, u32 counter_index, u32 count)
-{
- vlib_node_t *my_n;
-
- my_n = vlib_get_node (vm, dpdk_input_node.index);
- vm->error_main.counters[my_n->error_heap_index + counter_index] += count;
-}
-
int dpdk_set_stat_poll_interval (f64 interval);
int dpdk_set_link_state_poll_interval (f64 interval);
void dpdk_update_link_state (dpdk_device_t * xd, f64 now);
void dpdk_device_lock_init (dpdk_device_t * xd);
void dpdk_device_lock_free (dpdk_device_t * xd);
-void dpdk_efd_update_counters (dpdk_device_t * xd, u32 n_buffers,
- u16 enabled);
-u32 is_efd_discardable (vlib_thread_main_t * tm, vlib_buffer_t * b0,
- struct rte_mbuf *mb);
static inline u64
vnet_get_aggregate_rx_packets (void)
@@ -580,27 +506,8 @@ void dpdk_rx_trace (dpdk_main_t * dm,
#define EFD_OPERATION_LESS_THAN 0
#define EFD_OPERATION_GREATER_OR_EQUAL 1
-void efd_config (u32 enabled,
- u32 ip_prec, u32 ip_op,
- u32 mpls_exp, u32 mpls_op, u32 vlan_cos, u32 vlan_op);
-
-void post_sw_interface_set_flags (vlib_main_t * vm, u32 sw_if_index,
- u32 flags);
-
-u32 dpdk_get_admin_up_down_in_progress (void);
-
-u32 dpdk_num_mbufs (void);
-
-dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t * hi);
-
-i8 dpdk_get_cpu_socket (vnet_hw_interface_t * hi);
-
void *dpdk_input_multiarch_select ();
void *dpdk_input_rss_multiarch_select ();
-void *dpdk_input_efd_multiarch_select ();
-
-clib_error_t *dpdk_get_hw_interface_stats (u32 hw_if_index,
- struct rte_eth_stats *dest);
format_function_t format_dpdk_device_name;
format_function_t format_dpdk_device;
diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c
index ee22b9a2753..e014506b9e8 100644
--- a/vnet/vnet/devices/dpdk/init.c
+++ b/vnet/vnet/devices/dpdk/init.c
@@ -1650,7 +1650,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
vlib_process_wait_for_event_or_clock (vm, min_wait);
- if (dpdk_get_admin_up_down_in_progress ())
+ if (dm->admin_up_down_in_progress)
/* skip the poll if an admin up down is in progress (on any interface) */
continue;
@@ -1739,13 +1739,6 @@ dpdk_init (vlib_main_t * vm)
/* $$$ use n_thread_stacks since it's known-good at this point */
vec_validate (dm->recycle, tm->n_thread_stacks - 1);
- /* initialize EFD (early fast discard) default settings */
- dm->efd.enabled = DPDK_EFD_DISABLED;
- dm->efd.queue_hi_thresh = ((DPDK_EFD_DEFAULT_DEVICE_QUEUE_HI_THRESH_PCT *
- DPDK_NB_RX_DESC_10GE) / 100);
- dm->efd.consec_full_frames_hi_thresh =
- DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH;
-
/* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
dm->buffer_flags_template =
(VLIB_BUFFER_TOTAL_LENGTH_VALID
diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c
index 02c311863fc..578a040bbbc 100644
--- a/vnet/vnet/devices/dpdk/node.c
+++ b/vnet/vnet/devices/dpdk/node.c
@@ -192,94 +192,6 @@ dpdk_rx_trace (dpdk_main_t * dm,
}
}
-/*
- * dpdk_efd_update_counters()
- * Update EFD (early-fast-discard) counters
- */
-void
-dpdk_efd_update_counters (dpdk_device_t * xd, u32 n_buffers, u16 enabled)
-{
- if (enabled & DPDK_EFD_MONITOR_ENABLED)
- {
- u64 now = clib_cpu_time_now ();
- if (xd->efd_agent.last_poll_time > 0)
- {
- u64 elapsed_time = (now - xd->efd_agent.last_poll_time);
- if (elapsed_time > xd->efd_agent.max_poll_delay)
- xd->efd_agent.max_poll_delay = elapsed_time;
- }
- xd->efd_agent.last_poll_time = now;
- }
-
- xd->efd_agent.total_packet_cnt += n_buffers;
- xd->efd_agent.last_burst_sz = n_buffers;
-
- if (n_buffers > xd->efd_agent.max_burst_sz)
- xd->efd_agent.max_burst_sz = n_buffers;
-
- if (PREDICT_FALSE (n_buffers == VLIB_FRAME_SIZE))
- {
- xd->efd_agent.full_frames_cnt++;
- xd->efd_agent.consec_full_frames_cnt++;
- }
- else
- {
- xd->efd_agent.consec_full_frames_cnt = 0;
- }
-}
-
-/* is_efd_discardable()
- * returns non zero DPDK error if packet meets early-fast-discard criteria,
- * zero otherwise
- */
-u32
-is_efd_discardable (vlib_thread_main_t * tm,
- vlib_buffer_t * b0, struct rte_mbuf *mb)
-{
- ethernet_header_t *eh = (ethernet_header_t *) b0->data;
-
- if (eh->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4))
- {
- ip4_header_t *ipv4 =
- (ip4_header_t *) & (b0->data[sizeof (ethernet_header_t)]);
- u8 pkt_prec = (ipv4->tos >> 5);
-
- return (tm->efd.ip_prec_bitmap & (1 << pkt_prec) ?
- DPDK_ERROR_IPV4_EFD_DROP_PKTS : DPDK_ERROR_NONE);
- }
- else if (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_IP6))
- {
- ip6_header_t *ipv6 =
- (ip6_header_t *) & (b0->data[sizeof (ethernet_header_t)]);
- u8 pkt_tclass =
- ((ipv6->ip_version_traffic_class_and_flow_label >> 20) & 0xff);
-
- return (tm->efd.ip_prec_bitmap & (1 << pkt_tclass) ?
- DPDK_ERROR_IPV6_EFD_DROP_PKTS : DPDK_ERROR_NONE);
- }
- else if (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_MPLS_UNICAST))
- {
- mpls_unicast_header_t *mpls =
- (mpls_unicast_header_t *) & (b0->data[sizeof (ethernet_header_t)]);
- u8 pkt_exp = ((mpls->label_exp_s_ttl >> 9) & 0x07);
-
- return (tm->efd.mpls_exp_bitmap & (1 << pkt_exp) ?
- DPDK_ERROR_MPLS_EFD_DROP_PKTS : DPDK_ERROR_NONE);
- }
- else if ((eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_VLAN)) ||
- (eh->type == clib_net_to_host_u16 (ETHERNET_TYPE_DOT1AD)))
- {
- ethernet_vlan_header_t *vlan =
- (ethernet_vlan_header_t *) & (b0->data[sizeof (ethernet_header_t)]);
- u8 pkt_cos = ((vlan->priority_cfi_and_id >> 13) & 0x07);
-
- return (tm->efd.vlan_cos_bitmap & (1 << pkt_cos) ?
- DPDK_ERROR_VLAN_EFD_DROP_PKTS : DPDK_ERROR_NONE);
- }
-
- return DPDK_ERROR_NONE;
-}
-
static inline u32
dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id)
{
@@ -321,7 +233,7 @@ static inline u32
dpdk_device_input (dpdk_main_t * dm,
dpdk_device_t * xd,
vlib_node_runtime_t * node,
- u32 cpu_index, u16 queue_id, int use_efd)
+ u32 cpu_index, u16 queue_id)
{
u32 n_buffers;
u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
@@ -331,7 +243,6 @@ dpdk_device_input (dpdk_main_t * dm,
uword n_rx_bytes = 0;
u32 n_trace, trace_cnt __attribute__ ((unused));
vlib_buffer_free_list_t *fl;
- u8 efd_discard_burst = 0;
u32 buffer_flags_template;
if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
@@ -341,13 +252,6 @@ dpdk_device_input (dpdk_main_t * dm,
if (n_buffers == 0)
{
- /* check if EFD (dpdk) is enabled */
- if (PREDICT_FALSE (use_efd && dm->efd.enabled))
- {
- /* reset a few stats */
- xd->efd_agent.last_poll_time = 0;
- xd->efd_agent.last_burst_sz = 0;
- }
return 0;
}
@@ -358,44 +262,6 @@ dpdk_device_input (dpdk_main_t * dm,
fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
- /* Check for congestion if EFD (Early-Fast-Discard) is enabled
- * in any mode (e.g. dpdk, monitor, or drop_all)
- */
- if (PREDICT_FALSE (use_efd && dm->efd.enabled))
- {
- /* update EFD counters */
- dpdk_efd_update_counters (xd, n_buffers, dm->efd.enabled);
-
- if (PREDICT_FALSE (dm->efd.enabled & DPDK_EFD_DROPALL_ENABLED))
- {
- /* discard all received packets */
- for (mb_index = 0; mb_index < n_buffers; mb_index++)
- rte_pktmbuf_free (xd->rx_vectors[queue_id][mb_index]);
-
- xd->efd_agent.discard_cnt += n_buffers;
- increment_efd_drop_counter (vm,
- DPDK_ERROR_VLAN_EFD_DROP_PKTS,
- n_buffers);
-
- return 0;
- }
-
- if (PREDICT_FALSE (xd->efd_agent.consec_full_frames_cnt >=
- dm->efd.consec_full_frames_hi_thresh))
- {
- u32 device_queue_sz = rte_eth_rx_queue_count (xd->device_index,
- queue_id);
- if (device_queue_sz >= dm->efd.queue_hi_thresh)
- {
- /* dpdk device queue has reached the critical threshold */
- xd->efd_agent.congestion_cnt++;
-
- /* apply EFD to packets from the burst */
- efd_discard_burst = 1;
- }
- }
- }
-
mb_index = 0;
while (n_buffers > 0)
@@ -404,7 +270,6 @@ dpdk_device_input (dpdk_main_t * dm,
u8 error0;
u32 l3_offset0;
vlib_buffer_t *b0, *b_seg, *b_chain = 0;
- u32 cntr_type;
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -426,22 +291,6 @@ dpdk_device_input (dpdk_main_t * dm,
b0 = vlib_buffer_from_rte_mbuf (mb);
- /* check whether EFD is looking for packets to discard */
- if (PREDICT_FALSE (efd_discard_burst))
- {
- vlib_thread_main_t *tm = vlib_get_thread_main ();
-
- if (PREDICT_TRUE (cntr_type = is_efd_discardable (tm, b0, mb)))
- {
- rte_pktmbuf_free (mb);
- xd->efd_agent.discard_cnt++;
- increment_efd_drop_counter (vm, cntr_type, 1);
- n_buffers--;
- mb_index++;
- continue;
- }
- }
-
/* Prefetch one next segment if it exists. */
if (PREDICT_FALSE (mb->nb_segs > 1))
{
@@ -642,7 +491,7 @@ dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
{
xd = vec_elt_at_index(dm->devices, dq->device);
ASSERT(dq->queue_id == 0);
- n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0, 0);
+ n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, 0);
}
/* *INDENT-ON* */
@@ -668,33 +517,7 @@ dpdk_input_rss (vlib_main_t * vm,
vec_foreach (dq, dm->devices_by_cpu[cpu_index])
{
xd = vec_elt_at_index(dm->devices, dq->device);
- n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 0);
- }
- /* *INDENT-ON* */
-
- poll_rate_limit (dm);
-
- return n_rx_packets;
-}
-
-uword
-dpdk_input_efd (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * f)
-{
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd;
- uword n_rx_packets = 0;
- dpdk_device_and_queue_t *dq;
- u32 cpu_index = os_get_cpu_number ();
-
- /*
- * Poll all devices on this cpu for input/interrupts.
- */
- /* *INDENT-OFF* */
- vec_foreach (dq, dm->devices_by_cpu[cpu_index])
- {
- xd = vec_elt_at_index(dm->devices, dq->device);
- n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, 1);
+ n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id);
}
/* *INDENT-ON* */
@@ -724,53 +547,8 @@ VLIB_REGISTER_NODE (dpdk_input_node) = {
/* handle dpdk_input_rss alternative function */
VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input)
VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_rss)
-VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_efd)
/* this macro defines dpdk_input_rss_multiarch_select() */
CLIB_MULTIARCH_SELECT_FN(dpdk_input);
CLIB_MULTIARCH_SELECT_FN(dpdk_input_rss);
-CLIB_MULTIARCH_SELECT_FN(dpdk_input_efd);
-/*
- * set_efd_bitmap()
- * Based on the operation type, set lower/upper bits for the given index value
- */
-void
-set_efd_bitmap (u8 * bitmap, u32 value, u32 op)
-{
- int ix;
-
- *bitmap = 0;
- for (ix = 0; ix < 8; ix++)
- {
- if (((op == EFD_OPERATION_LESS_THAN) && (ix < value)) ||
- ((op == EFD_OPERATION_GREATER_OR_EQUAL) && (ix >= value)))
- {
- (*bitmap) |= (1 << ix);
- }
- }
-}
-
-void
-efd_config (u32 enabled,
- u32 ip_prec, u32 ip_op,
- u32 mpls_exp, u32 mpls_op, u32 vlan_cos, u32 vlan_op)
-{
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- dpdk_main_t *dm = &dpdk_main;
-
- if (enabled)
- {
- tm->efd.enabled |= VLIB_EFD_DISCARD_ENABLED;
- dm->efd.enabled |= DPDK_EFD_DISCARD_ENABLED;
- }
- else
- {
- tm->efd.enabled &= ~VLIB_EFD_DISCARD_ENABLED;
- dm->efd.enabled &= ~DPDK_EFD_DISCARD_ENABLED;
- }
-
- set_efd_bitmap (&tm->efd.ip_prec_bitmap, ip_prec, ip_op);
- set_efd_bitmap (&tm->efd.mpls_exp_bitmap, mpls_exp, mpls_op);
- set_efd_bitmap (&tm->efd.vlan_cos_bitmap, vlan_cos, vlan_op);
-}