From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 19 Dec 2016 23:05:39 +0100 Subject: Reorganize source tree to use single autotools instance Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion --- src/vpp/app/l2t.c | 557 ++++++++++++++++++++++++++++++++++++++++++++ src/vpp/app/l2t_l2.c | 267 +++++++++++++++++++++ src/vpp/app/sticky_hash.c | 581 ++++++++++++++++++++++++++++++++++++++++++++++ src/vpp/app/version.c | 102 ++++++++ src/vpp/app/vpe_cli.c | 123 ++++++++++ 5 files changed, 1630 insertions(+) create mode 100644 src/vpp/app/l2t.c create mode 100644 src/vpp/app/l2t_l2.c create mode 100644 src/vpp/app/sticky_hash.c create mode 100644 src/vpp/app/version.c create mode 100644 src/vpp/app/vpe_cli.c (limited to 'src/vpp/app') diff --git a/src/vpp/app/l2t.c b/src/vpp/app/l2t.c new file mode 100644 index 00000000..45dd2807 --- /dev/null +++ b/src/vpp/app/l2t.c @@ -0,0 +1,557 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include + +#if DPDK == 0 +#include +#else +#include +#endif + +#include +#include +#include + +l2t_main_t l2t_main; + +/* $$$$ unused? + * get_interface_ethernet_address + * paints the ethernet address for a given interface + * into the supplied destination + */ +void +get_interface_ethernet_address (l2t_main_t * lm, u8 * dst, u32 sw_if_index) +{ + ethernet_main_t *em = ethernet_get_main (lm->vlib_main); + ethernet_interface_t *ei; + vnet_hw_interface_t *hi; + + hi = vnet_get_sup_hw_interface (lm->vnet_main, sw_if_index); + ei = pool_elt_at_index (em->interfaces, hi->hw_instance); + clib_memcpy (dst, ei->address, sizeof (ei->address)); +} + +/* packet trace format function */ +u8 * +format_l2t_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + l2t_trace_t *t = va_arg (*args, l2t_trace_t *); + + if (t->is_user_to_network) + s = format (s, "L2T: %U (client) -> %U (our) session %d", + format_ip6_address, &t->client_address, + format_ip6_address, &t->our_address, t->session_index); + else + s = format (s, "L2T: %U (our) -> %U (client) session %d)", + format_ip6_address, &t->our_address, + format_ip6_address, &t->client_address, t->session_index); + return s; +} + +u8 * +format_l2t_session (u8 * s, va_list * args) +{ + l2t_session_t *session = va_arg (*args, l2t_session_t *); + l2t_main_t *lm = &l2t_main; + u32 counter_index; + vlib_counter_t v; + + s = format (s, "[%d] %U (our) %U (client) vlan-id %d rx_sw_if_index %d\n", + session - lm->sessions, + format_ip6_address, &session->our_address, + format_ip6_address, &session->client_address, + clib_net_to_host_u16 (session->vlan_id), session->sw_if_index); + + s = format (s, " local cookie %llx remote cookie %llx\n", + clib_net_to_host_u64 (session->local_cookie), + clib_net_to_host_u64 (session->remote_cookie)); + + if (session->cookie_flags 
& L2TP_COOKIE_ROLLOVER_LOCAL) + { + s = format (s, " local rollover cookie %llx\n", + clib_net_to_host_u64 (session->lcl_ro_cookie)); + } + + s = format (s, " local session-id %d remote session-id %d\n", + clib_net_to_host_u32 (session->local_session_id), + clib_net_to_host_u32 (session->remote_session_id)); + + s = format (s, " l2 specific sublayer %s\n", + session->l2_sublayer_present ? "preset" : "absent"); + + counter_index = + session_index_to_counter_index (session - lm->sessions, + SESSION_COUNTER_USER_TO_NETWORK); + + vlib_get_combined_counter (&lm->counter_main, counter_index, &v); + if (v.packets != 0) + s = format (s, " user-to-net: %llu pkts %llu bytes\n", + v.packets, v.bytes); + + vlib_get_combined_counter (&lm->counter_main, counter_index + 1, &v); + + if (v.packets != 0) + s = format (s, " net-to-user: %llu pkts %llu bytes\n", + v.packets, v.bytes); + return s; +} + +static clib_error_t * +show_session_summary_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2t_main_t *lm = &l2t_main; + + vlib_cli_output (vm, "%d active sessions\n", pool_elts (lm->sessions)); + + return 0; +} + +/* *INDENT-OFF* */ +static VLIB_CLI_COMMAND (show_session_summary_command) = { + .path = "show session", + .short_help = "show session summary", + .function = show_session_summary_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_session_detail_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2t_session_t *session; + l2t_main_t *lm = &l2t_main; + + /* *INDENT-OFF* */ + pool_foreach (session, lm->sessions, + ({ + vlib_cli_output (vm, "%U", format_l2t_session, session); + })); + /* *INDENT-ON* */ + + return 0; +} + +/* *INDENT-OFF* */ +static VLIB_CLI_COMMAND (show_session_detail_command) = { + .path = "show session detail", + .short_help = "show session table detail", + .function = show_session_detail_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +test_counters_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + l2t_session_t *session; + l2t_main_t *lm = &l2t_main; + u32 session_index; + u32 counter_index; + u32 nincr = 0; + + /* *INDENT-OFF* */ + pool_foreach (session, lm->sessions, + ({ + session_index = session - lm->sessions; + counter_index = + session_index_to_counter_index (session_index, + SESSION_COUNTER_USER_TO_NETWORK); + vlib_increment_combined_counter (&lm->counter_main, + counter_index, + 1/*pkt*/, 1111 /*bytes*/); + vlib_increment_combined_counter (&lm->counter_main, + counter_index+1, + 1/*pkt*/, 2222 /*bytes*/); + nincr++; + })); + /* *INDENT-ON* */ + vlib_cli_output (vm, "Incremented %d active counters\n", nincr); + + return 0; +} + +/* *INDENT-OFF* */ +static VLIB_CLI_COMMAND (test_counters_command) = { + .path = "test counters", + .short_help = "increment all active counters", + .function = test_counters_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +clear_counters_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + l2t_session_t *session; + l2t_main_t *lm = &l2t_main; + u32 session_index; + u32 counter_index; + u32 nincr = 0; + + /* *INDENT-OFF* */ + pool_foreach (session, lm->sessions, + ({ + session_index = session - lm->sessions; + counter_index = + session_index_to_counter_index (session_index, + SESSION_COUNTER_USER_TO_NETWORK); + vlib_zero_combined_counter (&lm->counter_main, counter_index); + vlib_zero_combined_counter (&lm->counter_main, counter_index+1); + nincr++; + 
})); + /* *INDENT-ON* */ + vlib_cli_output (vm, "Cleared %d active counters\n", nincr); + + return 0; +} + +/* *INDENT-OFF* */ +static VLIB_CLI_COMMAND (clear_counters_command) = { + .path = "clear counters", + .short_help = "clear all active counters", + .function = clear_counters_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +l2tp_session_add_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_address_t client_address, our_address; + ip6_address_t *dst_address_copy, *src_address_copy; + unformat_input_t _line_input, *line_input = &_line_input; + u32 vlan_id; + u32 sw_if_index = (u32) ~ 0; + l2t_main_t *lm = &l2t_main; + l2t_session_t *s; + uword *p; + vnet_hw_interface_t *hi; + vnet_sw_interface_t *si; + u32 next_index; + uword vlan_and_sw_if_index_key; + u32 counter_index; + u64 local_cookie = (u64) ~ 0, remote_cookie = (u64) ~ 0; + u32 local_session_id = 1, remote_session_id = 1; + int our_address_set = 0, client_address_set = 0; + int l2_sublayer_present = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "client %U", + unformat_ip6_address, &client_address)) + client_address_set = 1; + else if (unformat (line_input, "our %U", + unformat_ip6_address, &our_address)) + our_address_set = 1; + else if (unformat (line_input, "vlan %d", &vlan_id)) + ; + else if (unformat (line_input, "l2-interface %U", + unformat_vnet_sw_interface, + vnet_get_main (), &sw_if_index)) + ; + else if (unformat (line_input, "interface %U", + unformat_vnet_sw_interface, + vnet_get_main (), &sw_if_index)) + ; + else if (unformat (line_input, "local-cookie %llx", &local_cookie)) + ; + else if (unformat (line_input, "remote-cookie %llx", &remote_cookie)) + ; + else if (unformat (line_input, "local-session-id %d", + &local_session_id)) + ; + else if (unformat (line_input, "remote-session-id %d", + &remote_session_id)) + ; + else if (unformat (line_input, "l2-sublayer-present")) + l2_sublayer_present = 1; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + if (sw_if_index == (u32) ~ 0) + return clib_error_return (0, "l2-interface not specified"); + if (our_address_set == 0) + return clib_error_return (0, "our address not specified"); + if (client_address_set == 0) + return clib_error_return (0, "client address not specified"); + + remote_session_id = clib_host_to_net_u32 (remote_session_id); + local_session_id = clib_host_to_net_u32 (local_session_id); + + switch (lm->lookup_type) + { + case L2T_LOOKUP_SRC_ADDRESS: + p = hash_get_mem (lm->session_by_src_address, &client_address); + if (p) + return clib_error_return + (0, "Session w/ client address %U already exists", + format_ip6_address, &client_address); + break; + + case L2T_LOOKUP_DST_ADDRESS: + p = hash_get_mem (lm->session_by_dst_address, &our_address); + if (p) + return clib_error_return + (0, "Session w/ our address %U already exists", + format_ip6_address, &our_address); + break; + + case L2T_LOOKUP_SESSION_ID: + p = hash_get (lm->session_by_session_id, local_session_id); + if (p) + return clib_error_return + (0, + "Session w/ local session id %d already exists", + clib_net_to_host_u32 (local_session_id)); + break; + + default: + ASSERT (0); + } + + pool_get (lm->sessions, s); + memset (s, 0, sizeof (*s)); + clib_memcpy (&s->our_address, &our_address, sizeof 
(s->our_address)); + clib_memcpy (&s->client_address, &client_address, + sizeof (s->client_address)); + s->sw_if_index = sw_if_index; + s->vlan_id = clib_host_to_net_u16 (vlan_id); + s->local_cookie = clib_host_to_net_u64 (local_cookie); + l2tp_session_set_remote_cookie (s, remote_cookie); + s->local_session_id = local_session_id; + s->remote_session_id = remote_session_id; + s->l2_sublayer_present = l2_sublayer_present; + + hi = vnet_get_sup_hw_interface (lm->vnet_main, sw_if_index); + si = vnet_get_sup_sw_interface (lm->vnet_main, sw_if_index); + + next_index = vlib_node_add_next (vm, l2t_ip6_node.index, + hi->output_node_index); + s->l2_output_next_index = next_index; + s->l2_output_sw_if_index = si->sw_if_index; + + /* Setup hash table entries */ + switch (lm->lookup_type) + { + case L2T_LOOKUP_SRC_ADDRESS: + src_address_copy = clib_mem_alloc (sizeof (*src_address_copy)); + clib_memcpy (src_address_copy, &client_address, + sizeof (*src_address_copy)); + hash_set_mem (lm->session_by_src_address, src_address_copy, + s - lm->sessions); + break; + case L2T_LOOKUP_DST_ADDRESS: + dst_address_copy = clib_mem_alloc (sizeof (*dst_address_copy)); + clib_memcpy (dst_address_copy, &our_address, + sizeof (*dst_address_copy)); + hash_set_mem (lm->session_by_dst_address, dst_address_copy, + s - lm->sessions); + break; + case L2T_LOOKUP_SESSION_ID: + hash_set (lm->session_by_session_id, local_session_id, + s - lm->sessions); + break; + + default: + ASSERT (0); + } + + vlan_and_sw_if_index_key = ((uword) (s->vlan_id) << 32) | sw_if_index; + hash_set (lm->session_by_vlan_and_rx_sw_if_index, + vlan_and_sw_if_index_key, s - lm->sessions); + + /* validate counters */ + counter_index = + session_index_to_counter_index (s - lm->sessions, + SESSION_COUNTER_USER_TO_NETWORK); + vlib_validate_counter (&lm->counter_main, counter_index); + vlib_validate_counter (&lm->counter_main, counter_index + 1); + + /* Set promiscuous mode on the l2 interface */ + ethernet_set_flags (lm->vnet_main, hi->hw_if_index, + ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); + vnet_hw_interface_rx_redirect_to_node (lm->vnet_main, hi->hw_if_index, + l2t_l2_node.index); + return 0; +} + +/* *INDENT-OFF* */ +static VLIB_CLI_COMMAND (l2tp_session_add_command) = { + .path = "l2tp session add", + .short_help = + "l2tp session add client our vlan local-cookie remote-cookie local-session remote-session l2-interface ", + .function = l2tp_session_add_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +l2tp_session_del_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2t_main_t *lm = &l2t_main; + u32 session_index; + l2t_session_t *s; + hash_pair_t *hp; + void *key; + uword vlan_and_sw_if_index_key; + + if (!unformat (input, "%d", &session_index)) + return clib_error_return (0, "missing session index: '%U'", + format_unformat_error, input); + + if (pool_is_free_index (lm->sessions, session_index)) + return clib_error_return (0, "session %d not in use", session_index); + + s = pool_elt_at_index (lm->sessions, session_index); + + switch (lm->lookup_type) + { + case L2T_LOOKUP_SRC_ADDRESS: + hp = hash_get_pair_mem (lm->session_by_src_address, &s->client_address); + if (hp) + { + key = (void *) (hp->key); + hash_unset_mem (lm->session_by_src_address, &s->client_address); + clib_mem_free (key); + } + else + clib_warning ("session %d src address key %U AWOL", + s - lm->sessions, + format_ip6_address, &s->client_address); + break; + + case L2T_LOOKUP_DST_ADDRESS: + hp = hash_get_pair_mem 
(lm->session_by_dst_address, &s->our_address); + if (hp) + { + key = (void *) (hp->key); + hash_unset_mem (lm->session_by_dst_address, &s->our_address); + clib_mem_free (key); + } + else + clib_warning ("session %d dst address key %U AWOL", + s - lm->sessions, format_ip6_address, &s->our_address); + break; + + case L2T_LOOKUP_SESSION_ID: + hash_unset (lm->session_by_session_id, s->local_session_id); + break; + + default: + ASSERT (0); + } + + vlan_and_sw_if_index_key = ((uword) (s->vlan_id) << 32) | s->sw_if_index; + + hash_unset (lm->session_by_vlan_and_rx_sw_if_index, + vlan_and_sw_if_index_key); + + pool_put (lm->sessions, s); + return 0; +} + +/* *INDENT-OFF* */ +static VLIB_CLI_COMMAND (l2tp_session_del_command) = { + .path = "l2tp session delete", + .short_help = + "l2tp session delete ", + .function = l2tp_session_del_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +l2tp_session_cookie_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2t_main_t *lm = &l2t_main; + u32 session_index; + l2t_session_t *s; + u64 lcl_ro_cookie = (u64) ~ 0, rem_ro_cookie = (u64) ~ 0; + u8 cookie_flags = 0; + + if (!unformat (input, "%d", &session_index)) + return clib_error_return (0, "missing session index: '%U'", + format_unformat_error, input); + + if (pool_is_free_index (lm->sessions, session_index)) + return clib_error_return (0, "session %d not in use", session_index); + + s = pool_elt_at_index (lm->sessions, session_index); + + if (unformat (input, "commit")) + { + if (!s->cookie_flags) + { + return clib_error_return (0, "no rollover cookie ready to commit"); + } + else + { + l2tp_session_cookie_commit (s); + return 0; + } + } + if (!unformat (input, "rollover")) + return clib_error_return (0, "missing 'commit|rollover': '%U'", + format_unformat_error, input); + if (unformat (input, "local %llx", &lcl_ro_cookie)) + { + cookie_flags |= L2TP_COOKIE_ROLLOVER_LOCAL; + l2tp_session_set_local_rollover_cookie (s, lcl_ro_cookie); + } + if (unformat (input, "remote %llx", &rem_ro_cookie)) + { + cookie_flags |= L2TP_COOKIE_ROLLOVER_REMOTE; + l2tp_session_set_remote_cookie (s, rem_ro_cookie); + } + if (!cookie_flags) + return clib_error_return (0, "no rollover cookie specified"); + + return 0; +} + +/* *INDENT-OFF* */ +static VLIB_CLI_COMMAND (l2tp_session_cookie_command) = { + .path = "l2tp session cookie", + .short_help = + "l2tp session cookie commit|rollover [local ] [remote ]", + .function = l2tp_session_cookie_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/app/l2t_l2.c b/src/vpp/app/l2t_l2.c new file mode 100644 index 00000000..07d30d9a --- /dev/null +++ b/src/vpp/app/l2t_l2.c @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include + +#if DPDK == 0 +#include +#include +#include +#else +#include +#endif + +#include +#include +#include + +l2t_main_t l2t_main; + +/* Statistics (not really errors) */ +#define foreach_l2t_l2_error \ +_(NETWORK_TO_USER, "L2 network to user (ip6) pkts") + +static char *l2t_l2_error_strings[] = { +#define _(sym,string) string, + foreach_l2t_l2_error +#undef _ +}; + +typedef enum +{ +#define _(sym,str) L2T_L2_ERROR_##sym, + foreach_l2t_l2_error +#undef _ + L2T_L2_N_ERROR, +} l2t_l2_error_t; + +/* + * Packets go to ethernet-input when they don't match a mapping + */ +typedef enum +{ + L2T_L2_NEXT_DROP, + L2T_L2_NEXT_ETHERNET_INPUT, + L2T_L2_NEXT_IP6_LOOKUP, + L2T_L2_N_NEXT, +} l2t_l2_next_t; + +vlib_node_registration_t l2t_l2_node; + +#define NSTAGES 3 + +static inline void +stage0 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index); + vlib_prefetch_buffer_header (b, STORE); + CLIB_PREFETCH (b->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); +} + +static inline void +stage1 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + l2t_main_t *lm = &l2t_main; + ethernet_header_t *eh; + ethernet_vlan_header_t *vh; + u32 session_index; + uword *p; + uword vlan_and_sw_if_index_key; + + /* just in case, needed to test with the tun/tap device */ + vlib_buffer_reset (b); + + eh = vlib_buffer_get_current (b); + + /* Not a VLAN pkt? send to ethernet-input... */ + if (PREDICT_FALSE (eh->type != clib_host_to_net_u16 (0x8100))) + { + vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_ETHERNET_INPUT; + return; + } + vh = (ethernet_vlan_header_t *) (eh + 1); + + /* look up session */ + vlan_and_sw_if_index_key = ((uword) (vh->priority_cfi_and_id) << 32) + | vnet_buffer (b)->sw_if_index[VLIB_RX]; + + p = hash_get (lm->session_by_vlan_and_rx_sw_if_index, + vlan_and_sw_if_index_key); + + if (PREDICT_FALSE (p == 0)) + { + /* $$$ drop here if not for our MAC? */ + vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_ETHERNET_INPUT; + return; + } + else + { + session_index = p[0]; + } + + /* Remember mapping index, prefetch the mini counter */ + vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_IP6_LOOKUP; + vnet_buffer (b)->l2t.session_index = session_index; + + /* Each mapping has 2 x (pkt, byte) counters, hence the shift */ + CLIB_PREFETCH (lm->counter_main.mini + (p[0] << 1), CLIB_CACHE_LINE_BYTES, + STORE); +} + +static inline u32 +last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + l2t_main_t *lm = &l2t_main; + ethernet_header_t *eh = vlib_buffer_get_current (b); + vlib_node_t *n = vlib_get_node (vm, l2t_l2_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t *em = &vm->error_main; + l2tpv3_header_t *l2t; /* l2 header */ + ethernet_vlan_header_t *vh; /* 802.1q vlan header */ + u32 counter_index; + l2t_session_t *s; + ip6_header_t *ip6; + u16 payload_ethertype; + u8 dst_mac_address[6]; + u8 src_mac_address[6]; + u16 payload_length; + i32 backup; + + /* Other-than-output pkt? We're done... 
*/ + if (vnet_buffer (b)->l2t.next_index != L2T_L2_NEXT_IP6_LOOKUP) + return vnet_buffer (b)->l2t.next_index; + + vh = (ethernet_vlan_header_t *) (eh + 1); + + em->counters[node_counter_base_index + L2T_L2_ERROR_NETWORK_TO_USER] += 1; + + counter_index = + session_index_to_counter_index (vnet_buffer (b)->l2t.session_index, + SESSION_COUNTER_NETWORK_TO_USER); + + /* per-mapping byte stats include the ethernet header */ + vlib_increment_combined_counter (&lm->counter_main, counter_index, + 1 /* packet_increment */ , + vlib_buffer_length_in_chain (vm, b) + + sizeof (ethernet_header_t)); + + s = pool_elt_at_index (lm->sessions, vnet_buffer (b)->l2t.session_index); + + /* Save src/dst MAC addresses */ +#define _(i) dst_mac_address[i] = eh->dst_address[i]; + _(0) _(1) _(2) _(3) _(4) _(5); +#undef _ +#define _(i) src_mac_address[i] = eh->src_address[i]; + _(0) _(1) _(2) _(3) _(4) _(5); +#undef _ + + payload_ethertype = vh->type; + + /* Splice out the 802.1q vlan tag */ + vlib_buffer_advance (b, 4); + eh = vlib_buffer_get_current (b); + + /* restore src/dst MAC addresses */ +#define _(i) eh->dst_address[i] = dst_mac_address[i]; + _(0) _(1) _(2) _(3) _(4) _(5); +#undef _ +#define _(i) eh->src_address[i] = src_mac_address[i]; + _(0) _(1) _(2) _(3) _(4) _(5); +#undef _ + eh->type = payload_ethertype; + + /* Paint on an l2tpv3 hdr */ + backup = sizeof (*l2t); +#if 0 + /* back up 4 bytes less if no l2 sublayer */ + backup -= s->l2_sublayer_present ? 0 : 4; +#endif + + vlib_buffer_advance (b, -backup); + l2t = vlib_buffer_get_current (b); + + l2t->session_id = s->remote_session_id; + l2t->cookie = s->remote_cookie; + +#if 0 + if (s->l2_sublayer_present) + l2t->l2_specific_sublayer = 0; +#endif + + /* Paint on an ip6 header */ + vlib_buffer_advance (b, -(sizeof (*ip6))); + ip6 = vlib_buffer_get_current (b); + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + /* calculate ip6 payload length */ + payload_length = vlib_buffer_length_in_chain (vm, b); + payload_length -= sizeof (*ip6); + + ip6->payload_length = clib_host_to_net_u16 (payload_length); + ip6->protocol = 0x73; /* l2tpv3 */ + ip6->hop_limit = 0xff; + ip6->src_address.as_u64[0] = s->our_address.as_u64[0]; + ip6->src_address.as_u64[1] = s->our_address.as_u64[1]; + ip6->dst_address.as_u64[0] = s->client_address.as_u64[0]; + ip6->dst_address.as_u64[1] = s->client_address.as_u64[1]; + + return L2T_L2_NEXT_IP6_LOOKUP; +} + +#include + +static uword +l2t_l2_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return dispatch_pipeline (vm, node, frame); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (l2t_l2_node) = { + .function = l2t_l2_node_fn, + .name = "l2t-l2-input", + .vector_size = sizeof (u32), + .format_trace = format_l2t_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(l2t_l2_error_strings), + .error_strings = l2t_l2_error_strings, + + .n_next_nodes = L2T_L2_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [L2T_L2_NEXT_IP6_LOOKUP] = "ip6-lookup", + [L2T_L2_NEXT_ETHERNET_INPUT] = "ethernet-input", + [L2T_L2_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (l2t_l2_node, l2t_l2_node_fn); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/app/sticky_hash.c b/src/vpp/app/sticky_hash.c new file mode 100644 index 00000000..5569c677 --- /dev/null +++ b/src/vpp/app/sticky_hash.c @@ -0,0 +1,581 @@ +/* + * Copyright (c) 2015 
Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct +{ + u32 fwd_entry_index; + u32 rev_entry_index; + /* Not strictly needed, for show command */ + u32 fib_index; +} sticky_hash_session_t; + +typedef struct +{ + u32 cached_next_index; + + /* next index added to l2_classify */ + u32 fwd_miss_next_index; + + /* session pool */ + sticky_hash_session_t *sessions; + + /* Forward and reverse data session setup buffers */ + u8 fdata[3 * sizeof (u32x4)]; + u8 rdata[3 * sizeof (u32x4)]; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + vnet_classify_main_t *vnet_classify_main; + l2_input_classify_main_t *l2_input_classify_main; +} +sticky_hash_main_t; + +typedef struct +{ + /* $$$$ fill in with per-pkt trace data */ + u32 next_index; + u32 sw_if_index; +} sticky_hash_miss_trace_t; + +/* packet trace format function */ +static u8 * +format_sticky_hash_miss_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sticky_hash_miss_trace_t *t = va_arg (*args, sticky_hash_miss_trace_t *); + + s = format (s, "STICKY_HASH_MISS: sw_if_index %d", t->sw_if_index); + return s; +} + +typedef CLIB_PACKED (struct + { + ethernet_header_t eh; ip4_header_t ip; + }) classify_data_or_mask_t; + +sticky_hash_main_t sticky_hash_main; + +vlib_node_registration_t sticky_hash_miss_node; + +#define foreach_sticky_hash_miss_error \ +_(MISSES, "forward flow classify misses") + +typedef enum +{ +#define _(sym,str) STICKY_HASH_MISS_ERROR_##sym, + foreach_sticky_hash_miss_error +#undef _ + STICKY_HASH_MISS_N_ERROR, +} sticky_hash_miss_error_t; + +static char *sticky_hash_miss_error_strings[] = { +#define _(sym,string) string, + foreach_sticky_hash_miss_error +#undef _ +}; + +/* + * To drop a pkt and increment one of the previous counters: + * + * set b0->error = error_node->errors[STICKY_HASH_MISS_ERROR_EXAMPLE]; + * set next0 to a disposition index bound to "error-drop". 
+ * + * To manually increment the specific counter STICKY_HASH_MISS_ERROR_EXAMPLE: + * + * vlib_node_t *n = vlib_get_node (vm, sticky_hash_miss.index); + * u32 node_counter_base_index = n->error_heap_index; + * vlib_error_main_t * em = &vm->error_main; + * em->counters[node_counter_base_index + STICKY_HASH_MISS_ERROR_EXAMPLE] += 1; + * + */ + +typedef enum +{ + STICKY_HASH_MISS_NEXT_IP4_INPUT, + STICKY_HASH_MISS_N_NEXT, +} sticky_hash_miss_next_t; + +static uword +sticky_hash_miss_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + sticky_hash_miss_next_t next_index; + sticky_hash_main_t *mp = &sticky_hash_main; + vlib_node_t *n = vlib_get_node (vm, sticky_hash_miss_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t *em = &vm->error_main; + vnet_classify_main_t *cm = mp->vnet_classify_main; + ip4_main_t *im = &ip4_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + u32 fib_index0, ft_index0, rt_index0; + vnet_classify_table_3_t *ft0, *rt0; + vnet_classify_entry_3_t *fe0, *re0; + classify_data_or_mask_t *h0; + u8 was_found0; + ip4_fib_t *fib0; + sticky_hash_session_t *s; + u32 tmp; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + next0 = mp->cached_next_index; + + h0 = vlib_buffer_get_current (b0); + + /* Add forward and reverse entries for this flow */ + clib_memcpy (mp->fdata, h0, sizeof (mp->fdata)); + clib_memcpy (mp->rdata, h0, sizeof (mp->rdata)); + + h0 = (classify_data_or_mask_t *) (mp->rdata); + + /* swap src + dst addresses to form reverse data */ + tmp = h0->ip.src_address.as_u32; + h0->ip.src_address.as_u32 = h0->ip.dst_address.as_u32; + h0->ip.dst_address.as_u32 = tmp; + + /* dig up fwd + rev tables */ + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + fib0 = vec_elt_at_index (im->fibs, fib_index0); + + ft_index0 = fib0->fwd_classify_table_index; + rt_index0 = fib0->rev_classify_table_index; + + ft0 = (vnet_classify_table_3_t *) + pool_elt_at_index (cm->tables, ft_index0); + rt0 = (vnet_classify_table_3_t *) + pool_elt_at_index (cm->tables, rt_index0); + + fe0 = + vnet_classify_find_or_add_entry_3 (ft0, mp->fdata, &was_found0); + fe0->next_index = L2_INPUT_CLASSIFY_NEXT_IP4_INPUT; + fe0->advance = sizeof (ethernet_header_t); + + re0 = vnet_classify_find_or_add_entry_3 (rt0, mp->rdata, 0); + re0->next_index = L2_INPUT_CLASSIFY_NEXT_IP4_INPUT; /* $$$ FIXME */ + re0->advance = sizeof (ethernet_header_t); + + /* Note: we could get a whole vector of misses for the same sess */ + if (was_found0 == 0) + { + pool_get (mp->sessions, s); + + fe0->opaque_index = s - mp->sessions; + re0->opaque_index = s - mp->sessions; + + s->fwd_entry_index = fe0 - ft0->entries; + s->rev_entry_index = re0 - rt0->entries; + s->fib_index = fib_index0; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + sticky_hash_miss_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = 
sw_if_index0; + t->next_index = next0; + } + + em->counters[node_counter_base_index + + STICKY_HASH_MISS_ERROR_MISSES] += 1; + + vlib_buffer_advance (b0, sizeof (ethernet_header_t)); + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sticky_hash_miss_node) = { + .function = sticky_hash_miss_node_fn, + .name = "sticky-hash-miss", + .vector_size = sizeof (u32), + .format_trace = format_sticky_hash_miss_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(sticky_hash_miss_error_strings), + .error_strings = sticky_hash_miss_error_strings, + + .n_next_nodes = STICKY_HASH_MISS_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [STICKY_HASH_MISS_NEXT_IP4_INPUT] = "ip4-input", + }, +}; +/* *INDENT-ON* */ + +clib_error_t * +sticky_hash_miss_init (vlib_main_t * vm) +{ + sticky_hash_main_t *mp = &sticky_hash_main; + + mp->vlib_main = vm; + mp->vnet_main = vnet_get_main (); + mp->vnet_classify_main = &vnet_classify_main; + mp->l2_input_classify_main = &l2_input_classify_main; + + return 0; +} + +VLIB_INIT_FUNCTION (sticky_hash_miss_init); + +static int ip4_sticky_hash_enable_disable + (sticky_hash_main_t * mp, + u32 fwd_sw_if_index, u8 * fwd_mask, + u32 rev_sw_if_index, u8 * rev_mask, u32 nbuckets, int enable_disable) +{ + ip4_main_t *im = &ip4_main; + u32 fib_index; + ip4_fib_t *fib; + vnet_classify_main_t *cm = mp->vnet_classify_main; + l2_input_classify_main_t *l2cm = mp->l2_input_classify_main; + vnet_classify_table_3_t *ft, *rt; + + fib_index = vec_elt (im->fib_index_by_sw_if_index, fwd_sw_if_index); + fib = vec_elt_at_index (im->fibs, fib_index); + + if (fib->fwd_classify_table_index == ~0) + { + /* Set up forward table */ + ft = (vnet_classify_table_3_t *) + vnet_classify_new_table (cm, fwd_mask, nbuckets, + 0 /* skip */ , 3 /* match */ ); + fib->fwd_classify_table_index + = ft - (vnet_classify_table_3_t *) cm->tables; + mp->fwd_miss_next_index = + vlib_node_add_next (mp->vlib_main, l2_input_classify_node.index, + sticky_hash_miss_node.index); + ft->miss_next_index = mp->fwd_miss_next_index; + + /* Set up reverse table */ + rt = (vnet_classify_table_3_t *) + vnet_classify_new_table (cm, rev_mask, nbuckets, + 0 /* skip */ , 3 /* match */ ); + fib->rev_classify_table_index + = rt - (vnet_classify_table_3_t *) cm->tables; + } + + vec_validate + (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4], + fwd_sw_if_index); + + vec_validate + (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6], + fwd_sw_if_index); + + vec_validate + (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER], + fwd_sw_if_index); + + l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4] + [fwd_sw_if_index] = fib->fwd_classify_table_index; + + l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6] + [fwd_sw_if_index] = ~0; + + l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER] + [fwd_sw_if_index] = ~0; + + + vec_validate + (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4], + rev_sw_if_index); + + vec_validate + (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6], + rev_sw_if_index); + + vec_validate + (l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER], + 
rev_sw_if_index); + + + l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4] + [rev_sw_if_index] = fib->rev_classify_table_index; + + l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6] + [rev_sw_if_index] = ~0; + + l2cm->classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_OTHER] + [rev_sw_if_index] = ~0; + + vnet_l2_input_classify_enable_disable (fwd_sw_if_index, enable_disable); + vnet_l2_input_classify_enable_disable (rev_sw_if_index, enable_disable); + return 0; +} + +static clib_error_t * +ip4_sticky_hash_init_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 fwd_sw_if_index = ~0, rev_sw_if_index = ~0; + int enable_disable = 1; + u32 nbuckets = 2; + int rv; + sticky_hash_main_t *mp = &sticky_hash_main; + classify_data_or_mask_t fwd_mask, rev_mask; + u8 *fm = 0, *rm = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "fwd %U", unformat_vnet_sw_interface, mp->vnet_main, + &fwd_sw_if_index)) + ; + if (unformat + (input, "rev %U", unformat_vnet_sw_interface, mp->vnet_main, + &rev_sw_if_index)) + ; + else if (unformat (input, "nbuckets %d", &nbuckets)) + ; + else if (unformat (input, "disable")) + enable_disable = 0; + + else + break; + } + + nbuckets = 1 << max_log2 (nbuckets); + + if (fwd_sw_if_index == ~0) + return clib_error_return (0, "fwd interface not set"); + + if (rev_sw_if_index == ~0) + return clib_error_return (0, "rev interface not set"); + + if (!is_pow2 (nbuckets)) + return clib_error_return (0, "nbuckets %d not a power of 2", nbuckets); + + ASSERT (sizeof (fwd_mask) <= 3 * sizeof (u32x4)); + + /* Mask on src/dst address, depending on direction */ + memset (&fwd_mask, 0, sizeof (fwd_mask)); + memset (&fwd_mask.ip.src_address, 0xff, 4); + + memset (&rev_mask, 0, sizeof (rev_mask)); + memset (&rev_mask.ip.dst_address, 0xff, 4); + + vec_validate (fm, 3 * sizeof (u32x4) - 1); + vec_validate (rm, 3 * sizeof (u32x4) - 1); + + clib_memcpy (fm, &fwd_mask, sizeof (fwd_mask)); + clib_memcpy (rm, &rev_mask, sizeof (rev_mask)); + + rv = ip4_sticky_hash_enable_disable (mp, fwd_sw_if_index, fm, + rev_sw_if_index, rm, + nbuckets, enable_disable); + + vec_free (fm); + vec_free (rm); + switch (rv) + { + case 0: + return 0; + + default: + return clib_error_return (0, + "ip4_sticky_hash_enable_disable returned %d", + rv); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sticky_hash_init_command, static) = { + .path = "ip sticky classify", + .short_help = "ip sticky classify fwd rev " + "[nbuckets ][disable]", + .function = ip4_sticky_hash_init_command_fn, +}; +/* *INDENT-ON* */ + + +u8 * +format_sticky_hash_session (u8 * s, va_list * args) +{ + sticky_hash_main_t *mp = va_arg (*args, sticky_hash_main_t *); + sticky_hash_session_t *session = va_arg (*args, sticky_hash_session_t *); + vnet_classify_table_3_t *t; + vnet_classify_entry_3_t *e; + ip4_main_t *im = &ip4_main; + vnet_classify_main_t *cm = mp->vnet_classify_main; + ip4_fib_t *fib; + classify_data_or_mask_t *match; + + fib = vec_elt_at_index (im->fibs, session->fib_index); + + t = (vnet_classify_table_3_t *) + pool_elt_at_index (cm->tables, fib->fwd_classify_table_index); + e = pool_elt_at_index (t->entries, session->fwd_entry_index); + match = (classify_data_or_mask_t *) (e->key); + + s = format + (s, + "[%6d] fwd src %U next index %d session %d fib %d\n" + " hits %lld last-heard %.6f\n", + e - t->entries, + format_ip4_address, &match->ip.src_address, + e->next_index, e->opaque_index, 
fib->table_id, e->hits, e->last_heard); + + if (e->opaque_index != session - mp->sessions) + s = format (s, "WARNING: forward session index mismatch!\n"); + + t = (vnet_classify_table_3_t *) + pool_elt_at_index (cm->tables, fib->rev_classify_table_index); + e = pool_elt_at_index (t->entries, session->rev_entry_index); + match = (classify_data_or_mask_t *) (e->key); + + s = format + (s, + "[%6d] rev dst %U next index %d session %d\n" + " hits %lld last-heard %.6f\n", + e - t->entries, + format_ip4_address, &match->ip.dst_address, + e->next_index, e->opaque_index, e->hits, e->last_heard); + + if (e->opaque_index != session - mp->sessions) + s = format (s, "WARNING: reverse session index mismatch!\n"); + s = format (s, "---------\n"); + + return s; +} + +static clib_error_t * +show_ip4_sticky_hash_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + sticky_hash_main_t *mp = &sticky_hash_main; + sticky_hash_session_t *s; + int verbose = 0; + int dump_classifier_tables = 0; + ip4_fib_t *fib; + ip4_main_t *im4 = &ip4_main; + vnet_classify_main_t *cm = mp->vnet_classify_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + else if (unformat (input, "dump-tables") + || unformat (input, "dump-classifier-tables")) + dump_classifier_tables = 1; + else + break; + } + + if (pool_elts (mp->sessions) == 0) + vlib_cli_output (vm, "No ip sticky hash sessions"); + + + vlib_cli_output (vm, "%d active sessions\n", pool_elts (mp->sessions)); + + vec_foreach (fib, im4->fibs) + { + if (fib->fwd_classify_table_index != ~0) + vlib_cli_output (vm, "fib %d fwd table: \n%U", + fib->table_id, + format_classify_table, + cm, + pool_elt_at_index + (cm->tables, fib->fwd_classify_table_index), + dump_classifier_tables); + if (fib->rev_classify_table_index != ~0) + vlib_cli_output (vm, "fib %d rev table: \n%U", + fib->table_id, + format_classify_table, + cm, + pool_elt_at_index + (cm->tables, fib->rev_classify_table_index), + dump_classifier_tables); + } + + if (verbose) + { + /* *INDENT-OFF* */ + pool_foreach (s, mp->sessions, + ({ + vlib_cli_output (vm, "%U", format_sticky_hash_session, mp, s); + })); + /* *INDENT-ON* */ + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sticky_hash_command, static) = { + .path = "show sticky classify", + .short_help = "Display sticky classifier tables", + .function = show_ip4_sticky_hash_command_fn, +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/app/version.c b/src/vpp/app/version.c new file mode 100644 index 00000000..60844c98 --- /dev/null +++ b/src/vpp/app/version.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include + +#if DPDK > 0 +#include +#include +#include +#endif /* DPDK */ + +static char *vpe_version_string = + "vpp v" VPP_BUILD_VER + " built by " VPP_BUILD_USER " on " VPP_BUILD_HOST " at " VPP_BUILD_DATE; + +static char *vpe_compiler = +#if defined(__INTEL_COMPILER) +#define __(x) #x +#define _(x) __(x) + "icc " _(__INTEL_COMPILER) " (" __VERSION__ ")"; +#undef _ +#undef __ +#elif defined(__clang__) + "Clang/LLVM " __clang_version__; +#elif defined (__GNUC__) + "GCC " __VERSION__; +#else + "unknown compiler"; +#endif + +static clib_error_t * +show_vpe_version_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + if (unformat (input, "verbose")) + { +#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c); + _("Version", "%s", "v" VPP_BUILD_VER); + _("Compiled by", "%s", VPP_BUILD_USER); + _("Compile host", "%s", VPP_BUILD_HOST); + _("Compile date", "%s", VPP_BUILD_DATE); + _("Compile location", "%s", VPP_BUILD_TOPDIR); + _("Compiler", "%s", vpe_compiler); + _("Current PID", "%d", getpid ()); +#if DPDK > 0 + _("DPDK Version", "%s", rte_version ()); + _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str); +#endif +#undef _ + } + else + vlib_cli_output (vm, "%s", vpe_version_string); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_vpe_version_command, static) = { + .path = "show version", + .short_help = "show version information", + .function = show_vpe_version_command_fn, +}; +/* *INDENT-ON* */ + +char * +vpe_api_get_build_directory (void) +{ + return VPP_BUILD_TOPDIR; +} + +char * +vpe_api_get_version (void) +{ + return VPP_BUILD_VER; +} + +char * +vpe_api_get_build_date (void) +{ + return VPP_BUILD_DATE; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/app/vpe_cli.c b/src/vpp/app/vpe_cli.c new file mode 100644 index 00000000..a26bf71f --- /dev/null +++ b/src/vpp/app/vpe_cli.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include + +typedef struct +{ + u8 mac_addr[6]; +} mac_addr_t; + +static clib_error_t * +virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + ip46_address_t next_hop, *next_hops; + fib_route_path_t *rpaths; + fib_prefix_t prefix; + u8 mac_addr[6]; + mac_addr_t *mac_addrs = 0; + u32 sw_if_index; + u32 i; + + next_hops = NULL; + rpaths = NULL; + prefix.fp_len = 32; + prefix.fp_proto = FIB_PROTOCOL_IP4; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + if (!unformat (line_input, "%U %U", + unformat_ip4_address, &prefix.fp_addr.ip4, + unformat_vnet_sw_interface, vnm, &sw_if_index)) + goto barf; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "mac %U", + unformat_ethernet_address, &mac_addr)) + { + mac_addr_t *ma; + vec_add2 (mac_addrs, ma, 1); + clib_memcpy (ma, mac_addr, sizeof (mac_addr)); + } + else if (unformat (line_input, "next-hop %U", + unformat_ip4_address, &next_hop.ip4)) + { + vec_add1 (next_hops, next_hop); + } + else + { + barf: + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + } + if (vec_len (mac_addrs) == 0 || vec_len (mac_addrs) != vec_len (next_hops)) + goto barf; + + /* Create / delete special interface route /32's */ + + for (i = 0; i < vec_len (mac_addrs); i++) + { + fib_route_path_t *rpath; + + adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &next_hops[i], + sw_if_index, mac_addrs[i].mac_addr); + + vec_add2 (rpaths, rpath, 1); + + rpath->frp_proto = FIB_PROTOCOL_IP4; + rpath->frp_addr = next_hops[i]; + rpath->frp_sw_if_index = sw_if_index; + rpath->frp_fib_index = ~0; + rpath->frp_weight = 1; + rpath->frp_label_stack = NULL; + } + + fib_table_entry_path_add2 (0, // default FIB table + &prefix, + FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, rpaths); + + vec_free (mac_addrs); + vec_free (next_hops); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (virtual_ip_cmd_fn_command, static) = { + .path = "ip virtual", + .short_help = "ip virtual [mac ]+", + .function = virtual_ip_cmd_fn_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 597d3c4121b4dd557328c11debb42927a45d52fb Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 16 Jan 2017 21:36:28 +0100 Subject: dpdk: add 'show dpdk version' cli Change-Id: Iaecebae25ee4b8df8ca919992a0433e92e82e90c Signed-off-by: Damjan Marion --- src/vnet/devices/dpdk/cli.c | 20 ++++++++++++++++++++ src/vpp/app/version.c | 10 ---------- src/vpp/oam/oam.c | 4 ---- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'src/vpp/app') diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c index 22bd4b4f..3bbace26 100644 --- a/src/vnet/devices/dpdk/cli.c +++ b/src/vnet/devices/dpdk/cli.c @@ -1279,6 +1279,26 @@ VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = { }; /* *INDENT-ON* */ +static clib_error_t * +show_dpdk_version_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ +#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c); + _("DPDK Version", "%s", rte_version ()); + _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str); +#undef _ + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_vpe_version_command, static) = { + .path = "show dpdk version", + .short_help = "show dpdk version information", + .function = show_dpdk_version_command_fn, +}; +/* *INDENT-ON* */ + clib_error_t * dpdk_cli_init (vlib_main_t * vm) { diff --git a/src/vpp/app/version.c b/src/vpp/app/version.c index 60844c98..0a2c7fd4 100644 --- a/src/vpp/app/version.c +++ b/src/vpp/app/version.c @@ -16,12 +16,6 @@ #include #include -#if DPDK > 0 -#include -#include -#include -#endif /* DPDK */ - static char *vpe_version_string = "vpp v" VPP_BUILD_VER " built by " VPP_BUILD_USER " on " VPP_BUILD_HOST " at " 
VPP_BUILD_DATE;
@@ -56,10 +50,6 @@ show_vpe_version_command_fn (vlib_main_t * vm,
       _("Compile location", "%s", VPP_BUILD_TOPDIR);
       _("Compiler", "%s", vpe_compiler);
       _("Current PID", "%d", getpid ());
-#if DPDK > 0
-      _("DPDK Version", "%s", rte_version ());
-      _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str);
-#endif
 #undef _
     }
   else
diff --git a/src/vpp/oam/oam.c b/src/vpp/oam/oam.c
index 07e17b64..ef061207 100644
--- a/src/vpp/oam/oam.c
+++ b/src/vpp/oam/oam.c
@@ -14,10 +14,6 @@
  */
 #include

-#if DPDK > 0
-#include
-#endif
-
 oam_main_t oam_main;

 static vlib_node_registration_t oam_node;
-- cgit 1.2.3-korg


From a9a20e7f69f4a91a4d5267ab5ce14125bdc7d6c6 Mon Sep 17 00:00:00 2001
From: Billy McFall
Date: Wed, 15 Feb 2017 11:39:12 -0500
Subject: VPP-635: CLI Memory leak with invalid parameter

In the CLI parsing, the following is a common pattern:

    /* Get a line of input. */
    if (!unformat_user (input, unformat_line_input, line_input))
      return 0;

    while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
      {
        if (unformat (line_input, "x"))
          x = 1;
        :
        else
          return clib_error_return (0, "unknown input `%U'",
                                    format_unformat_error, line_input);
      }
    unformat_free (line_input);

The 'else' returns if an unknown string is encountered. There is a memory
leak, because 'unformat_free (line_input)' is never called on that path.
There is a large number of instances of this pattern.

Replaced the previous pattern with:

    /* Get a line of input. */
    if (!unformat_user (input, unformat_line_input, line_input))
      return 0;

    while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
      {
        if (unformat (line_input, "x"))
          x = 1;
        :
        else
          {
            error = clib_error_return (0, "unknown input `%U'",
                                       format_unformat_error, line_input);
            goto done;
          }
      }

      /* ...Remaining code... */

    done:
      unformat_free (line_input);
      return error;
    }

In multiple files, 'unformat_free (line_input);' was never called at all,
so there was a memory leak whether an invalid string was entered or not.

Also, there were multiple instances where:

    error = clib_error_return (0, "unknown input `%U'",
                               format_unformat_error, line_input);

used 'input' as the last parameter instead of 'line_input'. As a result,
the error message contained an empty string instead of the offending
input. Fixed all of those as well.

There are a lot of files, and this is very mind-numbing work, so the
changes were kept to the pattern above to avoid mistakes.
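For reference, below is a minimal, self-contained sketch of the corrected
pattern as a complete handler. The header names, the "test leak-pattern"
command path, and its options are illustrative assumptions; the unformat
and CLI calls themselves are the ones already used throughout this patch.

    /*
     * Sketch only: a leak-free VPP CLI handler following the pattern
     * described above. The headers and the "test leak-pattern" command
     * are assumptions for illustration; unformat_user, unformat_line_input,
     * unformat_free, clib_error_return and VLIB_CLI_COMMAND are the same
     * APIs used in the files touched by this patch.
     */
    #include <vlib/vlib.h>
    #include <vlib/unix/unix.h>	/* assumed home of unformat_line_input */

    static clib_error_t *
    leak_pattern_command_fn (vlib_main_t * vm,
                             unformat_input_t * input,
                             vlib_cli_command_t * cmd)
    {
      unformat_input_t _line_input, *line_input = &_line_input;
      clib_error_t *error = 0;
      u32 value = 0;
      int enable = 0;

      /* Get a line of input; this allocates a vector inside line_input. */
      if (!unformat_user (input, unformat_line_input, line_input))
        return 0;

      while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
        {
          if (unformat (line_input, "enable"))
            enable = 1;
          else if (unformat (line_input, "value %d", &value))
            ;
          else
            {
              /* Report line_input (not input) so the offending token
                 appears in the message, then fall through to the single
                 cleanup point instead of returning directly. */
              error = clib_error_return (0, "unknown input `%U'",
                                         format_unformat_error, line_input);
              goto done;
            }
        }

      vlib_cli_output (vm, "enable %d, value %d", enable, value);

    done:
      /* Every path, including the error path, frees line_input. */
      unformat_free (line_input);
      return error;
    }

    /* *INDENT-OFF* */
    VLIB_CLI_COMMAND (leak_pattern_command, static) = {
      .path = "test leak-pattern",
      .short_help = "test leak-pattern [enable] [value <n>]",
      .function = leak_pattern_command_fn,
    };
    /* *INDENT-ON* */

The single 'done:' exit point is what guarantees that the vector allocated
by unformat_line_input is released on every path, including the error path,
which is the whole point of the rework described above.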
Change-Id: I8902f0c32a47dd7fb3bb3471a89818571702f1d2 Signed-off-by: Billy McFall Signed-off-by: Dave Barach --- build-root/emacs-lisp/tunnel-c-skel.el | 19 ++- src/plugins/ila/ila.c | 25 ++- src/plugins/lb/cli.c | 99 ++++++----- src/plugins/sixrd/sixrd.c | 42 +++-- src/plugins/snat/snat.c | 139 +++++++++++----- src/vlib/threads_cli.c | 79 ++++++--- src/vlib/trace.c | 13 +- src/vlib/unix/cli.c | 22 ++- src/vnet/devices/af_packet/cli.c | 56 +++++-- src/vnet/devices/dpdk/cli.c | 290 +++++++++++++++++++++++---------- src/vnet/devices/dpdk/ipsec/cli.c | 15 +- src/vnet/devices/netmap/cli.c | 54 ++++-- src/vnet/devices/virtio/vhost-user.c | 62 +++++-- src/vnet/gre/interface.c | 35 ++-- src/vnet/ip/ip4_source_check.c | 6 +- src/vnet/ip/ip4_test.c | 15 +- src/vnet/ip/ip6_neighbor.c | 27 ++- src/vnet/ip/lookup.c | 34 ++-- src/vnet/ipsec-gre/interface.c | 34 ++-- src/vnet/ipsec/ipsec_cli.c | 177 +++++++++++++------- src/vnet/l2/l2_patch.c | 26 ++- src/vnet/l2/l2_xcrw.c | 34 +++- src/vnet/l2tp/l2tp.c | 39 +++-- src/vnet/lisp-cp/lisp_cli.c | 139 ++++++++++++---- src/vnet/lisp-gpe/interface.c | 58 +++++-- src/vnet/lisp-gpe/lisp_gpe.c | 13 +- src/vnet/map/map.c | 186 +++++++++++++++------ src/vnet/mpls/mpls.c | 2 + src/vnet/mpls/mpls_tunnel.c | 19 ++- src/vnet/pg/cli.c | 39 +++-- src/vnet/policer/node_funcs.c | 19 ++- src/vnet/policer/policer.c | 13 +- src/vnet/unix/tapcli.c | 57 +++++-- src/vnet/vxlan-gpe/vxlan_gpe.c | 62 +++++-- src/vnet/vxlan/vxlan.c | 81 ++++++--- src/vpp/app/l2t.c | 9 +- src/vpp/app/vpe_cli.c | 24 ++- 37 files changed, 1487 insertions(+), 576 deletions(-) (limited to 'src/vpp/app') diff --git a/build-root/emacs-lisp/tunnel-c-skel.el b/build-root/emacs-lisp/tunnel-c-skel.el index aa260e53..a1b1757d 100644 --- a/build-root/emacs-lisp/tunnel-c-skel.el +++ b/build-root/emacs-lisp/tunnel-c-skel.el @@ -288,6 +288,7 @@ static clib_error_t * vlib_cli_command_t * cmd) { unformat_input_t _line_input, * line_input = &_line_input; + clib_error_t *error = 0; ip4_address_t src, dst; u8 is_add = 1; u8 src_set = 0; @@ -322,13 +323,19 @@ static clib_error_t * { encap_fib_index = fib_index_from_fib_id (tmp); if (encap_fib_index == ~0) - return clib_error_return (0, \"nonexistent encap fib id %d\", tmp); + { + unformat_free (line_input); + return clib_error_return (0, \"nonexistent encap fib id %d\", tmp); + } } else if (unformat (line_input, \"decap-vrf-id %d\", &tmp)) { decap_fib_index = fib_index_from_fib_id (tmp); if (decap_fib_index == ~0) - return clib_error_return (0, \"nonexistent decap fib id %d\", tmp); + { + unformat_free (line_input); + return clib_error_return (0, \"nonexistent decap fib id %d\", tmp); + } } else if (unformat (line_input, \"decap-next %U\", unformat_decap_next, &decap_next_index)) @@ -346,8 +353,12 @@ static clib_error_t * * in the " ENCAP_STACK " header */ else - return clib_error_return (0, \"parse error: '%U'\", - format_unformat_error, line_input); + { + error = clib_error_return (0, \"parse error: '%U'\", + format_unformat_error, line_input); + unformat_free (line_input); + return error; + } } unformat_free (line_input); diff --git a/src/plugins/ila/ila.c b/src/plugins/ila/ila.c index e0f3907f..52c7ea55 100644 --- a/src/plugins/ila/ila.c +++ b/src/plugins/ila/ila.c @@ -949,6 +949,7 @@ ila_entry_command_fn (vlib_main_t * vm, ila_add_del_entry_args_t args = { 0 }; u8 next_hop_set = 0; int ret; + clib_error_t *error = 0; args.type = ILA_TYPE_IID; args.csum_mode = ILA_CSUM_MODE_NO_ACTION; @@ -986,19 +987,29 @@ ila_entry_command_fn (vlib_main_t * vm, else if (unformat 
(line_input, "del")) args.is_del = 1; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (!next_hop_set) - return clib_error_return (0, "Specified a next hop"); + { + error = clib_error_return (0, "Specified a next hop"); + goto done; + } if ((ret = ila_add_del_entry (&args))) - return clib_error_return (0, "ila_add_del_entry returned error %d", ret); + { + error = clib_error_return (0, "ila_add_del_entry returned error %d", ret); + goto done; + } - return NULL; +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (ila_entry_command, static) = diff --git a/src/plugins/lb/cli.c b/src/plugins/lb/cli.c index b59c6426..6452a875 100644 --- a/src/plugins/lb/cli.c +++ b/src/plugins/lb/cli.c @@ -28,13 +28,16 @@ lb_vip_command_fn (vlib_main_t * vm, int ret; u32 gre4 = 0; lb_vip_type_t type; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; - if (!unformat(line_input, "%U", unformat_ip46_prefix, &prefix, &plen, IP46_TYPE_ANY, &plen)) - return clib_error_return (0, "invalid vip prefix: '%U'", - format_unformat_error, line_input); + if (!unformat(line_input, "%U", unformat_ip46_prefix, &prefix, &plen, IP46_TYPE_ANY, &plen)) { + error = clib_error_return (0, "invalid vip prefix: '%U'", + format_unformat_error, line_input); + goto done; + } while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { @@ -46,13 +49,13 @@ lb_vip_command_fn (vlib_main_t * vm, gre4 = 1; else if (unformat(line_input, "encap gre6")) gre4 = 0; - else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + else { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (ip46_prefix_is_ip4(&prefix, plen)) { type = (gre4)?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6; @@ -65,17 +68,25 @@ lb_vip_command_fn (vlib_main_t * vm, u32 index; if (!del) { if ((ret = lb_vip_add(&prefix, plen, type, new_len, &index))) { - return clib_error_return (0, "lb_vip_add error %d", ret); + error = clib_error_return (0, "lb_vip_add error %d", ret); + goto done; } else { vlib_cli_output(vm, "lb_vip_add ok %d", index); } } else { - if ((ret = lb_vip_find_index(&prefix, plen, &index))) - return clib_error_return (0, "lb_vip_find_index error %d", ret); - else if ((ret = lb_vip_del(index))) - return clib_error_return (0, "lb_vip_del error %d", ret); + if ((ret = lb_vip_find_index(&prefix, plen, &index))) { + error = clib_error_return (0, "lb_vip_find_index error %d", ret); + goto done; + } else if ((ret = lb_vip_del(index))) { + error = clib_error_return (0, "lb_vip_del error %d", ret); + goto done; + } } - return NULL; + +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (lb_vip_command, static) = @@ -96,16 +107,21 @@ lb_as_command_fn (vlib_main_t * vm, u32 vip_index; u8 del = 0; int ret; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; - if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY)) - return clib_error_return (0, "invalid as address: '%U'", - format_unformat_error, line_input); + if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY)) { + error = clib_error_return (0, "invalid as address: '%U'", + 
format_unformat_error, line_input); + goto done; + } - if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index))) - return clib_error_return (0, "lb_vip_find_index error %d", ret); + if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index))) { + error = clib_error_return (0, "lb_vip_find_index error %d", ret); + goto done; + } while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { @@ -114,15 +130,15 @@ lb_as_command_fn (vlib_main_t * vm, } else if (unformat(line_input, "del")) { del = 1; } else { - vec_free(as_array); - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; } } if (!vec_len(as_array)) { - vec_free(as_array); - return clib_error_return (0, "No AS address provided"); + error = clib_error_return (0, "No AS address provided"); + goto done; } lb_garbage_collection(); @@ -130,18 +146,21 @@ lb_as_command_fn (vlib_main_t * vm, if (del) { if ((ret = lb_vip_del_ass(vip_index, as_array, vec_len(as_array)))) { - vec_free(as_array); - return clib_error_return (0, "lb_vip_del_ass error %d", ret); + error = clib_error_return (0, "lb_vip_del_ass error %d", ret); + goto done; } } else { if ((ret = lb_vip_add_ass(vip_index, as_array, vec_len(as_array)))) { - vec_free(as_array); - return clib_error_return (0, "lb_vip_add_ass error %d", ret); + error = clib_error_return (0, "lb_vip_add_ass error %d", ret); + goto done; } } +done: + unformat_free (line_input); vec_free(as_array); - return 0; + + return error; } VLIB_CLI_COMMAND (lb_as_command, static) = @@ -163,6 +182,7 @@ lb_conf_command_fn (vlib_main_t * vm, u32 per_cpu_sticky_buckets_log2 = 0; u32 flow_timeout = lbm->flow_timeout; int ret; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -181,19 +201,24 @@ lb_conf_command_fn (vlib_main_t * vm, per_cpu_sticky_buckets = 1 << per_cpu_sticky_buckets_log2; } else if (unformat(line_input, "timeout %d", &flow_timeout)) ; - else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + else { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - lb_garbage_collection(); - if ((ret = lb_conf(&ip4, &ip6, per_cpu_sticky_buckets, flow_timeout))) - return clib_error_return (0, "lb_conf error %d", ret); + if ((ret = lb_conf(&ip4, &ip6, per_cpu_sticky_buckets, flow_timeout))) { + error = clib_error_return (0, "lb_conf error %d", ret); + goto done; + } - return NULL; +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (lb_conf_command, static) = diff --git a/src/plugins/sixrd/sixrd.c b/src/plugins/sixrd/sixrd.c index 71fc181f..67a9a3ad 100644 --- a/src/plugins/sixrd/sixrd.c +++ b/src/plugins/sixrd/sixrd.c @@ -192,6 +192,7 @@ sixrd_add_domain_command_fn (vlib_main_t *vm, u32 num_m_args = 0; /* Optional arguments */ u32 mtu = 0; + clib_error_t *error = 0; /* Get a line of input. 
*/ if (!unformat_user(input, unformat_line_input, line_input)) @@ -205,19 +206,25 @@ sixrd_add_domain_command_fn (vlib_main_t *vm, num_m_args++; else if (unformat(line_input, "mtu %d", &mtu)) num_m_args++; - else - return clib_error_return(0, "unknown input `%U'", - format_unformat_error, input); + else { + error = clib_error_return(0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free(line_input); - if (num_m_args < 3) - return clib_error_return(0, "mandatory argument(s) missing"); + if (num_m_args < 3) { + error = clib_error_return(0, "mandatory argument(s) missing"); + goto done; + } sixrd_create_domain(&ip6_prefix, ip6_prefix_len, &ip4_prefix, ip4_prefix_len, &ip4_src, &sixrd_domain_index, mtu); - return 0; +done: + unformat_free (line_input); + + return error; } static clib_error_t * @@ -228,6 +235,7 @@ sixrd_del_domain_command_fn (vlib_main_t *vm, unformat_input_t _line_input, *line_input = &_line_input; u32 num_m_args = 0; u32 sixrd_domain_index; + clib_error_t *error = 0; /* Get a line of input. */ if (! unformat_user(input, unformat_line_input, line_input)) @@ -236,18 +244,24 @@ sixrd_del_domain_command_fn (vlib_main_t *vm, while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { if (unformat(line_input, "index %d", &sixrd_domain_index)) num_m_args++; - else - return clib_error_return(0, "unknown input `%U'", - format_unformat_error, input); + else { + error = clib_error_return(0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free(line_input); - if (num_m_args != 1) - return clib_error_return(0, "mandatory argument(s) missing"); + if (num_m_args != 1) { + error = clib_error_return(0, "mandatory argument(s) missing"); + goto done; + } sixrd_delete_domain(sixrd_domain_index); - return 0; +done: + unformat_free (line_input); + + return error; } static u8 * diff --git a/src/plugins/snat/snat.c b/src/plugins/snat/snat.c index 73854a7a..8c2bacdb 100644 --- a/src/plugins/snat/snat.c +++ b/src/plugins/snat/snat.c @@ -1705,6 +1705,7 @@ add_address_command_fn (vlib_main_t * vm, int i, count; int is_add = 1; int rv = 0; + clib_error_t *error = 0; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -1721,19 +1722,27 @@ add_address_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) is_add = 0; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (sm->static_mapping_only) - return clib_error_return (0, "static mapping only mode"); + { + error = clib_error_return (0, "static mapping only mode"); + goto done; + } start_host_order = clib_host_to_net_u32 (start_addr.as_u32); end_host_order = clib_host_to_net_u32 (end_addr.as_u32); if (end_host_order < start_host_order) - return clib_error_return (0, "end address less than start address"); + { + error = clib_error_return (0, "end address less than start address"); + goto done; + } count = (end_host_order - start_host_order) + 1; @@ -1755,11 +1764,11 @@ add_address_command_fn (vlib_main_t * vm, switch (rv) { case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "S-NAT address not exist."); - break; + error = clib_error_return (0, "S-NAT address not exist."); + goto done; case VNET_API_ERROR_UNSPECIFIED: - return clib_error_return (0, "S-NAT address used in static mapping."); - break; + error = clib_error_return (0, "S-NAT address used in static mapping."); + goto done; default: break; } @@ -1767,7 +1776,10 @@ add_address_command_fn (vlib_main_t * vm, increment_v4_address (&this_addr); } - return 0; +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (add_address_command, static) = { @@ -1807,10 +1819,12 @@ snat_feature_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) is_del = 1; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (vec_len (inside_sw_if_indices)) { @@ -1830,6 +1844,8 @@ snat_feature_command_fn (vlib_main_t * vm, } } +done: + unformat_free (line_input); vec_free (inside_sw_if_indices); vec_free (outside_sw_if_indices); @@ -1923,13 +1939,18 @@ add_static_mapping_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) is_add = 0; else - return clib_error_return (0, "unknown input: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (!addr_only && !proto_set) - return clib_error_return (0, "missing protocol"); + { + error = clib_error_return (0, "missing protocol"); + goto done; + } rv = snat_add_static_mapping(l_addr, e_addr, (u16) l_port, (u16) e_port, vrf_id, addr_only, sw_if_index, proto, is_add); @@ -1937,22 +1958,27 @@ add_static_mapping_command_fn (vlib_main_t * vm, switch (rv) { case VNET_API_ERROR_INVALID_VALUE: - return clib_error_return (0, "External port already in use."); - break; + error = clib_error_return (0, "External port already in use."); + goto done; case VNET_API_ERROR_NO_SUCH_ENTRY: if (is_add) - return clib_error_return (0, "External addres must be allocated."); + error = clib_error_return (0, "External addres must be allocated."); else - return clib_error_return (0, "Mapping not exist."); - break; + error = clib_error_return (0, "Mapping not exist."); + goto done; case VNET_API_ERROR_NO_SUCH_FIB: - return clib_error_return (0, "No such VRF id."); + 
error = clib_error_return (0, "No such VRF id."); + goto done; case VNET_API_ERROR_VALUE_EXIST: - return clib_error_return (0, "Mapping already exist."); + error = clib_error_return (0, "Mapping already exist."); + goto done; default: break; } +done: + unformat_free (line_input); + return error; } @@ -1985,6 +2011,7 @@ set_workers_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; uword *bitmap = 0; int rv = 0; + clib_error_t *error = 0; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -1995,13 +2022,18 @@ set_workers_command_fn (vlib_main_t * vm, if (unformat (line_input, "%U", unformat_bitmap_list, &bitmap)) ; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (bitmap == 0) - return clib_error_return (0, "List of workers must be specified."); + { + error = clib_error_return (0, "List of workers must be specified."); + goto done; + } rv = snat_set_workers(bitmap); @@ -2010,17 +2042,20 @@ set_workers_command_fn (vlib_main_t * vm, switch (rv) { case VNET_API_ERROR_INVALID_WORKER: - return clib_error_return (0, "Invalid worker(s)."); - break; + error = clib_error_return (0, "Invalid worker(s)."); + goto done; case VNET_API_ERROR_FEATURE_DISABLED: - return clib_error_return (0, + error = clib_error_return (0, "Supported only if 2 or more workes available."); - break; + goto done; default: break; } - return 0; +done: + unformat_free (line_input); + + return error; } /*? @@ -2047,6 +2082,7 @@ snat_ipfix_logging_enable_disable_command_fn (vlib_main_t * vm, u32 src_port = 0; u8 enable = 1; int rv = 0; + clib_error_t *error = 0; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -2061,17 +2097,25 @@ snat_ipfix_logging_enable_disable_command_fn (vlib_main_t * vm, else if (unformat (line_input, "disable")) enable = 0; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); rv = snat_ipfix_logging_enable_disable (enable, domain_id, (u16) src_port); if (rv) - return clib_error_return (0, "ipfix logging enable failed"); + { + error = clib_error_return (0, "ipfix logging enable failed"); + goto done; + } - return 0; +done: + unformat_free (line_input); + + return error; } /*? @@ -2604,6 +2648,7 @@ snat_add_interface_address_command_fn (vlib_main_t * vm, u32 sw_if_index; int rv; int is_del = 0; + clib_error_t *error = 0; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -2617,8 +2662,11 @@ snat_add_interface_address_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) is_del = 1; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } rv = snat_add_interface_address (sm, sw_if_index, is_del); @@ -2629,10 +2677,15 @@ snat_add_interface_address_command_fn (vlib_main_t * vm, break; default: - return clib_error_return (0, "snat_add_interface_address returned %d", - rv); + error = clib_error_return (0, "snat_add_interface_address returned %d", + rv); + goto done; } - return 0; + +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (snat_add_interface_address_command, static) = { diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index 54cc1aed..36f8109e 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -163,21 +163,31 @@ trace_frame_queue (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "index %u", &index)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (enable > 1) - return clib_error_return (0, "expecting on or off"); + { + error = clib_error_return (0, "expecting on or off"); + goto done; + } if (vec_len (tm->frame_queue_mains) == 0) - return clib_error_return (0, "no worker handoffs exist"); + { + error = clib_error_return (0, "no worker handoffs exist"); + goto done; + } if (index > vec_len (tm->frame_queue_mains) - 1) - return clib_error_return (0, - "expecting valid worker handoff queue index"); + { + error = clib_error_return (0, + "expecting valid worker handoff queue index"); + goto done; + } fqm = vec_elt_at_index (tm->frame_queue_mains, index); @@ -185,7 +195,7 @@ trace_frame_queue (vlib_main_t * vm, unformat_input_t * input, if (num_fq == 0) { vlib_cli_output (vm, "No frame queues exist\n"); - return error; + goto done; } // Allocate storage for trace if necessary @@ -204,6 +214,10 @@ trace_frame_queue (vlib_main_t * vm, unformat_input_t * input, memset (fqh, 0, sizeof (*fqh)); fqm->vlib_frame_queues[fqix]->trace = enable; } + +done: + unformat_free (line_input); + return error; } @@ -432,28 +446,33 @@ test_frame_queue_nelts (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "index %u", &index)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (index > vec_len (tm->frame_queue_mains) - 1) - return clib_error_return (0, - "expecting valid worker handoff queue index"); + { + error = clib_error_return (0, + "expecting valid worker handoff queue index"); + goto done; + } fqm = vec_elt_at_index (tm->frame_queue_mains, index); if ((nelts != 4) && (nelts != 8) && (nelts != 16) && (nelts != 32)) { - return clib_error_return (0, "expecting 4,8,16,32"); + error = clib_error_return (0, "expecting 4,8,16,32"); + goto done; } num_fq = vec_len (fqm->vlib_frame_queues); if (num_fq == 0) { vlib_cli_output (vm, "No frame queues exist\n"); - return error; + goto done; } for (fqix = 0; fqix < num_fq; fqix++) @@ -461,6 +480,9 @@ 
test_frame_queue_nelts (vlib_main_t * vm, unformat_input_t * input, fqm->vlib_frame_queues[fqix]->nelts = nelts; } +done: + unformat_free (line_input); + return error; } @@ -499,15 +521,19 @@ test_frame_queue_threshold (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "index %u", &index)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (index > vec_len (tm->frame_queue_mains) - 1) - return clib_error_return (0, - "expecting valid worker handoff queue index"); + { + error = clib_error_return (0, + "expecting valid worker handoff queue index"); + goto done; + } fqm = vec_elt_at_index (tm->frame_queue_mains, index); @@ -515,7 +541,7 @@ test_frame_queue_threshold (vlib_main_t * vm, unformat_input_t * input, if (threshold == ~(u32) 0) { vlib_cli_output (vm, "expecting threshold value\n"); - return error; + goto done; } if (threshold == 0) @@ -525,7 +551,7 @@ test_frame_queue_threshold (vlib_main_t * vm, unformat_input_t * input, if (num_fq == 0) { vlib_cli_output (vm, "No frame queues exist\n"); - return error; + goto done; } for (fqix = 0; fqix < num_fq; fqix++) @@ -533,6 +559,9 @@ test_frame_queue_threshold (vlib_main_t * vm, unformat_input_t * input, fqm->vlib_frame_queues[fqix]->vector_threshold = threshold; } +done: + unformat_free (line_input); + return error; } diff --git a/src/vlib/trace.c b/src/vlib/trace.c index dcdb837f..6d487ae1 100644 --- a/src/vlib/trace.c +++ b/src/vlib/trace.c @@ -372,6 +372,7 @@ cli_add_trace_buffer (vlib_main_t * vm, vlib_trace_node_t *tn; u32 node_index, add; u8 verbose = 0; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -384,8 +385,11 @@ cli_add_trace_buffer (vlib_main_t * vm, else if (unformat (line_input, "verbose")) verbose = 1; else - return clib_error_create ("expected NODE COUNT, got `%U'", - format_unformat_error, line_input); + { + error = clib_error_create ("expected NODE COUNT, got `%U'", + format_unformat_error, line_input); + goto done; + } } /* *INDENT-OFF* */ @@ -403,7 +407,10 @@ cli_add_trace_buffer (vlib_main_t * vm, })); /* *INDENT-ON* */ - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index 69fca6ec..88e2453c 100644 --- a/src/vlib/unix/cli.c +++ b/src/vlib/unix/cli.c @@ -2835,6 +2835,7 @@ unix_cli_set_terminal_pager (vlib_main_t * vm, unix_cli_main_t *cm = &unix_cli_main; unix_cli_file_t *cf; unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -2852,13 +2853,17 @@ unix_cli_set_terminal_pager (vlib_main_t * vm, "Pager limit set to %u lines; note, this is global.\n", um->cli_pager_buffer_limit); else - return clib_error_return (0, "unknown parameter: `%U`", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown parameter: `%U`", + format_unformat_error, line_input); + goto done; + } } +done: unformat_free (line_input); - return 0; + return error; } /*? 
@@ -2886,6 +2891,7 @@ unix_cli_set_terminal_history (vlib_main_t * vm, unix_cli_file_t *cf; unformat_input_t _line_input, *line_input = &_line_input; u32 limit; + clib_error_t *error = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -2901,8 +2907,11 @@ unix_cli_set_terminal_history (vlib_main_t * vm, else if (unformat (line_input, "limit %u", &cf->history_limit)) ; else - return clib_error_return (0, "unknown parameter: `%U`", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown parameter: `%U`", + format_unformat_error, line_input); + goto done; + } /* If we reduced history size, or turned it off, purge the history */ limit = cf->has_history ? cf->history_limit : 0; @@ -2914,9 +2923,10 @@ unix_cli_set_terminal_history (vlib_main_t * vm, } } +done: unformat_free (line_input); - return 0; + return error; } /*? diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c index 6baa26e1..d4aa7016 100644 --- a/src/vnet/devices/af_packet/cli.c +++ b/src/vnet/devices/af_packet/cli.c @@ -49,6 +49,7 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, u8 *hw_addr_ptr = 0; u32 sw_if_index; int r; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -63,29 +64,47 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr)) hw_addr_ptr = hwaddr; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (host_if_name == NULL) - return clib_error_return (0, "missing host interface name"); + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } r = af_packet_create_if (vm, host_if_name, hw_addr_ptr, &sw_if_index); - vec_free (host_if_name); if (r == VNET_API_ERROR_SYSCALL_ERROR_1) - return clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + { + error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + goto done; + } if (r == VNET_API_ERROR_INVALID_INTERFACE) - return clib_error_return (0, "Invalid interface name"); + { + error = clib_error_return (0, "Invalid interface name"); + goto done; + } if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) - return clib_error_return (0, "Interface elready exists"); + { + error = clib_error_return (0, "Interface elready exists"); + goto done; + } vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index); - return 0; + +done: + vec_free (host_if_name); + unformat_free (line_input); + + return error; } /*? @@ -124,6 +143,7 @@ af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, { unformat_input_t _line_input, *line_input = &_line_input; u8 *host_if_name = NULL; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -134,18 +154,26 @@ af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, if (unformat (line_input, "name %s", &host_if_name)) ; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (host_if_name == NULL) - return clib_error_return (0, "missing host interface name"); + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } af_packet_delete_if (vm, host_if_name); + +done: vec_free (host_if_name); + unformat_free (line_input); - return 0; + return error; } /*? diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c index d133cfd9..1fc665ac 100644 --- a/src/vnet/devices/dpdk/cli.c +++ b/src/vnet/devices/dpdk/cli.c @@ -398,7 +398,7 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, u32 hw_if_index = (u32) ~ 0; u32 nb_rx_desc = (u32) ~ 0; u32 nb_tx_desc = (u32) ~ 0; - clib_error_t *rv; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -414,25 +414,37 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "rx %d", &nb_rx_desc)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - return clib_error_return (0, "number of descriptors can be set only for " - "physical devices"); + { + error = + clib_error_return (0, + "number of descriptors can be set only for " + "physical devices"); + goto done; + } if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) && (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc)) - return clib_error_return (0, "nothing changed"); + { + error = clib_error_return (0, "nothing changed"); + goto done; + } if (nb_rx_desc != (u32) ~ 0) xd->nb_rx_desc = nb_rx_desc; @@ -440,9 +452,12 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, if (nb_tx_desc != (u32) ~ 0) xd->nb_tx_desc = nb_tx_desc; - rv = dpdk_port_setup (dm, xd); + error = dpdk_port_setup (dm, xd); + +done: + unformat_free (line_input); - return rv; + return error; } /* *INDENT-OFF* */ @@ -523,6 +538,7 @@ set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, u32 queue = (u32) 0; u32 cpu = (u32) ~ 0; int i; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -538,18 +554,25 @@ set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "thread %d", &cpu)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); + { + error = clib_error_return (0, "please specify 
valid interface name"); + goto done; + } if (cpu < dm->input_cpu_first_index || cpu >= (dm->input_cpu_first_index + dm->input_cpu_count)) - return clib_error_return (0, "please specify valid thread id"); + { + error = clib_error_return (0, "please specify valid thread id"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -563,7 +586,7 @@ set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, queue == dq->queue_id) { if (cpu == i) /* nothing to do */ - return 0; + goto done; vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]); vec_add2(dm->devices_by_cpu[cpu], dq, 1); @@ -586,13 +609,18 @@ set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, VLIB_NODE_STATE_POLLING); - return 0; + goto done; } } /* *INDENT-ON* */ } - return clib_error_return (0, "not found"); + error = clib_error_return (0, "not found"); + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -653,6 +681,7 @@ set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, u32 hw_if_index = (u32) ~ 0; u32 cpu = (u32) ~ 0; int i; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -666,18 +695,22 @@ set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "thread %d", &cpu)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) return clib_error_return (0, "please specify valid interface name"); if (cpu < dm->hqos_cpu_first_index || cpu >= (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) - return clib_error_return (0, "please specify valid thread id"); + { + error = clib_error_return (0, "please specify valid thread id"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -689,7 +722,7 @@ set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index) { if (cpu == i) /* nothing to do */ - return 0; + goto done; vec_del1 (dm->devices_by_hqos_cpu[i], dq - dm->devices_by_hqos_cpu[i]); @@ -703,12 +736,17 @@ set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, vec_sort_with_function (dm->devices_by_hqos_cpu[cpu], dpdk_device_queue_sort); - return 0; + goto done; } } } - return clib_error_return (0, "not found"); + error = clib_error_return (0, "not found"); + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -732,6 +770,7 @@ set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, u32 pipe_id = (u32) ~ 0; u32 profile_id = (u32) ~ 0; int rv; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -749,14 +788,18 @@ set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "profile %d", &profile_id)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid 
interface name"); + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -765,9 +808,15 @@ set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, profile_id); if (rv) - return clib_error_return (0, "pipe configuration failed"); + { + error = clib_error_return (0, "pipe configuration failed"); + goto done; + } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -797,6 +846,7 @@ set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, .tc_period = 10, }; int rv; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -829,23 +879,33 @@ set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "period %d", &p.tc_period)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p); if (rv) - return clib_error_return (0, "subport configuration failed"); + { + error = clib_error_return (0, "subport configuration failed"); + goto done; + } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -872,6 +932,7 @@ set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, u32 queue = (u32) ~ 0; u32 entry = (u32) ~ 0; u32 val, i; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -889,20 +950,33 @@ set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "queue %d", &queue)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } if (entry >= 64) - return clib_error_return (0, "invalid entry"); + { + error = clib_error_return (0, "invalid entry"); + goto done; + } if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) - return clib_error_return (0, "invalid traffic class"); + { + error = clib_error_return (0, "invalid traffic class"); + goto done; + } if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) - return clib_error_return (0, "invalid traffic class"); + { + error = clib_error_return (0, "invalid traffic class"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -911,7 +985,10 @@ set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); /* Should never happen, shut up Coverity warning */ if (p == 0) - return clib_error_return 
(0, "no worker registrations?"); + { + error = clib_error_return (0, "no worker registrations?"); + goto done; + } vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; int worker_thread_first = tr->first_index; @@ -921,7 +998,10 @@ set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, for (i = 0; i < worker_thread_count; i++) xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -939,6 +1019,7 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, unformat_input_t _line_input, *line_input = &_line_input; vlib_thread_main_t *tm = vlib_get_thread_main (); dpdk_main_t *dm = &dpdk_main; + clib_error_t *error = NULL; /* Device specific data */ struct rte_eth_dev_info dev_info; @@ -984,15 +1065,19 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "mask %llx", &mask)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - /* Get interface */ if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -1019,7 +1104,7 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, if (devconf->hqos_enabled == 0) { vlib_cli_output (vm, "HQoS disabled for this interface"); - return 0; + goto done; } n_subports_per_port = devconf->hqos.port.n_subports_per_port; @@ -1028,27 +1113,39 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, /* Validate packet field configuration: id, offset and mask */ if (id >= 3) - return clib_error_return (0, "invalid packet field id"); + { + error = clib_error_return (0, "invalid packet field id"); + goto done; + } switch (id) { case 0: if (dpdk_hqos_validate_mask (mask, n_subports_per_port) != 0) - return clib_error_return (0, "invalid subport ID mask " - "(n_subports_per_port = %u)", - n_subports_per_port); + { + error = clib_error_return (0, "invalid subport ID mask " + "(n_subports_per_port = %u)", + n_subports_per_port); + goto done; + } break; case 1: if (dpdk_hqos_validate_mask (mask, n_pipes_per_subport) != 0) - return clib_error_return (0, "invalid pipe ID mask " - "(n_pipes_per_subport = %u)", - n_pipes_per_subport); + { + error = clib_error_return (0, "invalid pipe ID mask " + "(n_pipes_per_subport = %u)", + n_pipes_per_subport); + goto done; + } break; case 2: default: if (dpdk_hqos_validate_mask (mask, tctbl_size) != 0) - return clib_error_return (0, "invalid TC table index mask " - "(TC table size = %u)", tctbl_size); + { + error = clib_error_return (0, "invalid TC table index mask " + "(TC table size = %u)", tctbl_size); + goto done; + } } /* Propagate packet field configuration to all workers */ @@ -1075,7 +1172,10 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, __builtin_ctzll (mask); } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -1106,6 +1206,7 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, dpdk_device_config_t *devconf = 0; vlib_thread_registration_t *tr; uword *p = 0; + clib_error_t *error = NULL; 
if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -1117,14 +1218,18 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, &hw_if_index)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify interface name!!"); + { + error = clib_error_return (0, "please specify interface name!!"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -1151,7 +1256,7 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, if (devconf->hqos_enabled == 0) { vlib_cli_output (vm, "HQoS disabled for this interface"); - return 0; + goto done; } /* Detect the set of worker threads */ @@ -1159,7 +1264,10 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, /* Should never happen, shut up Coverity warning */ if (p == 0) - return clib_error_return (0, "no worker registrations?"); + { + error = clib_error_return (0, "no worker registrations?"); + goto done; + } tr = (vlib_thread_registration_t *) p[0]; @@ -1284,7 +1392,10 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, } #endif - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -1315,6 +1426,7 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, u32 qindex; struct rte_sched_queue_stats stats; u16 qlen; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -1339,14 +1451,18 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify interface name!!"); + { + error = clib_error_return (0, "please specify interface name!!"); + goto done; + } hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); xd = vec_elt_at_index (dm->devices, hw->dev_instance); @@ -1373,7 +1489,7 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, if (devconf->hqos_enabled == 0) { vlib_cli_output (vm, "HQoS disabled for this interface"); - return 0; + goto done; } /* @@ -1386,7 +1502,10 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, if (rte_sched_queue_read_stats (xd->hqos_ht->hqos, qindex, &stats, &qlen) != 0) - return clib_error_return (0, "failed to read stats"); + { + error = clib_error_return (0, "failed to read stats"); + goto done; + } vlib_cli_output (vm, "%=24s%=16s", "Stats Parameter", "Value"); vlib_cli_output (vm, "%=24s%=16d", "Packets", stats.n_pkts); @@ -1399,7 +1518,10 @@ show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/devices/dpdk/ipsec/cli.c b/src/vnet/devices/dpdk/ipsec/cli.c index 93df4a64..f9d3a5d0 100644 --- a/src/vnet/devices/dpdk/ipsec/cli.c +++ b/src/vnet/devices/dpdk/ipsec/cli.c @@ -111,6 +111,7 @@ lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * 
input, { unformat_input_t _line_input, *line_input = &_line_input; u16 detail = 0; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -120,15 +121,19 @@ lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input, if (unformat (line_input, "verbose")) detail = 1; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - dpdk_ipsec_show_mapping (vm, detail); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/devices/netmap/cli.c b/src/vnet/devices/netmap/cli.c index 6157f27c..71363294 100644 --- a/src/vnet/devices/netmap/cli.c +++ b/src/vnet/devices/netmap/cli.c @@ -37,6 +37,7 @@ netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, u8 is_pipe = 0; u8 is_master = 0; u32 sw_if_index = ~0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -57,30 +58,48 @@ netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "slave")) is_master = 0; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (host_if_name == NULL) - return clib_error_return (0, "missing host interface name"); + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } r = netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master, &sw_if_index); if (r == VNET_API_ERROR_SYSCALL_ERROR_1) - return clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + { + error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + goto done; + } if (r == VNET_API_ERROR_INVALID_INTERFACE) - return clib_error_return (0, "Invalid interface name"); + { + error = clib_error_return (0, "Invalid interface name"); + goto done; + } if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) - return clib_error_return (0, "Interface already exists"); + { + error = clib_error_return (0, "Interface already exists"); + goto done; + } vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index); - return 0; + +done: + unformat_free (line_input); + + return error; } /*? @@ -144,6 +163,7 @@ netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, { unformat_input_t _line_input, *line_input = &_line_input; u8 *host_if_name = NULL; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -154,17 +174,25 @@ netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, if (unformat (line_input, "name %s", &host_if_name)) ; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (host_if_name == NULL) - return clib_error_return (0, "missing host interface name"); + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } netmap_delete_if (vm, host_if_name); - return 0; +done: + unformat_free (line_input); + + return error; } /*? 
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 315daa77..c43f6e67 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2682,6 +2682,7 @@ vhost_user_connect_command_fn (vlib_main_t * vm, u32 custom_dev_instance = ~0; u8 hwaddr[6]; u8 *hw = NULL; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -2704,10 +2705,12 @@ vhost_user_connect_command_fn (vlib_main_t * vm, renumber = 1; } else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); vnet_main_t *vnm = vnet_get_main (); @@ -2716,14 +2719,18 @@ vhost_user_connect_command_fn (vlib_main_t * vm, is_server, &sw_if_index, feature_mask, renumber, custom_dev_instance, hw))) { - vec_free (sock_filename); - return clib_error_return (0, "vhost_user_create_if returned %d", rv); + error = clib_error_return (0, "vhost_user_create_if returned %d", rv); + goto done; } - vec_free (sock_filename); vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index); - return 0; + +done: + vec_free (sock_filename); + unformat_free (line_input); + + return error; } clib_error_t * @@ -2734,6 +2741,7 @@ vhost_user_delete_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; u32 sw_if_index = ~0; vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -2751,15 +2759,25 @@ vhost_user_delete_command_fn (vlib_main_t * vm, vnet_get_sup_hw_interface (vnm, sw_if_index); if (hwif == NULL || vhost_user_dev_class.index != hwif->dev_class_index) - return clib_error_return (0, "Not a vhost interface"); + { + error = clib_error_return (0, "Not a vhost interface"); + goto done; + } } else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); + vhost_user_delete_if (vnm, vm, sw_if_index); - return 0; + +done: + unformat_free (line_input); + + return error; } int @@ -3286,6 +3304,7 @@ vhost_thread_command_fn (vlib_main_t * vm, u32 sw_if_index; u8 del = 0; int rv; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -3295,9 +3314,9 @@ vhost_thread_command_fn (vlib_main_t * vm, (line_input, "%U %d", unformat_vnet_sw_interface, vnet_get_main (), &sw_if_index, &worker_thread_index)) { - unformat_free (line_input); - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; } if (unformat (line_input, "del")) @@ -3305,9 +3324,16 @@ vhost_thread_command_fn (vlib_main_t * vm, if ((rv = vhost_user_thread_placement (sw_if_index, worker_thread_index, del))) - return clib_error_return (0, "vhost_user_thread_placement returned %d", - rv); - return 0; + { + error = clib_error_return (0, "vhost_user_thread_placement returned %d", + rv); + goto done; + } + +done: + unformat_free (line_input); + + return error; } diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c index d624587d..d4476ac4 100644 --- a/src/vnet/gre/interface.c +++ b/src/vnet/gre/interface.c @@ -491,6 +491,7 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, u32 num_m_args = 0; u8 is_add = 1; u32 sw_if_index; + clib_error_t *error = NULL; /* Get a line of input. */ if (! unformat_user (input, unformat_line_input, line_input)) @@ -508,16 +509,24 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "teb")) teb = 1; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args < 2) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } if (memcmp (&src, &dst, sizeof(src)) == 0) - return clib_error_return (0, "src and dst are identical"); + { + error = clib_error_return (0, "src and dst are identical"); + goto done; + } memset (a, 0, sizeof (*a)); a->outer_fib_id = outer_fib_id; @@ -536,15 +545,21 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index); break; case VNET_API_ERROR_INVALID_VALUE: - return clib_error_return (0, "GRE tunnel already exists..."); + error = clib_error_return (0, "GRE tunnel already exists..."); + goto done; case VNET_API_ERROR_NO_SUCH_FIB: - return clib_error_return (0, "outer fib ID %d doesn't exist\n", - outer_fib_id); + error = clib_error_return (0, "outer fib ID %d doesn't exist\n", + outer_fib_id); + goto done; default: - return clib_error_return (0, "vnet_gre_add_del_tunnel returned %d", rv); + error = clib_error_return (0, "vnet_gre_add_del_tunnel returned %d", rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = { diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c index d461cc88..3af32f2e 100644 --- a/src/vnet/ip/ip4_source_check.c +++ b/src/vnet/ip/ip4_source_check.c @@ -399,6 +399,8 @@ set_ip_source_check (vlib_main_t * vm, vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, is_del == 0, &config, sizeof (config)); done: + unformat_free (line_input); + return error; } @@ -531,7 +533,9 @@ ip_source_check_accept (vlib_main_t * vm, } done: - return (error); + unformat_free (line_input); + + return error; } /*? 
diff --git a/src/vnet/ip/ip4_test.c b/src/vnet/ip/ip4_test.c index 45d17113..73dabfdc 100644 --- a/src/vnet/ip/ip4_test.c +++ b/src/vnet/ip/ip4_test.c @@ -143,8 +143,11 @@ thrash (vlib_main_t * vm, else if (unformat (line_input, "verbose")) verbose = 1; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } } @@ -178,7 +181,7 @@ thrash (vlib_main_t * vm, if (p == 0) { vlib_cli_output (vm, "Couldn't map fib id %d to fib index\n", table_id); - return 0; + goto done; } table_index = p[0]; @@ -294,7 +297,11 @@ thrash (vlib_main_t * vm, pool_free (tm->route_pool); } - return 0; + +done: + unformat_free (line_input); + + return error; } /*? diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 7229591e..6b53137f 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -2923,7 +2923,10 @@ ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input, else if (unformat (line_input, "ra-lifetime")) { if (!unformat (line_input, "%d", &ra_lifetime)) - return (error = unformat_parse_error (line_input)); + { + error = unformat_parse_error (line_input); + goto done; + } use_lifetime = 1; break; } @@ -2931,13 +2934,19 @@ ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input, { if (!unformat (line_input, "%d %d", &ra_initial_count, &ra_initial_interval)) - return (error = unformat_parse_error (line_input)); + { + error = unformat_parse_error (line_input); + goto done; + } break; } else if (unformat (line_input, "ra-interval")) { if (!unformat (line_input, "%d", &ra_max_interval)) - return (error = unformat_parse_error (line_input)); + { + error = unformat_parse_error (line_input); + goto done; + } if (!unformat (line_input, "%d", &ra_min_interval)) ra_min_interval = 0; @@ -2949,7 +2958,10 @@ ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input, break; } else - return (unformat_parse_error (line_input)); + { + error = unformat_parse_error (line_input); + goto done; + } } if (add_radv_info) @@ -3006,7 +3018,10 @@ ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input, else if (unformat (line_input, "no-onlink")) no_onlink = 1; else - return (unformat_parse_error (line_input)); + { + error = unformat_parse_error (line_input); + goto done; + } } ip6_neighbor_ra_prefix (vm, sw_if_index, @@ -3018,9 +3033,9 @@ ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input, off_link, no_autoconfig, no_onlink, is_no); } +done: unformat_free (line_input); -done: return error; } diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 0ef0e7a6..807b87b6 100644 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -568,8 +568,6 @@ vnet_ip_route_cmd (vlib_main_t * vm, } } - unformat_free (line_input); - if (vec_len (prefixs) == 0) { error = @@ -704,6 +702,7 @@ done: vec_free (dpos); vec_free (prefixs); vec_free (rpaths); + unformat_free (line_input); return error; } @@ -872,8 +871,6 @@ vnet_ip_mroute_cmd (vlib_main_t * vm, } } - unformat_free (line_input); - if (~0 == table_id) { /* @@ -970,6 +967,8 @@ vnet_ip_mroute_cmd (vlib_main_t * vm, (scount * gcount) / (timet[1] - timet[0])); done: + unformat_free (line_input); + return error; } @@ -1149,24 +1148,37 @@ probe_neighbor_address (vlib_main_t * vm, is_ip4 = 0; } else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input '%U'", + 
format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (sw_if_index == ~0) - return clib_error_return (0, "Interface required, not set."); + { + error = clib_error_return (0, "Interface required, not set."); + goto done; + } if (address_set == 0) - return clib_error_return (0, "ip address required, not set."); + { + error = clib_error_return (0, "ip address required, not set."); + goto done; + } if (address_set > 1) - return clib_error_return (0, "Multiple ip addresses not supported."); + { + error = clib_error_return (0, "Multiple ip addresses not supported."); + goto done; + } if (is_ip4) error = ip4_probe_neighbor_wait (vm, &a4, sw_if_index, retry_count); else error = ip6_probe_neighbor_wait (vm, &a6, sw_if_index, retry_count); +done: + unformat_free (line_input); + return error; } diff --git a/src/vnet/ipsec-gre/interface.c b/src/vnet/ipsec-gre/interface.c index 3b6e4ac2..0772ce73 100644 --- a/src/vnet/ipsec-gre/interface.c +++ b/src/vnet/ipsec-gre/interface.c @@ -232,6 +232,7 @@ create_ipsec_gre_tunnel_command_fn (vlib_main_t * vm, vnet_ipsec_gre_add_del_tunnel_args_t _a, *a = &_a; int rv; u32 sw_if_index; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -250,16 +251,24 @@ create_ipsec_gre_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "remote-sa %d", &rsa)) num_m_args++; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args < 4) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } if (memcmp (&src, &dst, sizeof (src)) == 0) - return clib_error_return (0, "src and dst are identical"); + { + error = clib_error_return (0, "src and dst are identical"); + goto done; + } memset (a, 0, sizeof (*a)); a->is_add = is_add; @@ -277,14 +286,19 @@ create_ipsec_gre_tunnel_command_fn (vlib_main_t * vm, vnet_get_main (), sw_if_index); break; case VNET_API_ERROR_INVALID_VALUE: - return clib_error_return (0, "GRE tunnel already exists..."); + error = clib_error_return (0, "GRE tunnel already exists..."); + goto done; default: - return clib_error_return (0, - "vnet_ipsec_gre_add_del_tunnel returned %d", - rv); + error = clib_error_return (0, + "vnet_ipsec_gre_add_del_tunnel returned %d", + rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c index 3c1e26f2..0e034402 100644 --- a/src/vnet/ipsec/ipsec_cli.c +++ b/src/vnet/ipsec/ipsec_cli.c @@ -32,6 +32,7 @@ set_interface_spd_command_fn (vlib_main_t * vm, u32 sw_if_index = (u32) ~ 0; u32 spd_id; int is_add = 1; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -43,14 +44,18 @@ set_interface_spd_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) is_add = 0; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - - unformat_free (line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } ipsec_set_interface_spd (vm, sw_if_index, spd_id, is_add); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -72,7 
+77,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, ipsec_sa_t sa; int is_add = ~0; u8 *ck = 0, *ik = 0; - clib_error_t *err = 0; + clib_error_t *error = NULL; memset (&sa, 0, sizeof (sa)); @@ -90,8 +95,11 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, else if (unformat (line_input, "esp")) sa.protocol = IPSEC_PROTOCOL_ESP; else if (unformat (line_input, "ah")) - //sa.protocol = IPSEC_PROTOCOL_AH; - return clib_error_return (0, "unsupported security protocol 'AH'"); + { + //sa.protocol = IPSEC_PROTOCOL_AH; + error = clib_error_return (0, "unsupported security protocol 'AH'"); + goto done; + } else if (unformat (line_input, "crypto-key %U", unformat_hex_string, &ck)) sa.crypto_key_len = vec_len (ck); @@ -102,8 +110,12 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, { if (sa.crypto_alg < IPSEC_CRYPTO_ALG_AES_CBC_128 || sa.crypto_alg >= IPSEC_CRYPTO_N_ALG) - return clib_error_return (0, "unsupported crypto-alg: '%U'", - format_ipsec_crypto_alg, sa.crypto_alg); + { + error = clib_error_return (0, "unsupported crypto-alg: '%U'", + format_ipsec_crypto_alg, + sa.crypto_alg); + goto done; + } } else if (unformat (line_input, "integ-key %U", unformat_hex_string, &ik)) @@ -113,8 +125,12 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, { if (sa.integ_alg < IPSEC_INTEG_ALG_SHA1_96 || sa.integ_alg >= IPSEC_INTEG_N_ALG) - return clib_error_return (0, "unsupported integ-alg: '%U'", - format_ipsec_integ_alg, sa.integ_alg); + { + error = clib_error_return (0, "unsupported integ-alg: '%U'", + format_ipsec_integ_alg, + sa.integ_alg); + goto done; + } } else if (unformat (line_input, "tunnel-src %U", unformat_ip4_address, &sa.tunnel_src_addr.ip4)) @@ -135,12 +151,13 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, sa.is_tunnel_ip6 = 1; } else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (sa.crypto_key_len > sizeof (sa.crypto_key)) sa.crypto_key_len = sizeof (sa.crypto_key); @@ -156,14 +173,17 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm, if (is_add) { ASSERT (im->cb.check_support_cb); - err = im->cb.check_support_cb (&sa); - if (err) - return err; + error = im->cb.check_support_cb (&sa); + if (error) + goto done; } ipsec_add_del_sa (vm, &sa, is_add); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -183,6 +203,7 @@ ipsec_spd_add_del_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; u32 spd_id = ~0; int is_add = ~0; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -196,18 +217,25 @@ ipsec_spd_add_del_command_fn (vlib_main_t * vm, else if (unformat (line_input, "%u", &spd_id)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (spd_id == ~0) - return clib_error_return (0, "please specify SPD ID"); + { + error = clib_error_return (0, "please specify SPD ID"); + goto done; + } ipsec_add_del_spd (vm, spd_id, is_add); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -230,6 +258,7 @@ ipsec_policy_add_del_command_fn (vlib_main_t * vm, int is_add = 0; int is_ip_any = 1; u32 tmp, tmp2; + clib_error_t *error = NULL; memset (&p, 0, sizeof (p)); 
p.lport.stop = p.rport.stop = ~0; @@ -262,7 +291,10 @@ ipsec_policy_add_del_command_fn (vlib_main_t * vm, &p.policy)) { if (p.policy == IPSEC_POLICY_ACTION_RESOLVE) - return clib_error_return (0, "unsupported action: 'resolve'"); + { + error = clib_error_return (0, "unsupported action: 'resolve'"); + goto done; + } } else if (unformat (line_input, "sa %u", &p.sa_id)) ; @@ -300,19 +332,24 @@ ipsec_policy_add_del_command_fn (vlib_main_t * vm, p.rport.stop = tmp2; } else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - ipsec_add_del_policy (vm, &p, is_add); if (is_ip_any) { p.is_ipv6 = 1; ipsec_add_del_policy (vm, &p, is_add); } - return 0; + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -332,6 +369,7 @@ set_ipsec_sa_key_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; ipsec_sa_t sa; u8 *ck = 0, *ik = 0; + clib_error_t *error = NULL; memset (&sa, 0, sizeof (sa)); @@ -349,12 +387,13 @@ set_ipsec_sa_key_command_fn (vlib_main_t * vm, if (unformat (line_input, "integ-key %U", unformat_hex_string, &ik)) sa.integ_key_len = vec_len (ik); else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (sa.crypto_key_len > sizeof (sa.crypto_key)) sa.crypto_key_len = sizeof (sa.crypto_key); @@ -369,7 +408,10 @@ set_ipsec_sa_key_command_fn (vlib_main_t * vm, ipsec_set_sa_key (vm, &sa); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -649,6 +691,7 @@ create_ipsec_tunnel_command_fn (vlib_main_t * vm, ipsec_add_del_tunnel_args_t a; int rv; u32 num_m_args = 0; + clib_error_t *error = NULL; memset (&a, 0, sizeof (a)); a.is_add = 1; @@ -673,13 +716,18 @@ create_ipsec_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "del")) a.is_add = 0; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args < 4) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } rv = ipsec_add_del_tunnel_if (&a); @@ -689,16 +737,21 @@ create_ipsec_tunnel_command_fn (vlib_main_t * vm, break; case VNET_API_ERROR_INVALID_VALUE: if (a.is_add) - return clib_error_return (0, - "IPSec tunnel interface already exists..."); + error = clib_error_return (0, + "IPSec tunnel interface already exists..."); else - return clib_error_return (0, "IPSec tunnel interface not exists..."); + error = clib_error_return (0, "IPSec tunnel interface not exists..."); + goto done; default: - return clib_error_return (0, "ipsec_register_interface returned %d", - rv); + error = clib_error_return (0, "ipsec_register_interface returned %d", + rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -720,6 +773,7 @@ set_interface_key_command_fn (vlib_main_t * vm, u32 hw_if_index = (u32) ~ 0; u32 alg; u8 *key = 0; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -748,25 +802,38 @@ 
set_interface_key_command_fn (vlib_main_t * vm, else if (unformat (line_input, "%U", unformat_hex_string, &key)) ; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (type == IPSEC_IF_SET_KEY_TYPE_NONE) - return clib_error_return (0, "unknown key type"); + { + error = clib_error_return (0, "unknown key type"); + goto done; + } if (alg > 0 && vec_len (key) == 0) - return clib_error_return (0, "key is not specified"); + { + error = clib_error_return (0, "key is not specified"); + goto done; + } if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "interface not specified"); + { + error = clib_error_return (0, "interface not specified"); + goto done; + } ipsec_set_interface_key (im->vnet_main, hw_if_index, type, alg, key); + +done: vec_free (key); + unformat_free (line_input); - return 0; + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c index 5e4691f4..ff3d2f3a 100644 --- a/src/vnet/l2/l2_patch.c +++ b/src/vnet/l2/l2_patch.c @@ -315,6 +315,7 @@ test_patch_command_fn (vlib_main_t * vm, int rx_set = 0; int tx_set = 0; int is_add = 1; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -335,10 +336,16 @@ test_patch_command_fn (vlib_main_t * vm, } if (rx_set == 0) - return clib_error_return (0, "rx interface not set"); + { + error = clib_error_return (0, "rx interface not set"); + goto done; + } if (tx_set == 0) - return clib_error_return (0, "tx interface not set"); + { + error = clib_error_return (0, "tx interface not set"); + goto done; + } rv = vnet_l2_patch_add_del (rx_sw_if_index, tx_sw_if_index, is_add); @@ -348,17 +355,24 @@ test_patch_command_fn (vlib_main_t * vm, break; case VNET_API_ERROR_INVALID_SW_IF_INDEX: - return clib_error_return (0, "rx interface not a physical port"); + error = clib_error_return (0, "rx interface not a physical port"); + goto done; case VNET_API_ERROR_INVALID_SW_IF_INDEX_2: - return clib_error_return (0, "tx interface not a physical port"); + error = clib_error_return (0, "tx interface not a physical port"); + goto done; default: - return clib_error_return + error = clib_error_return (0, "WARNING: vnet_l2_patch_add_del returned %d", rv); + goto done; } - return 0; + +done: + unformat_free (line_input); + + return error; } /*? 
diff --git a/src/vnet/l2/l2_xcrw.c b/src/vnet/l2/l2_xcrw.c index 70610a85..d08a5d8f 100644 --- a/src/vnet/l2/l2_xcrw.c +++ b/src/vnet/l2/l2_xcrw.c @@ -409,6 +409,7 @@ set_l2_xcrw_command_fn (vlib_main_t * vm, u8 *rw = 0; vnet_main_t *vnm = vnet_get_main (); int rv; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) @@ -416,8 +417,11 @@ set_l2_xcrw_command_fn (vlib_main_t * vm, if (!unformat (line_input, "%U", unformat_vnet_sw_interface, vnm, &l2_sw_if_index)) - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { @@ -436,7 +440,10 @@ set_l2_xcrw_command_fn (vlib_main_t * vm, } if (next_node_index == ~0) - return clib_error_return (0, "next node not specified"); + { + error = clib_error_return (0, "next node not specified"); + goto done; + } if (tx_fib_id != ~0) { @@ -448,7 +455,11 @@ set_l2_xcrw_command_fn (vlib_main_t * vm, p = hash_get (ip4_main.fib_index_by_table_id, tx_fib_id); if (p == 0) - return clib_error_return (0, "nonexistent tx_fib_id %d", tx_fib_id); + { + error = + clib_error_return (0, "nonexistent tx_fib_id %d", tx_fib_id); + goto done; + } tx_fib_index = p[0]; } @@ -463,16 +474,21 @@ set_l2_xcrw_command_fn (vlib_main_t * vm, break; case VNET_API_ERROR_INVALID_SW_IF_INDEX: - return clib_error_return (0, "%U not cross-connected", - format_vnet_sw_if_index_name, - vnm, l2_sw_if_index); + error = clib_error_return (0, "%U not cross-connected", + format_vnet_sw_if_index_name, + vnm, l2_sw_if_index); + goto done; + default: - return clib_error_return (0, "vnet_configure_l2_xcrw returned %d", rv); + error = clib_error_return (0, "vnet_configure_l2_xcrw returned %d", rv); + goto done; } +done: vec_free (rw); + unformat_free (line_input); - return 0; + return error; } /*? diff --git a/src/vnet/l2tp/l2tp.c b/src/vnet/l2tp/l2tp.c index a4531dab..2d323397 100644 --- a/src/vnet/l2tp/l2tp.c +++ b/src/vnet/l2tp/l2tp.c @@ -427,6 +427,7 @@ create_l2tpv3_tunnel_command_fn (vlib_main_t * vm, u32 sw_if_index; u32 encap_fib_id = ~0; u32 encap_fib_index = ~0; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -455,18 +456,22 @@ create_l2tpv3_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "l2-sublayer-present")) l2_sublayer_present = 1; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (encap_fib_id != ~0) { uword *p; ip6_main_t *im = &ip6_main; if (!(p = hash_get (im->fib_index_by_table_id, encap_fib_id))) - return clib_error_return (0, "No fib with id %d", encap_fib_id); + { + error = clib_error_return (0, "No fib with id %d", encap_fib_id); + goto done; + } encap_fib_index = p[0]; } else @@ -475,9 +480,15 @@ create_l2tpv3_tunnel_command_fn (vlib_main_t * vm, } if (our_address_set == 0) - return clib_error_return (0, "our address not specified"); + { + error = clib_error_return (0, "our address not specified"); + goto done; + } if (client_address_set == 0) - return clib_error_return (0, "client address not specified"); + { + error = clib_error_return (0, "client address not specified"); + goto done; + } rv = create_l2tpv3_ipv6_tunnel (lm, &client_address, &our_address, local_session_id, remote_session_id, @@ -491,16 +502,22 @@ create_l2tpv3_tunnel_command_fn (vlib_main_t * vm, vnet_get_main (), sw_if_index); break; case VNET_API_ERROR_INVALID_VALUE: - return clib_error_return (0, "session already exists..."); + error = clib_error_return (0, "session already exists..."); + goto done; case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "session does not exist..."); + error = clib_error_return (0, "session does not exist..."); + goto done; default: - return clib_error_return (0, "l2tp_session_add_del returned %d", rv); + error = clib_error_return (0, "l2tp_session_add_del returned %d", rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/lisp-cp/lisp_cli.c b/src/vnet/lisp-cp/lisp_cli.c index 25d11c61..05df9fb6 100644 --- a/src/vnet/lisp-cp/lisp_cli.c +++ b/src/vnet/lisp-cp/lisp_cli.c @@ -25,6 +25,7 @@ lisp_show_adjacencies_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "%s %40s\n", "leid", "reid"); unformat_input_t _line_input, *line_input = &_line_input; u32 vni = ~0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -38,14 +39,14 @@ lisp_show_adjacencies_command_fn (vlib_main_t * vm, { vlib_cli_output (vm, "parse error: '%U'", format_unformat_error, line_input); - return 0; + goto done; } } if (~0 == vni) { vlib_cli_output (vm, "error: no vni specified!"); - return 0; + goto done; } adjs = vnet_lisp_adjacencies_get_by_vni (vni); @@ -57,7 +58,10 @@ lisp_show_adjacencies_command_fn (vlib_main_t * vm, } vec_free (adjs); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -77,6 +81,7 @@ lisp_add_del_map_server_command_fn (vlib_main_t * vm, u8 is_add = 1, ip_set = 0; ip_address_t ip; unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -94,14 +99,14 @@ lisp_add_del_map_server_command_fn (vlib_main_t * vm, { vlib_cli_output (vm, "parse error: '%U'", format_unformat_error, line_input); - return 0; + goto done; } } if (!ip_set) { vlib_cli_output (vm, "map-server ip address not set!"); - return 0; + goto done; } rv = vnet_lisp_add_del_map_server (&ip, is_add); @@ -109,7 +114,10 @@ lisp_add_del_map_server_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "failed to %s map-server!", is_add ? "add" : "delete"); - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -191,7 +199,7 @@ lisp_add_del_local_eid_command_fn (vlib_main_t * vm, unformat_input_t * input, if (key && (0 == key_id)) { vlib_cli_output (vm, "invalid key_id!"); - return 0; + goto done;; } gid_address_copy (&a->eid, &eid); @@ -213,6 +221,8 @@ done: vec_free (locator_set_name); gid_address_free (&a->eid); vec_free (a->key); + unformat_free (line_input); + return error; } @@ -233,6 +243,7 @@ lisp_eid_table_map_command_fn (vlib_main_t * vm, u8 is_add = 1, is_l2 = 0; u32 vni = 0, dp_id = 0; unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -250,11 +261,16 @@ lisp_eid_table_map_command_fn (vlib_main_t * vm, is_l2 = 1; else { - return unformat_parse_error (line_input); + error = unformat_parse_error (line_input); + goto done; } } vnet_lisp_eid_table_map (vni, dp_id, is_l2, is_add); - return 0; + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -479,7 +495,7 @@ lisp_add_del_adjacency_command_fn (vlib_main_t * vm, unformat_input_t * input, != ip_prefix_version (leid_ippref))) { clib_warning ("remote and local EIDs are of different types!"); - return error; + goto done; } memset (a, 0, sizeof (a[0])); @@ -512,6 +528,7 @@ lisp_map_request_mode_command_fn (vlib_main_t * vm, { unformat_input_t _i, *i = &_i; map_request_mode_t mr_mode = _MR_MODE_MAX; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, i)) @@ -533,12 +550,15 @@ lisp_map_request_mode_command_fn (vlib_main_t * vm, if (_MR_MODE_MAX == mr_mode) { clib_warning ("No LISP map request mode entered!"); - return 0; + goto done; } vnet_lisp_set_map_request_mode (mr_mode); + done: - return 0; + unformat_free (i); + + return error; } /* *INDENT-OFF* */ @@ -630,7 +650,10 @@ lisp_pitr_set_locator_set_command_fn (vlib_main_t * vm, else if (unformat (line_input, "disable")) is_add = 0; else - return clib_error_return (0, "parse error"); + { + error = clib_error_return (0, "parse error"); + goto done; + } } if (!locator_name_set) @@ -648,6 +671,8 @@ lisp_pitr_set_locator_set_command_fn (vlib_main_t * vm, done: if (locator_set_name) vec_free (locator_set_name); + unformat_free (line_input); + return error; } @@ -771,6 +796,7 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm, gid_address_t eid; u8 print_all = 1; u8 filter = 0; + clib_error_t *error = NULL; memset (&eid, 0, sizeof (eid)); @@ -787,8 +813,11 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm, else if (unformat (line_input, "remote")) filter = 2; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } vlib_cli_output (vm, "%-35s%-20s%-30s%-20s%-s", @@ -818,7 +847,7 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm, { mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &eid); if ((u32) ~ 0 == mi) - return 0; + goto done; mapit = pool_elt_at_index (lcm->mapping_pool, mi); locator_set_t *ls = pool_elt_at_index (lcm->locator_set_pool, @@ -827,14 +856,17 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm, if (filter && !((1 == filter && ls->local) || (2 == filter && !ls->local))) { - return 0; + goto done; } vlib_cli_output (vm, "%U,", format_eid_entry, lcm->vnet_main, lcm, mapit, ls); } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -853,6 +885,7 @@ lisp_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t * input, unformat_input_t _line_input, *line_input = &_line_input; u8 is_enabled = 0; u8 is_set = 0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -869,16 +902,24 @@ lisp_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t * input, is_set = 1; else { - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; } } if (!is_set) - return clib_error_return (0, "state not set"); + { + error = clib_error_return (0, "state not set"); + goto done; + } vnet_lisp_enable_disable (is_enabled); - return 0; + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -897,6 +938,7 @@ lisp_map_register_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; u8 is_enabled = 0; u8 is_set = 0; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -915,18 +957,22 @@ lisp_map_register_enable_disable_command_fn (vlib_main_t * vm, { vlib_cli_output (vm, "parse error: '%U'", format_unformat_error, line_input); - return 0; + goto done; } } if (!is_set) { vlib_cli_output (vm, "state not set!"); - return 0; + goto done; } vnet_lisp_map_register_enable_disable (is_enabled); - return 0; + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -945,6 +991,7 @@ lisp_rloc_probe_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; u8 is_enabled = 0; u8 is_set = 0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -963,18 +1010,22 @@ lisp_rloc_probe_enable_disable_command_fn (vlib_main_t * vm, { vlib_cli_output (vm, "parse error: '%U'", format_unformat_error, line_input); - return 0; + goto done; } } if (!is_set) { vlib_cli_output (vm, "state not set!"); - return 0; + goto done; } vnet_lisp_rloc_probe_enable_disable (is_enabled); - return 0; + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -1022,6 +1073,7 @@ lisp_show_eid_table_map_command_fn (vlib_main_t * vm, lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); uword *vni_table = 0; u8 is_l2 = 0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -1040,14 +1092,17 @@ lisp_show_eid_table_map_command_fn (vlib_main_t * vm, is_l2 = 0; } else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } if (!vni_table) { vlib_cli_output (vm, "Error: expected l2|l3 param!\n"); - return 0; + goto done; } vlib_cli_output (vm, "%=10s%=10s", "VNI", is_l2 ? "BD" : "VRF"); @@ -1059,7 +1114,10 @@ lisp_show_eid_table_map_command_fn (vlib_main_t * vm, })); /* *INDENT-ON* */ - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ @@ -1131,6 +1189,8 @@ done: vec_free (locators); if (locator_set_name) vec_free (locator_set_name); + unformat_free (line_input); + return error; } @@ -1205,6 +1265,8 @@ lisp_add_del_locator_in_set_command_fn (vlib_main_t * vm, done: vec_free (locators); vec_free (locator_set_name); + unformat_free (line_input); + return error; } @@ -1322,6 +1384,8 @@ lisp_add_del_map_resolver_command_fn (vlib_main_t * vm, } done: + unformat_free (line_input); + return error; } @@ -1372,11 +1436,11 @@ lisp_add_del_mreq_itr_rlocs_command_fn (vlib_main_t * vm, is_add ? 
"add" : "delete"); } +done: vec_free (locator_set_name); + unformat_free (line_input); -done: return error; - } /* *INDENT-OFF* */ @@ -1438,7 +1502,10 @@ lisp_use_petr_set_locator_set_command_fn (vlib_main_t * vm, else if (unformat (line_input, "disable")) is_add = 0; else - return clib_error_return (0, "parse error"); + { + error = clib_error_return (0, "parse error"); + goto done; + } } if (!ip_set) @@ -1454,6 +1521,8 @@ lisp_use_petr_set_locator_set_command_fn (vlib_main_t * vm, } done: + unformat_free (line_input); + return error; } diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index 2142e095..19ac22e7 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -794,6 +794,7 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, u32 table_id, vni, bd_id; u8 vni_is_set = 0, vrf_is_set = 0, bd_index_is_set = 0; u8 nsh_iface = 0; + clib_error_t *error = NULL; if (vnet_lisp_gpe_enable_disable_status () == 0) { @@ -828,8 +829,9 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, } else { - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; } } @@ -839,7 +841,8 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, { if (~0 == lisp_gpe_add_nsh_iface (&lisp_gpe_main)) { - return clib_error_return (0, "NSH interface not created"); + error = clib_error_return (0, "NSH interface not created"); + goto done; } } else @@ -850,21 +853,34 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, } if (vrf_is_set && bd_index_is_set) - return clib_error_return (0, - "Cannot set both vrf and brdige domain index!"); + { + error = clib_error_return + (0, "Cannot set both vrf and brdige domain index!"); + goto done; + } if (!vni_is_set) - return clib_error_return (0, "vni must be set!"); + { + error = clib_error_return (0, "vni must be set!"); + goto done; + } if (!vrf_is_set && !bd_index_is_set) - return clib_error_return (0, "vrf or bridge domain index must be set!"); + { + error = + clib_error_return (0, "vrf or bridge domain index must be set!"); + goto done; + } if (bd_index_is_set) { if (is_add) { if (~0 == lisp_gpe_tenant_l2_iface_add_or_lock (vni, bd_id)) - return clib_error_return (0, "L2 interface not created"); + { + error = clib_error_return (0, "L2 interface not created"); + goto done; + } } else lisp_gpe_tenant_l2_iface_unlock (vni); @@ -874,13 +890,35 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, if (is_add) { if (~0 == lisp_gpe_tenant_l3_iface_add_or_lock (vni, table_id)) - return clib_error_return (0, "L3 interface not created"); + { + error = clib_error_return (0, "L3 interface not created"); + goto done; + } } else lisp_gpe_tenant_l3_iface_unlock (vni); } - return (NULL); + if (nsh_iface) + { + if (is_add) + { + if (~0 == lisp_gpe_add_nsh_iface (&lisp_gpe_main)) + { + error = clib_error_return (0, "NSH interface not created"); + goto done; + } + else + { + lisp_gpe_del_nsh_iface (&lisp_gpe_main); + } + } + } + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/lisp-gpe/lisp_gpe.c b/src/vnet/lisp-gpe/lisp_gpe.c index 1f8afdae..f2fbcbd5 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.c +++ b/src/vnet/lisp-gpe/lisp_gpe.c @@ -218,6 +218,7 @@ lisp_gpe_enable_disable_command_fn (vlib_main_t * vm, unformat_input_t _line_input, 
*line_input = &_line_input; u8 is_en = 1; vnet_lisp_gpe_enable_disable_args_t _a, *a = &_a; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -231,12 +232,18 @@ lisp_gpe_enable_disable_command_fn (vlib_main_t * vm, is_en = 0; else { - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; } } a->is_en = is_en; - return vnet_lisp_gpe_enable_disable (a); + error = vnet_lisp_gpe_enable_disable (a); + +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c index aeec6a94..a2d28118 100644 --- a/src/vnet/map/map.c +++ b/src/vnet/map/map.c @@ -465,6 +465,8 @@ map_security_check_command_fn (vlib_main_t * vm, { unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; + clib_error_t *error = NULL; + /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -476,11 +478,17 @@ map_security_check_command_fn (vlib_main_t * vm, else if (unformat (line_input, "on")) mm->sec_check = true; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + + return error; } static clib_error_t * @@ -490,6 +498,8 @@ map_security_check_frag_command_fn (vlib_main_t * vm, { unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; + clib_error_t *error = NULL; + /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -501,11 +511,17 @@ map_security_check_frag_command_fn (vlib_main_t * vm, else if (unformat (line_input, "on")) mm->sec_check_frag = true; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + + return error; } static clib_error_t * @@ -523,6 +539,7 @@ map_add_domain_command_fn (vlib_main_t * vm, u32 mtu = 0; u8 flags = 0; ip6_src_len = 128; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -559,20 +576,28 @@ map_add_domain_command_fn (vlib_main_t * vm, else if (unformat (line_input, "map-t")) flags |= MAP_DOMAIN_TRANSLATION; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args < 3) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } map_create_domain (&ip4_prefix, ip4_prefix_len, &ip6_prefix, ip6_prefix_len, &ip6_src, ip6_src_len, ea_bits_len, psid_offset, psid_length, &map_domain_index, mtu, flags); - return 0; +done: + unformat_free (line_input); + + return error; } static clib_error_t * @@ -582,6 +607,7 @@ map_del_domain_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; u32 num_m_args = 0; u32 map_domain_index; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -592,17 +618,25 @@ map_del_domain_command_fn (vlib_main_t * vm, if (unformat (line_input, "index %d", &map_domain_index)) num_m_args++; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args != 1) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } map_delete_domain (map_domain_index); - return 0; +done: + unformat_free (line_input); + + return error; } static clib_error_t * @@ -613,6 +647,7 @@ map_add_rule_command_fn (vlib_main_t * vm, ip6_address_t tep; u32 num_m_args = 0; u32 psid = 0, map_domain_index; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -628,19 +663,29 @@ map_add_rule_command_fn (vlib_main_t * vm, if (unformat (line_input, "ip6-dst %U", unformat_ip6_address, &tep)) num_m_args++; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args != 3) - return clib_error_return (0, "mandatory argument(s) missing"); + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } if (map_add_del_psid (map_domain_index, psid, &tep, 1) != 0) { - return clib_error_return (0, "Failing to add Mapping Rule"); + error = clib_error_return (0, "Failing to add Mapping Rule"); + goto done; } - return 0; + +done: + unformat_free (line_input); + + return error; } #if MAP_SKIP_IP6_LOOKUP @@ -653,6 +698,7 @@ map_pre_resolve_command_fn (vlib_main_t * vm, ip4_address_t ip4nh; ip6_address_t ip6nh; map_main_t *mm = &map_main; + clib_error_t *error = NULL; memset (&ip4nh, 0, sizeof (ip4nh)); memset (&ip6nh, 0, sizeof (ip6nh)); @@ -669,14 +715,19 @@ map_pre_resolve_command_fn (vlib_main_t * vm, if (unformat (line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh)) mm->preresolve_ip6 = ip6nh; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); map_pre_resolve (&ip4nh, &ip6nh); - return 0; +done: + unformat_free (line_input); + + return error; } #endif @@ -688,6 +739,7 @@ map_icmp_relay_source_address_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; ip4_address_t icmp_src_address; map_main_t *mm = &map_main; + clib_error_t *error = NULL; mm->icmp4_src_address.as_u32 = 0; @@ -701,12 +753,17 @@ map_icmp_relay_source_address_command_fn (vlib_main_t * vm, (line_input, "%U", unformat_ip4_address, &icmp_src_address)) mm->icmp4_src_address = icmp_src_address; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + return error; } static clib_error_t * @@ -717,6 +774,7 @@ map_icmp_unreachables_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; int num_m_args = 0; + clib_error_t *error = NULL; /* 
Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -730,16 +788,21 @@ map_icmp_unreachables_command_fn (vlib_main_t * vm, else if (unformat (line_input, "off")) mm->icmp6_enabled = false; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (num_m_args != 1) - return clib_error_return (0, "mandatory argument(s) missing"); + error = clib_error_return (0, "mandatory argument(s) missing"); - return 0; +done: + unformat_free (line_input); + + return error; } static clib_error_t * @@ -748,6 +811,7 @@ map_fragment_command_fn (vlib_main_t * vm, { unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -760,12 +824,17 @@ map_fragment_command_fn (vlib_main_t * vm, else if (unformat (line_input, "outer")) mm->frag_inner = false; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + return error; } static clib_error_t * @@ -775,6 +844,7 @@ map_fragment_df_command_fn (vlib_main_t * vm, { unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -787,12 +857,17 @@ map_fragment_df_command_fn (vlib_main_t * vm, else if (unformat (line_input, "off")) mm->frag_ignore_df = false; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + return error; } static clib_error_t * @@ -803,6 +878,7 @@ map_traffic_class_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; map_main_t *mm = &map_main; u32 tc = 0; + clib_error_t *error = NULL; mm->tc_copy = false; @@ -817,12 +893,17 @@ map_traffic_class_command_fn (vlib_main_t * vm, else if (unformat (line_input, "%x", &tc)) mm->tc = tc & 0xff; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + +done: unformat_free (line_input); - return 0; + return error; } static u8 * @@ -922,6 +1003,7 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input, map_domain_t *d; bool counters = false; u32 map_domain_index = ~0; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -934,10 +1016,12 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "index %d", &map_domain_index)) ; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); if (pool_elts (mm->domains) == 0) vlib_cli_output (vm, "No MAP domains are configured..."); @@ -952,15 +1036,19 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input, { if (pool_is_free_index (mm->domains, map_domain_index)) { - return clib_error_return (0, "MAP domain does not exists %d", - map_domain_index); + error = clib_error_return (0, "MAP domain does not exists %d", + map_domain_index); + goto done; } d = pool_elt_at_index (mm->domains, map_domain_index); vlib_cli_output (vm, "%U", format_map_domain, d, counters); } - return 0; +done: + unformat_free (line_input); + + return error; } static clib_error_t * diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 0e610e17..7ae4aa00 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -470,6 +470,8 @@ vnet_mpls_local_label (vlib_main_t * vm, } done: + unformat_free (line_input); + return error; } diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c index 8d1e30a3..e488271d 100644 --- a/src/vnet/mpls/mpls_tunnel.c +++ b/src/vnet/mpls/mpls_tunnel.c @@ -535,6 +535,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, fib_route_path_t rpath, *rpaths = NULL; mpls_label_t out_label = MPLS_LABEL_INVALID, *labels = NULL; u32 sw_if_index; + clib_error_t *error = NULL; memset(&rpath, 0, sizeof(rpath)); @@ -595,8 +596,11 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "l2-only")) l2_only = 1; else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } if (is_del) @@ -606,17 +610,22 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, else { if (0 == vec_len(labels)) - return clib_error_return (0, "No Output Labels '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "No Output Labels '%U'", + format_unformat_error, line_input); + goto done; + } vec_add1(rpaths, rpath); vnet_mpls_tunnel_add(rpaths, labels, l2_only, &sw_if_index); } +done: vec_free(labels); vec_free(rpaths); + unformat_free (line_input); - return (NULL); + return error; } /*? 
diff --git a/src/vnet/pg/cli.c b/src/vnet/pg/cli.c index f5896b43..3c249a7b 100644 --- a/src/vnet/pg/cli.c +++ b/src/vnet/pg/cli.c @@ -547,21 +547,30 @@ pg_capture_cmd_fn (vlib_main_t * vm, else { error = clib_error_create ("unknown input `%U'", - format_unformat_error, input); - return error; + format_unformat_error, line_input); + goto done; } } if (!hi) - return clib_error_return (0, "Please specify interface name"); + { + error = clib_error_return (0, "Please specify interface name"); + goto done; + } if (hi->dev_class_index != pg_dev_class.index) - return clib_error_return (0, "Please specify packet-generator interface"); + { + error = + clib_error_return (0, "Please specify packet-generator interface"); + goto done; + } if (!pcap_file_name && is_disable == 0) - return clib_error_return (0, "Please specify pcap file name"); + { + error = clib_error_return (0, "Please specify pcap file name"); + goto done; + } - unformat_free (line_input); pg_capture_args_t _a, *a = &_a; @@ -572,6 +581,10 @@ pg_capture_cmd_fn (vlib_main_t * vm, a->count = count; error = pg_capture (a); + +done: + unformat_free (line_input); + return error; } @@ -590,6 +603,7 @@ create_pg_if_cmd_fn (vlib_main_t * vm, pg_main_t *pg = &pg_main; unformat_input_t _line_input, *line_input = &_line_input; u32 if_id; + clib_error_t *error = NULL; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -600,14 +614,19 @@ create_pg_if_cmd_fn (vlib_main_t * vm, ; else - return clib_error_create ("unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_create ("unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + pg_interface_add_or_get (pg, if_id); + +done: unformat_free (line_input); - pg_interface_add_or_get (pg, if_id); - return 0; + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/policer/node_funcs.c b/src/vnet/policer/node_funcs.c index 1f4997ff..457dd09f 100644 --- a/src/vnet/policer/node_funcs.c +++ b/src/vnet/policer/node_funcs.c @@ -447,6 +447,7 @@ test_policer_command_fn (vlib_main_t * vm, int rx_set = 0; int is_add = 1; int is_show = 0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -468,7 +469,10 @@ test_policer_command_fn (vlib_main_t * vm, } if (rx_set == 0) - return clib_error_return (0, "interface not set"); + { + error = clib_error_return (0, "interface not set"); + goto done; + } if (is_show) { @@ -477,12 +481,13 @@ test_policer_command_fn (vlib_main_t * vm, policer = pool_elt_at_index (pm->policers, pi); vlib_cli_output (vm, "%U", format_policer_instance, policer); - return 0; + goto done; } if (is_add && config_name == 0) { - return clib_error_return (0, "policer config name required"); + error = clib_error_return (0, "policer config name required"); + goto done; } rv = test_policer_add_del (rx_sw_if_index, config_name, is_add); @@ -493,11 +498,15 @@ test_policer_command_fn (vlib_main_t * vm, break; default: - return clib_error_return + error = clib_error_return (0, "WARNING: vnet_vnet_policer_add_del returned %d", rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/policer/policer.c b/src/vnet/policer/policer.c index 290a6af5..cd754e29 100644 --- a/src/vnet/policer/policer.c +++ b/src/vnet/policer/policer.c @@ -413,6 +413,7 @@ configure_policer_command_fn (vlib_main_t * vm, u8 is_add = 1; u8 *name = 0; u32 pi; + clib_error_t *error = NULL; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -433,13 +434,19 @@ configure_policer_command_fn (vlib_main_t * vm, foreach_config_param #undef _ else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } + error = policer_add_del (vm, name, &c, &pi, is_add); + +done: unformat_free (line_input); - return policer_add_del (vm, name, &c, &pi, is_add); + return error; } /* *INDENT-OFF* */ diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index 48e81b50..25c930c6 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -1308,6 +1308,7 @@ tap_connect_command_fn (vlib_main_t * vm, int ip6_address_set = 0; u32 ip4_mask_width = 0; u32 ip6_mask_width = 0; + clib_error_t *error = NULL; if (tm->is_disabled) return clib_error_return (0, "device disabled..."); @@ -1336,12 +1337,18 @@ tap_connect_command_fn (vlib_main_t * vm, else if (unformat (line_input, "%s", &intfc_name)) ; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } } if (intfc_name == 0) - return clib_error_return (0, "interface name must be specified"); + { + error = clib_error_return (0, "interface name must be specified"); + goto done; + } memset (ap, 0, sizeof (*ap)); @@ -1367,48 +1374,64 @@ tap_connect_command_fn (vlib_main_t * vm, switch (rv) { case VNET_API_ERROR_SYSCALL_ERROR_1: - return clib_error_return (0, "Couldn't open /dev/net/tun"); + error = clib_error_return (0, "Couldn't open /dev/net/tun"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_2: - return clib_error_return (0, "Error setting flags on '%s'", intfc_name); - + error = clib_error_return (0, "Error setting flags on '%s'", intfc_name); + goto done; + case VNET_API_ERROR_SYSCALL_ERROR_3: - return clib_error_return (0, "Couldn't open provisioning socket"); + error = clib_error_return (0, "Couldn't open provisioning socket"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_4: - return clib_error_return (0, "Couldn't get if_index"); + error = clib_error_return (0, "Couldn't get if_index"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_5: - return clib_error_return (0, "Couldn't bind provisioning socket"); + error = clib_error_return (0, "Couldn't bind provisioning socket"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_6: - return clib_error_return (0, "Couldn't set device non-blocking flag"); + error = clib_error_return (0, "Couldn't set device non-blocking flag"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_7: - return clib_error_return (0, "Couldn't set device MTU"); + error = clib_error_return (0, "Couldn't set device MTU"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_8: - return clib_error_return (0, "Couldn't get interface flags"); + error = clib_error_return (0, "Couldn't get interface flags"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_9: - return clib_error_return (0, "Couldn't set intfc admin state up"); + error = clib_error_return (0, "Couldn't set intfc admin state up"); + goto done; case VNET_API_ERROR_SYSCALL_ERROR_10: - return clib_error_return (0, "Couldn't set intfc address/mask"); + error = clib_error_return (0, "Couldn't set intfc address/mask"); + goto done; case VNET_API_ERROR_INVALID_REGISTRATION: - return clib_error_return (0, "Invalid registration"); + error = clib_error_return (0, "Invalid 
registration"); + goto done; case 0: break; default: - return clib_error_return (0, "Unknown error: %d", rv); + error = clib_error_return (0, "Unknown error: %d", rv); + goto done; } vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index); - return 0; + +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (tap_connect_command, static) = { diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c index b97510c4..2cba596f 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.c +++ b/src/vnet/vxlan-gpe/vxlan_gpe.c @@ -454,6 +454,7 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, u32 tmp; vnet_vxlan_gpe_add_del_tunnel_args_t _a, * a = &_a; u32 sw_if_index; + clib_error_t *error = NULL; /* Get a line of input. */ if (! unformat_user (input, unformat_line_input, line_input)) @@ -494,7 +495,10 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, encap_fib_index = ip4_fib_index_from_table_id (tmp); if (encap_fib_index == ~0) - return clib_error_return (0, "nonexistent encap fib id %d", tmp); + { + error = clib_error_return (0, "nonexistent encap fib id %d", tmp); + goto done; + } } else if (unformat (line_input, "decap-vrf-id %d", &tmp)) { @@ -504,7 +508,10 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, decap_fib_index = ip4_fib_index_from_table_id (tmp); if (decap_fib_index == ~0) - return clib_error_return (0, "nonexistent decap fib id %d", tmp); + { + error = clib_error_return (0, "nonexistent decap fib id %d", tmp); + goto done; + } } else if (unformat (line_input, "vni %d", &vni)) vni_set = 1; @@ -517,27 +524,43 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, else if (unformat(line_input, "next-nsh")) protocol = VXLAN_GPE_PROTOCOL_NSH; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (local_set == 0) - return clib_error_return (0, "tunnel local address not specified"); + { + error = clib_error_return (0, "tunnel local address not specified"); + goto done; + } if (remote_set == 0) - return clib_error_return (0, "tunnel remote address not specified"); + { + error = clib_error_return (0, "tunnel remote address not specified"); + goto done; + } if (ipv4_set && ipv6_set) - return clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + { + error = clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + goto done; + } if ((ipv4_set && memcmp(&local.ip4, &remote.ip4, sizeof(local.ip4)) == 0) || (ipv6_set && memcmp(&local.ip6, &remote.ip6, sizeof(local.ip6)) == 0)) - return clib_error_return (0, "src and dst addresses are identical"); + { + error = clib_error_return (0, "src and dst addresses are identical"); + goto done; + } if (vni_set == 0) - return clib_error_return (0, "vni not specified"); + { + error = clib_error_return (0, "vni not specified"); + goto done; + } memset (a, 0, sizeof (*a)); @@ -558,20 +581,27 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index); break; case VNET_API_ERROR_INVALID_DECAP_NEXT: - return clib_error_return (0, "invalid decap-next..."); + error = clib_error_return (0, "invalid decap-next..."); + goto done; case VNET_API_ERROR_TUNNEL_EXIST: - return clib_error_return (0, "tunnel already exists..."); + error = clib_error_return (0, "tunnel already exists..."); + goto done; 
case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "tunnel does not exist..."); + error = clib_error_return (0, "tunnel does not exist..."); + goto done; default: - return clib_error_return + error = clib_error_return (0, "vnet_vxlan_gpe_add_del_tunnel returned %d", rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = { diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c index 849fc25d..eedc16f8 100644 --- a/src/vnet/vxlan/vxlan.c +++ b/src/vnet/vxlan/vxlan.c @@ -657,6 +657,7 @@ vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, int rv; vnet_vxlan_add_del_tunnel_args_t _a, * a = &_a; u32 tunnel_sw_if_index; + clib_error_t *error = NULL; /* Cant "universally zero init" (={0}) due to GCC bug 53119 */ memset(&src, 0, sizeof src); @@ -715,7 +716,10 @@ vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, { encap_fib_index = fib_table_find (fib_ip_proto (ipv6_set), tmp); if (encap_fib_index == ~0) - return clib_error_return (0, "nonexistent encap-vrf-id %d", tmp); + { + error = clib_error_return (0, "nonexistent encap-vrf-id %d", tmp); + goto done; + } } else if (unformat (line_input, "decap-next %U", unformat_decap_next, &decap_next_index, ipv4_set)) @@ -723,41 +727,72 @@ vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "vni %d", &vni)) { if (vni >> 24) - return clib_error_return (0, "vni %d out of range", vni); + { + error = clib_error_return (0, "vni %d out of range", vni); + goto done; + } } else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } } - unformat_free (line_input); - if (src_set == 0) - return clib_error_return (0, "tunnel src address not specified"); + { + error = clib_error_return (0, "tunnel src address not specified"); + goto done; + } if (dst_set == 0) - return clib_error_return (0, "tunnel dst address not specified"); + { + error = clib_error_return (0, "tunnel dst address not specified"); + goto done; + } if (grp_set && !ip46_address_is_multicast(&dst)) - return clib_error_return (0, "tunnel group address not multicast"); + { + error = clib_error_return (0, "tunnel group address not multicast"); + goto done; + } if (grp_set == 0 && ip46_address_is_multicast(&dst)) - return clib_error_return (0, "dst address must be unicast"); + { + error = clib_error_return (0, "dst address must be unicast"); + goto done; + } if (grp_set && mcast_sw_if_index == ~0) - return clib_error_return (0, "tunnel nonexistent multicast device"); + { + error = clib_error_return (0, "tunnel nonexistent multicast device"); + goto done; + } if (ipv4_set && ipv6_set) - return clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + { + error = clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + goto done; + } if (ip46_address_cmp(&src, &dst) == 0) - return clib_error_return (0, "src and dst addresses are identical"); + { + error = clib_error_return (0, "src and dst addresses are identical"); + goto done; + } if (decap_next_index == ~0) - return clib_error_return (0, "next node not found"); + { + error = clib_error_return (0, "next node not found"); + goto done; + } if (vni == 0) - return clib_error_return (0, "vni not specified"); + { + error = clib_error_return (0, "vni not specified"); + goto done; + } memset (a, 0, sizeof (*a)); @@ -779,17 +814,23 @@ 
vxlan_add_del_tunnel_command_fn (vlib_main_t * vm, break; case VNET_API_ERROR_TUNNEL_EXIST: - return clib_error_return (0, "tunnel already exists..."); + error = clib_error_return (0, "tunnel already exists..."); + goto done; case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "tunnel does not exist..."); + error = clib_error_return (0, "tunnel does not exist..."); + goto done; default: - return clib_error_return + error = clib_error_return (0, "vnet_vxlan_add_del_tunnel returned %d", rv); + goto done; } - return 0; +done: + unformat_free (line_input); + + return error; } /*? @@ -912,6 +953,8 @@ set_ip_vxlan_bypass (u32 is_ip6, vnet_int_vxlan_bypass_mode (sw_if_index, is_ip6, is_enable); done: + unformat_free (line_input); + return error; } diff --git a/src/vpp/app/l2t.c b/src/vpp/app/l2t.c index 45dd2807..e1eda155 100644 --- a/src/vpp/app/l2t.c +++ b/src/vpp/app/l2t.c @@ -254,6 +254,7 @@ l2tp_session_add_command_fn (vlib_main_t * vm, u32 local_session_id = 1, remote_session_id = 1; int our_address_set = 0, client_address_set = 0; int l2_sublayer_present = 0; + clib_error_t *error = NULL; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -290,8 +291,12 @@ l2tp_session_add_command_fn (vlib_main_t * vm, else if (unformat (line_input, "l2-sublayer-present")) l2_sublayer_present = 1; else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + unformat_free (line_input); + return error; + } } unformat_free (line_input); diff --git a/src/vpp/app/vpe_cli.c b/src/vpp/app/vpe_cli.c index a26bf71f..94bdc84c 100644 --- a/src/vpp/app/vpe_cli.c +++ b/src/vpp/app/vpe_cli.c @@ -36,6 +36,7 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, mac_addr_t *mac_addrs = 0; u32 sw_if_index; u32 i; + clib_error_t *error = NULL; next_hops = NULL; rpaths = NULL; @@ -49,7 +50,11 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, if (!unformat (line_input, "%U %U", unformat_ip4_address, &prefix.fp_addr.ip4, unformat_vnet_sw_interface, vnm, &sw_if_index)) - goto barf; + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { @@ -67,13 +72,18 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, } else { - barf: - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; } } + if (vec_len (mac_addrs) == 0 || vec_len (mac_addrs) != vec_len (next_hops)) - goto barf; + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } /* Create / delete special interface route /32's */ @@ -100,10 +110,12 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, &prefix, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, rpaths); +done: vec_free (mac_addrs); vec_free (next_hops); + unformat_free (line_input); - return 0; + return error; } /* *INDENT-OFF* */ -- cgit 1.2.3-korg From c3a814be9dc769be942ff8029c7b6eccd4b3af05 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 28 Feb 2017 19:22:22 +0100 Subject: dpdk: be a plugin Change-Id: I238258cdeb77035adc5e88903d824593d0a1da90 Signed-off-by: Damjan Marion --- src/Makefile.am | 21 - src/plugins/Makefile.am | 5 + src/plugins/dpdk.am | 50 + src/plugins/dpdk/api/dpdk.api | 103 + src/plugins/dpdk/api/dpdk_all_api_h.h | 19 
+ src/plugins/dpdk/api/dpdk_msg_enum.h | 31 + src/plugins/dpdk/api/dpdk_test.c | 397 ++++ src/plugins/dpdk/buffer.c | 588 ++++++ src/plugins/dpdk/device/cli.c | 2079 ++++++++++++++++++++ src/plugins/dpdk/device/device.c | 852 ++++++++ src/plugins/dpdk/device/dpdk.h | 490 +++++ src/plugins/dpdk/device/dpdk_priv.h | 135 ++ src/plugins/dpdk/device/format.c | 754 +++++++ src/plugins/dpdk/device/node.c | 674 +++++++ src/plugins/dpdk/dir.dox | 27 + src/plugins/dpdk/hqos/hqos.c | 775 ++++++++ src/plugins/dpdk/hqos/qos_doc.md | 411 ++++ src/plugins/dpdk/init.c | 2074 +++++++++++++++++++ src/plugins/dpdk/ipsec/cli.c | 154 ++ src/plugins/dpdk/ipsec/crypto_node.c | 215 ++ src/plugins/dpdk/ipsec/dir.dox | 18 + src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md | 86 + src/plugins/dpdk/ipsec/esp.h | 249 +++ src/plugins/dpdk/ipsec/esp_decrypt.c | 594 ++++++ src/plugins/dpdk/ipsec/esp_encrypt.c | 609 ++++++ src/plugins/dpdk/ipsec/ipsec.c | 430 ++++ src/plugins/dpdk/ipsec/ipsec.h | 227 +++ src/plugins/dpdk/main.c | 95 + src/plugins/dpdk/thread.c | 85 + src/vat/api_format.c | 320 --- src/vnet.am | 41 +- src/vnet/devices/dpdk/buffer.c | 588 ------ src/vnet/devices/dpdk/cli.c | 2079 -------------------- src/vnet/devices/dpdk/device.c | 852 -------- src/vnet/devices/dpdk/dir.dox | 27 - src/vnet/devices/dpdk/dpdk.api | 103 - src/vnet/devices/dpdk/dpdk.h | 487 ----- src/vnet/devices/dpdk/dpdk_api.c | 246 --- src/vnet/devices/dpdk/dpdk_priv.h | 135 -- src/vnet/devices/dpdk/format.c | 754 ------- src/vnet/devices/dpdk/hqos.c | 775 -------- src/vnet/devices/dpdk/init.c | 1801 ----------------- src/vnet/devices/dpdk/ipsec/cli.c | 154 -- src/vnet/devices/dpdk/ipsec/crypto_node.c | 215 -- src/vnet/devices/dpdk/ipsec/dir.dox | 18 - .../devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md | 86 - src/vnet/devices/dpdk/ipsec/esp.h | 249 --- src/vnet/devices/dpdk/ipsec/esp_decrypt.c | 594 ------ src/vnet/devices/dpdk/ipsec/esp_encrypt.c | 609 ------ src/vnet/devices/dpdk/ipsec/ipsec.c | 430 ---- src/vnet/devices/dpdk/ipsec/ipsec.h | 227 --- src/vnet/devices/dpdk/main.c | 85 - src/vnet/devices/dpdk/node.c | 674 ------- src/vnet/devices/dpdk/qos_doc.md | 411 ---- src/vnet/devices/dpdk/thread.c | 85 - src/vnet/devices/virtio/vhost-user.h | 11 - src/vnet/ipsec/ipsec_api.c | 4 - src/vnet/pg/input.c | 11 +- src/vnet/pg/stream.c | 5 +- src/vnet/replication.c | 6 +- src/vnet/vnet_all_api_h.h | 3 - src/vpp/api/custom_dump.c | 64 - src/vpp/api/gmon.c | 3 +- src/vpp/api/vpe.api | 1 - src/vpp/app/l2t.c | 562 ------ src/vpp/app/l2t_l2.c | 267 --- 66 files changed, 12241 insertions(+), 12988 deletions(-) create mode 100644 src/plugins/dpdk.am create mode 100644 src/plugins/dpdk/api/dpdk.api create mode 100644 src/plugins/dpdk/api/dpdk_all_api_h.h create mode 100644 src/plugins/dpdk/api/dpdk_msg_enum.h create mode 100644 src/plugins/dpdk/api/dpdk_test.c create mode 100644 src/plugins/dpdk/buffer.c create mode 100644 src/plugins/dpdk/device/cli.c create mode 100644 src/plugins/dpdk/device/device.c create mode 100644 src/plugins/dpdk/device/dpdk.h create mode 100644 src/plugins/dpdk/device/dpdk_priv.h create mode 100644 src/plugins/dpdk/device/format.c create mode 100644 src/plugins/dpdk/device/node.c create mode 100644 src/plugins/dpdk/dir.dox create mode 100644 src/plugins/dpdk/hqos/hqos.c create mode 100644 src/plugins/dpdk/hqos/qos_doc.md create mode 100755 src/plugins/dpdk/init.c create mode 100644 src/plugins/dpdk/ipsec/cli.c create mode 100644 src/plugins/dpdk/ipsec/crypto_node.c create mode 100644 src/plugins/dpdk/ipsec/dir.dox create mode 
100644 src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md create mode 100644 src/plugins/dpdk/ipsec/esp.h create mode 100644 src/plugins/dpdk/ipsec/esp_decrypt.c create mode 100644 src/plugins/dpdk/ipsec/esp_encrypt.c create mode 100644 src/plugins/dpdk/ipsec/ipsec.c create mode 100644 src/plugins/dpdk/ipsec/ipsec.h create mode 100644 src/plugins/dpdk/main.c create mode 100644 src/plugins/dpdk/thread.c delete mode 100644 src/vnet/devices/dpdk/buffer.c delete mode 100644 src/vnet/devices/dpdk/cli.c delete mode 100644 src/vnet/devices/dpdk/device.c delete mode 100644 src/vnet/devices/dpdk/dir.dox delete mode 100644 src/vnet/devices/dpdk/dpdk.api delete mode 100644 src/vnet/devices/dpdk/dpdk.h delete mode 100644 src/vnet/devices/dpdk/dpdk_api.c delete mode 100644 src/vnet/devices/dpdk/dpdk_priv.h delete mode 100644 src/vnet/devices/dpdk/format.c delete mode 100644 src/vnet/devices/dpdk/hqos.c delete mode 100755 src/vnet/devices/dpdk/init.c delete mode 100644 src/vnet/devices/dpdk/ipsec/cli.c delete mode 100644 src/vnet/devices/dpdk/ipsec/crypto_node.c delete mode 100644 src/vnet/devices/dpdk/ipsec/dir.dox delete mode 100644 src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md delete mode 100644 src/vnet/devices/dpdk/ipsec/esp.h delete mode 100644 src/vnet/devices/dpdk/ipsec/esp_decrypt.c delete mode 100644 src/vnet/devices/dpdk/ipsec/esp_encrypt.c delete mode 100644 src/vnet/devices/dpdk/ipsec/ipsec.c delete mode 100644 src/vnet/devices/dpdk/ipsec/ipsec.h delete mode 100644 src/vnet/devices/dpdk/main.c delete mode 100644 src/vnet/devices/dpdk/node.c delete mode 100644 src/vnet/devices/dpdk/qos_doc.md delete mode 100644 src/vnet/devices/dpdk/thread.c delete mode 100644 src/vpp/app/l2t.c delete mode 100644 src/vpp/app/l2t_l2.c (limited to 'src/vpp/app') diff --git a/src/Makefile.am b/src/Makefile.am index 641707ed..5daaa48e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -38,27 +38,6 @@ install-data-local: GREP_TIME=`echo $$GREP_TIME | awk '{print $$2}'` ; \ echo "Command list built, Time taken: $$GREP_TIME" -############################################################################### -# DPDK -############################################################################### - -if WITH_DPDK -if ENABLE_DPDK_SHARED -DPDK_LD_FLAGS = -Wl,--whole-archive,-ldpdk,--no-whole-archive -else -DPDK_LD_FLAGS = -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl -endif -if WITH_DPDK_CRYPTO_SW -DPDK_LD_ADD = -lIPSec_MB -lisal_crypto -endif -if WITH_DPDK_MLX5_PMD -DPDK_LD_FLAGS += -libverbs -lmlx5 -lnuma -endif -else -DPDK_LD_FLAGS = -DPDK_LD_ADD = -endif - ############################################################################### # Components ############################################################################### diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 06b575d1..c8877899 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -24,6 +24,7 @@ vppplugins_LTLIBRARIES = vppapitestplugins_LTLIBRARIES = noinst_HEADERS = nobase_apiinclude_HEADERS = +nobase_include_HEADERS = vppapitestpluginsdir = ${libdir}/vpp_api_test_plugins vpppluginsdir = ${libdir}/vpp_plugins @@ -32,6 +33,10 @@ if ENABLE_ACL_PLUGIN include acl.am endif +if WITH_DPDK +include dpdk.am +endif + if ENABLE_FLOWPERPKT_PLUGIN include flowperpkt.am endif diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am new file mode 100644 index 00000000..212bbb73 --- /dev/null +++ b/src/plugins/dpdk.am @@ -0,0 +1,50 @@ +# Copyright (c) 2016 Cisco Systems, Inc. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +vppapitestplugins_LTLIBRARIES += dpdk_test_plugin.la +vppplugins_LTLIBRARIES += dpdk_plugin.la + +dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl + +dpdk_plugin_la_SOURCES = \ + dpdk/init.c \ + dpdk/main.c \ + dpdk/buffer.c \ + dpdk/thread.c \ + dpdk/device/cli.c \ + dpdk/device/dpdk_priv.h \ + dpdk/device/device.c \ + dpdk/device/format.c \ + dpdk/device/node.c \ + dpdk/hqos/hqos.c \ + dpdk/ipsec/esp_encrypt.c \ + dpdk/ipsec/esp_decrypt.c \ + dpdk/ipsec/crypto_node.c \ + dpdk/ipsec/cli.c \ + dpdk/ipsec/ipsec.c \ + dpdk/api/dpdk_plugin.api.h + +API_FILES += dpdk/api/dpdk.api + +nobase_include_HEADERS += \ + dpdk/device/dpdk.h \ + dpdk/api/dpdk_all_api_h.h + +nobase_include_HEADERS += \ + dpdk/ipsec/ipsec.h \ + dpdk/ipsec/esp.h + +dpdk_test_plugin_la_SOURCES = \ + dpdk/api/dpdk_test.c dpdk/api/dpdk_plugin.api.h + +# vi:syntax=automake diff --git a/src/plugins/dpdk/api/dpdk.api b/src/plugins/dpdk/api/dpdk.api new file mode 100644 index 00000000..21215d45 --- /dev/null +++ b/src/plugins/dpdk/api/dpdk.api @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief DPDK interface HQoS pipe profile set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param pipe - pipe ID within its subport + @param profile - pipe profile ID +*/ +define sw_interface_set_dpdk_hqos_pipe { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 pipe; + u32 profile; +}; + +/** \brief DPDK interface HQoS pipe profile set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_pipe_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS subport parameters set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param tb_rate - subport token bucket rate (measured in bytes/second) + @param tb_size - subport token bucket size (measured in credits) + @param tc_rate - subport traffic class 0 .. 
3 rates (measured in bytes/second) + @param tc_period - enforcement period for rates (measured in milliseconds) +*/ +define sw_interface_set_dpdk_hqos_subport { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 tb_rate; + u32 tb_size; + u32 tc_rate[4]; + u32 tc_period; +}; + +/** \brief DPDK interface HQoS subport parameters set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_subport_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS tctbl entry set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param entry - entry index ID + @param tc - traffic class (0 .. 3) + @param queue - traffic class queue (0 .. 3) +*/ +define sw_interface_set_dpdk_hqos_tctbl { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 entry; + u32 tc; + u32 queue; +}; + +/** \brief DPDK interface HQoS tctbl entry set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_tctbl_reply { + u32 context; + i32 retval; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + \ No newline at end of file diff --git a/src/plugins/dpdk/api/dpdk_all_api_h.h b/src/plugins/dpdk/api/dpdk_all_api_h.h new file mode 100644 index 00000000..15eb98d6 --- /dev/null +++ b/src/plugins/dpdk/api/dpdk_all_api_h.h @@ -0,0 +1,19 @@ + +/* + * dpdk_all_api_h.h - skeleton vpp engine plug-in api #include file + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include diff --git a/src/plugins/dpdk/api/dpdk_msg_enum.h b/src/plugins/dpdk/api/dpdk_msg_enum.h new file mode 100644 index 00000000..952ce6ad --- /dev/null +++ b/src/plugins/dpdk/api/dpdk_msg_enum.h @@ -0,0 +1,31 @@ + +/* + * dpdk_msg_enum.h - skeleton vpp engine plug-in message enumeration + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_dpdk_msg_enum_h +#define included_dpdk_msg_enum_h + +#include + +#define vl_msg_id(n,h) n, +typedef enum { +#include + /* We'll want to know how many messages IDs we need... 
*/ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_dpdk_msg_enum_h */ diff --git a/src/plugins/dpdk/api/dpdk_test.c b/src/plugins/dpdk/api/dpdk_test.c new file mode 100644 index 00000000..9fe0f934 --- /dev/null +++ b/src/plugins/dpdk/api/dpdk_test.c @@ -0,0 +1,397 @@ + +/* + * dpdk_test.c - skeleton vpp-api-test plug-in + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include + +uword unformat_sw_if_index (unformat_input_t * input, va_list * args); + +/* Declare message IDs */ +#include + +/* define message structures */ +#define vl_typedefs +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include +#undef vl_api_version + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} dpdk_test_main_t; + +dpdk_test_main_t dpdk_test_main; + +#define foreach_standard_reply_retval_handler \ +_(sw_interface_set_dpdk_hqos_pipe_reply) \ +_(sw_interface_set_dpdk_hqos_subport_reply) \ +_(sw_interface_set_dpdk_hqos_tctbl_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = dpdk_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY, \ + sw_interface_set_dpdk_hqos_pipe_reply) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY, \ + sw_interface_set_dpdk_hqos_subport_reply) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \ + sw_interface_set_dpdk_hqos_tctbl_reply) + +/* M: construct, but don't yet send a message */ +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + dm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + dm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return 
(vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + +static int +api_sw_interface_set_dpdk_hqos_pipe (vat_main_t * vam) +{ + dpdk_test_main_t * dm = &dpdk_test_main; + unformat_input_t *i = vam->input; + vl_api_sw_interface_set_dpdk_hqos_pipe_t *mp; + f64 timeout; + u32 sw_if_index; + u8 sw_if_index_set = 0; + u32 subport; + u8 subport_set = 0; + u32 pipe; + u8 pipe_set = 0; + u32 profile; + u8 profile_set = 0; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "rx sw_if_index %u", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "subport %u", &subport)) + subport_set = 1; + else if (unformat (i, "pipe %u", &pipe)) + pipe_set = 1; + else if (unformat (i, "profile %u", &profile)) + profile_set = 1; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + if (subport_set == 0) + { + errmsg ("missing subport "); + return -99; + } + + if (pipe_set == 0) + { + errmsg ("missing pipe"); + return -99; + } + + if (profile_set == 0) + { + errmsg ("missing profile"); + return -99; + } + + M (SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe); + + mp->sw_if_index = ntohl (sw_if_index); + mp->subport = ntohl (subport); + mp->pipe = ntohl (pipe); + mp->profile = ntohl (profile); + + + S; + W; + /* NOTREACHED */ + return 0; +} + +static int +api_sw_interface_set_dpdk_hqos_subport (vat_main_t * vam) +{ + dpdk_test_main_t * dm = &dpdk_test_main; + unformat_input_t *i = vam->input; + vl_api_sw_interface_set_dpdk_hqos_subport_t *mp; + f64 timeout; + u32 sw_if_index; + u8 sw_if_index_set = 0; + u32 subport; + u8 subport_set = 0; + u32 tb_rate = 1250000000; /* 10GbE */ + u32 tb_size = 1000000; + u32 tc_rate[] = { 1250000000, 1250000000, 1250000000, 1250000000 }; + u32 tc_period = 10; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "rx sw_if_index %u", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "subport %u", &subport)) + subport_set = 1; + else if (unformat (i, "rate %u", &tb_rate)) + { + u32 tc_id; + + for (tc_id = 0; tc_id < (sizeof (tc_rate) / sizeof (tc_rate[0])); + tc_id++) + tc_rate[tc_id] = tb_rate; + } + else if (unformat (i, "bktsize %u", &tb_size)) + ; + else if (unformat (i, "tc0 %u", &tc_rate[0])) + ; + else if (unformat (i, "tc1 %u", &tc_rate[1])) + ; + else if (unformat (i, "tc2 %u", &tc_rate[2])) + ; + else if (unformat (i, "tc3 %u", &tc_rate[3])) + ; + else if (unformat (i, "period %u", &tc_period)) + ; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + if (subport_set == 0) + { + errmsg ("missing subport "); + return -99; + } + + M (SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport); + + mp->sw_if_index = ntohl (sw_if_index); + mp->subport = ntohl (subport); + mp->tb_rate = ntohl (tb_rate); + mp->tb_size = ntohl (tb_size); + mp->tc_rate[0] = ntohl (tc_rate[0]); + mp->tc_rate[1] = ntohl (tc_rate[1]); + mp->tc_rate[2] = ntohl (tc_rate[2]); + mp->tc_rate[3] = ntohl (tc_rate[3]); + mp->tc_period = ntohl (tc_period); + + S; + W; + /* NOTREACHED */ + return 0; +} + +static int +api_sw_interface_set_dpdk_hqos_tctbl (vat_main_t * vam) +{ + dpdk_test_main_t * dm = &dpdk_test_main; + unformat_input_t *i = vam->input; + vl_api_sw_interface_set_dpdk_hqos_tctbl_t *mp; + f64 timeout; + u32 sw_if_index; + u8 sw_if_index_set 
= 0; + u8 entry_set = 0; + u8 tc_set = 0; + u8 queue_set = 0; + u32 entry, tc, queue; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "rx sw_if_index %u", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "entry %d", &entry)) + entry_set = 1; + else if (unformat (i, "tc %d", &tc)) + tc_set = 1; + else if (unformat (i, "queue %d", &queue)) + queue_set = 1; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + if (entry_set == 0) + { + errmsg ("missing entry "); + return -99; + } + + if (tc_set == 0) + { + errmsg ("missing traffic class "); + return -99; + } + + if (queue_set == 0) + { + errmsg ("missing queue "); + return -99; + } + + M (SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl); + + mp->sw_if_index = ntohl (sw_if_index); + mp->entry = ntohl (entry); + mp->tc = ntohl (tc); + mp->queue = ntohl (queue); + + S; + W; + /* NOTREACHED */ + return 0; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(sw_interface_set_dpdk_hqos_pipe, \ + "rx sw_if_index subport pipe \n" \ + "profile \n") \ +_(sw_interface_set_dpdk_hqos_subport, \ + "rx sw_if_index subport [rate ]\n" \ + "[bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ]\n") \ +_(sw_interface_set_dpdk_hqos_tctbl, \ + "rx sw_if_index entry tc queue \n") + +void vat_api_hookup (vat_main_t *vam) +{ + dpdk_test_main_t * dm __attribute__((unused)) = &dpdk_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * vat_plugin_register (vat_main_t *vam) +{ + dpdk_test_main_t * dm = &dpdk_test_main; + u8 * name; + + dm->vat_main = vam; + + /* Ask the vpp engine for the first assigned message-id */ + name = format (0, "dpdk_%08x%c", api_version, 0); + dm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (dm->msg_id_base != (u16) ~0) + vat_api_hookup (vam); + + vec_free(name); + + return 0; +} diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c new file mode 100644 index 00000000..2765c292 --- /dev/null +++ b/src/plugins/dpdk/buffer.c @@ -0,0 +1,588 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer.c: allocate/free network buffers. 
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @file + * + * Allocate/free network buffers. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, + "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); + +static_always_inline void +dpdk_rte_pktmbuf_free (vlib_main_t * vm, vlib_buffer_t * b) +{ + vlib_buffer_t *hb = b; + struct rte_mbuf *mb; + u32 next, flags; + mb = rte_mbuf_from_vlib_buffer (hb); + +next: + flags = b->flags; + next = b->next_buffer; + mb = rte_mbuf_from_vlib_buffer (b); + + if (PREDICT_FALSE (b->n_add_refs)) + { + rte_mbuf_refcnt_update (mb, b->n_add_refs); + b->n_add_refs = 0; + } + + rte_pktmbuf_free_seg (mb); + + if (flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, next); + goto next; + } +} + +static void +del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) +{ + u32 i; + vlib_buffer_t *b; + + for (i = 0; i < vec_len (f->buffers); i++) + { + b = vlib_get_buffer (vm, f->buffers[i]); + dpdk_rte_pktmbuf_free (vm, b); + } + + vec_free (f->name); + vec_free (f->buffers); +} + +/* Add buffer free list. */ +static void +dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + u32 merge_index; + int i; + + ASSERT (os_get_cpu_number () == 0); + + f = vlib_buffer_get_free_list (vm, free_list_index); + + merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); + if (merge_index != ~0 && merge_index != free_list_index) + { + vlib_buffer_merge_free_lists (pool_elt_at_index + (bm->buffer_free_list_pool, merge_index), + f); + } + + del_free_list (vm, f); + + /* Poison it. */ + memset (f, 0xab, sizeof (f[0])); + + pool_put (bm->buffer_free_list_pool, f); + + for (i = 1; i < vec_len (vlib_mains); i++) + { + bm = vlib_mains[i]->buffer_main; + f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);; + memset (f, 0xab, sizeof (f[0])); + pool_put (bm->buffer_free_list_pool, f); + } +} + +/* Make sure free list has at least given number of free buffers. 
*/ +static uword +fill_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * fl, uword min_free_buffers) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_buffer_t *b0, *b1, *b2, *b3; + int n, i; + u32 bi0, bi1, bi2, bi3; + unsigned socket_id = rte_socket_id (); + struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id]; + struct rte_mbuf *mb0, *mb1, *mb2, *mb3; + + /* Too early? */ + if (PREDICT_FALSE (rmp == 0)) + return 0; + + /* Already have enough free buffers on free list? */ + n = min_free_buffers - vec_len (fl->buffers); + if (n <= 0) + return min_free_buffers; + + /* Always allocate round number of buffers. */ + n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32)); + + /* Always allocate new buffers in reasonably large sized chunks. */ + n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); + + vec_validate (vm->mbuf_alloc_list, n - 1); + + if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) + return 0; + + _vec_len (vm->mbuf_alloc_list) = n; + + i = 0; + + while (i < (n - 7)) + { + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 4]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 5]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 6]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 7]), STORE); + + mb0 = vm->mbuf_alloc_list[i]; + mb1 = vm->mbuf_alloc_list[i + 1]; + mb2 = vm->mbuf_alloc_list[i + 2]; + mb3 = vm->mbuf_alloc_list[i + 3]; + + ASSERT (rte_mbuf_refcnt_read (mb0) == 0); + ASSERT (rte_mbuf_refcnt_read (mb1) == 0); + ASSERT (rte_mbuf_refcnt_read (mb2) == 0); + ASSERT (rte_mbuf_refcnt_read (mb3) == 0); + + rte_mbuf_refcnt_set (mb0, 1); + rte_mbuf_refcnt_set (mb1, 1); + rte_mbuf_refcnt_set (mb2, 1); + rte_mbuf_refcnt_set (mb3, 1); + + b0 = vlib_buffer_from_rte_mbuf (mb0); + b1 = vlib_buffer_from_rte_mbuf (mb1); + b2 = vlib_buffer_from_rte_mbuf (mb2); + b3 = vlib_buffer_from_rte_mbuf (mb3); + + bi0 = vlib_get_buffer_index (vm, b0); + bi1 = vlib_get_buffer_index (vm, b1); + bi2 = vlib_get_buffer_index (vm, b2); + bi3 = vlib_get_buffer_index (vm, b3); + + vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi1, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi2, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi3, CLIB_CACHE_LINE_BYTES); + + vlib_buffer_init_for_free_list (b0, fl); + vlib_buffer_init_for_free_list (b1, fl); + vlib_buffer_init_for_free_list (b2, fl); + vlib_buffer_init_for_free_list (b3, fl); + + if (fl->buffer_init_function) + { + fl->buffer_init_function (vm, fl, &bi0, 1); + fl->buffer_init_function (vm, fl, &bi1, 1); + fl->buffer_init_function (vm, fl, &bi2, 1); + fl->buffer_init_function (vm, fl, &bi3, 1); + } + i += 4; + } + + while (i < n) + { + mb0 = vm->mbuf_alloc_list[i]; + + ASSERT (rte_mbuf_refcnt_read (mb0) == 0); + rte_mbuf_refcnt_set (mb0, 1); + + b0 = vlib_buffer_from_rte_mbuf (mb0); + bi0 = vlib_get_buffer_index (vm, b0); + + vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); + + vlib_buffer_init_for_free_list (b0, fl); + + if (fl->buffer_init_function) + fl->buffer_init_function (vm, fl, &bi0, 1); + i++; + } + + fl->n_alloc += n; + + return n; +} + +static u32 +alloc_from_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + u32 * alloc_buffers, u32 n_alloc_buffers) +{ + u32 *dst, *src; + uword len, n_filled; + + dst = alloc_buffers; + + n_filled = fill_free_list (vm, free_list, 
n_alloc_buffers); + if (n_filled == 0) + return 0; + + len = vec_len (free_list->buffers); + ASSERT (len >= n_alloc_buffers); + + src = free_list->buffers + len - n_alloc_buffers; + clib_memcpy (dst, src, n_alloc_buffers * sizeof (u32)); + + _vec_len (free_list->buffers) -= n_alloc_buffers; + + return n_alloc_buffers; +} + +/* Allocate a given number of buffers into given array. + Returns number actually allocated which will be either zero or + number requested. */ +u32 +dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + return alloc_from_free_list + (vm, + pool_elt_at_index (bm->buffer_free_list_pool, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX), + buffers, n_buffers); +} + + +u32 +dpdk_buffer_alloc_from_free_list (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); + return alloc_from_free_list (vm, f, buffers, n_buffers); +} + +static_always_inline void +vlib_buffer_free_inline (vlib_main_t * vm, + u32 * buffers, u32 n_buffers, u32 follow_buffer_next) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *fl; + u32 fi; + int i; + u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, + u32 follow_buffer_next); + + cb = bm->buffer_free_callback; + + if (PREDICT_FALSE (cb != 0)) + n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); + + if (!n_buffers) + return; + + for (i = 0; i < n_buffers; i++) + { + vlib_buffer_t *b; + + b = vlib_get_buffer (vm, buffers[i]); + + fl = vlib_buffer_get_buffer_free_list (vm, b, &fi); + + /* The only current use of this callback: multicast recycle */ + if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) + { + int j; + + vlib_buffer_add_to_free_list + (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); + + for (j = 0; j < vec_len (bm->announce_list); j++) + { + if (fl == bm->announce_list[j]) + goto already_announced; + } + vec_add1 (bm->announce_list, fl); + already_announced: + ; + } + else + { + if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) + dpdk_rte_pktmbuf_free (vm, b); + } + } + if (vec_len (bm->announce_list)) + { + vlib_buffer_free_list_t *fl; + for (i = 0; i < vec_len (bm->announce_list); i++) + { + fl = bm->announce_list[i]; + fl->buffers_added_to_freelist_function (vm, fl); + } + _vec_len (bm->announce_list) = 0; + } +} + +static void +dpdk_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 1); +} + +static void +dpdk_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 0); +} + +static void +dpdk_packet_template_init (vlib_main_t * vm, + void *vt, + void *packet_data, + uword n_packet_data_bytes, + uword min_n_buffers_each_physmem_alloc, u8 * name) +{ + vlib_packet_template_t *t = (vlib_packet_template_t *) vt; + + vlib_worker_thread_barrier_sync (vm); + memset (t, 0, sizeof (t[0])); + + vec_add (t->packet_data, packet_data, n_packet_data_bytes); + + vlib_worker_thread_barrier_release (vm); +} + +clib_error_t * +vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, + unsigned socket_id) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_physmem_main_t *vpm = &vm->physmem_main; + struct rte_mempool *rmp; + int i; + + vec_validate_aligned (dm->pktmbuf_pools, socket_id, 
CLIB_CACHE_LINE_BYTES); + + /* pool already exists, nothing to do */ + if (dm->pktmbuf_pools[socket_id]) + return 0; + + u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); + + rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ + num_mbufs, /* number of mbufs */ + 512, /* cache size */ + VLIB_BUFFER_HDR_SIZE, /* priv size */ + VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ + socket_id); /* cpu socket */ + + if (rmp) + { + { + uword this_pool_end; + uword this_pool_start; + uword this_pool_size; + uword save_vpm_start, save_vpm_end, save_vpm_size; + struct rte_mempool_memhdr *memhdr; + + this_pool_start = ~0ULL; + this_pool_end = 0LL; + + STAILQ_FOREACH (memhdr, &rmp->mem_list, next) + { + if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) + this_pool_end = (uword) (memhdr->addr + memhdr->len); + if (((uword) memhdr->addr) < this_pool_start) + this_pool_start = (uword) (memhdr->addr); + } + ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); + this_pool_size = this_pool_end - this_pool_start; + + if (CLIB_DEBUG > 1) + { + clib_warning ("%s: pool start %llx pool end %llx pool size %lld", + pool_name, this_pool_start, this_pool_end, + this_pool_size); + clib_warning + ("before: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + save_vpm_start = vpm->virtual.start; + save_vpm_end = vpm->virtual.end; + save_vpm_size = vpm->virtual.size; + + if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) + vpm->virtual.start = this_pool_start; + if (this_pool_end > vpm->virtual.end) + vpm->virtual.end = this_pool_end; + + vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; + + if (CLIB_DEBUG > 1) + { + clib_warning + ("after: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + /* check if fits into buffer index range */ + if ((u64) vpm->virtual.size > + ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) + { + clib_warning ("physmem: virtual size out of range!"); + vpm->virtual.start = save_vpm_start; + vpm->virtual.end = save_vpm_end; + vpm->virtual.size = save_vpm_size; + rmp = 0; + } + } + if (rmp) + { + dm->pktmbuf_pools[socket_id] = rmp; + vec_free (pool_name); + return 0; + } + } + + vec_free (pool_name); + + /* no usable pool for this socket, try to use pool from another one */ + for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) + { + if (dm->pktmbuf_pools[i]) + { + clib_warning + ("WARNING: Failed to allocate mempool for CPU socket %u. 
" + "Threads running on socket %u will use socket %u mempool.", + socket_id, socket_id, i); + dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i]; + return 0; + } + } + + return clib_error_return (0, "failed to allocate mempool on socket %u", + socket_id); +} + +#if CLIB_DEBUG > 0 + +u32 *vlib_buffer_state_validation_lock; +uword *vlib_buffer_state_validation_hash; +void *vlib_buffer_state_heap; + +static clib_error_t * +buffer_state_validation_init (vlib_main_t * vm) +{ + void *oldheap; + + vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); + + oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + + vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); + vec_validate_aligned (vlib_buffer_state_validation_lock, 0, + CLIB_CACHE_LINE_BYTES); + clib_mem_set_heap (oldheap); + return 0; +} + +VLIB_INIT_FUNCTION (buffer_state_validation_init); +#endif + +static vlib_buffer_callbacks_t callbacks = { + .vlib_buffer_alloc_cb = &dpdk_buffer_alloc, + .vlib_buffer_alloc_from_free_list_cb = &dpdk_buffer_alloc_from_free_list, + .vlib_buffer_free_cb = &dpdk_buffer_free, + .vlib_buffer_free_no_next_cb = &dpdk_buffer_free_no_next, + .vlib_packet_template_init_cb = &dpdk_packet_template_init, + .vlib_buffer_delete_free_list_cb = &dpdk_buffer_delete_free_list, +}; + +static clib_error_t * +dpdk_buffer_init (vlib_main_t * vm) +{ + vlib_buffer_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_buffer_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c new file mode 100644 index 00000000..d2def2fc --- /dev/null +++ b/src/plugins/dpdk/device/cli.c @@ -0,0 +1,2079 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +/** + * @file + * @brief CLI for DPDK Abstraction Layer and pcap Tx Trace. + * + * This file contains the source code for CLI for DPDK + * Abstraction Layer and pcap Tx Trace. 
+ */ + + +static clib_error_t * +get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd, + dpdk_device_config_t ** devconf) +{ + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + struct rte_eth_dev_info dev_info; + uword *p = 0; + clib_error_t *error = NULL; + + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + if (subport_id != 0) + { + error = clib_error_return (0, "Invalid subport"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + *xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get ((*xd)->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + (*devconf) = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + (*devconf) = &dm->conf->default_devconf; + +done: + return error; +} + +static clib_error_t * +pcap_trace_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ +#define PCAP_DEF_PKT_TO_CAPTURE (100) + + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + u8 *filename; + u8 *chroot_filename = 0; + u32 max = 0; + int enabled = 0; + int errorFlag = 0; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "on")) + { + if (dm->tx_pcap_enable == 0) + { + enabled = 1; + } + else + { + vlib_cli_output (vm, "pcap tx capture already on..."); + errorFlag = 1; + break; + } + } + else if (unformat (line_input, "off")) + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, "captured %d pkts...", + dm->pcap_main.n_packets_captured + 1); + if (dm->pcap_main.n_packets_captured) + { + dm->pcap_main.n_packets_to_capture = + dm->pcap_main.n_packets_captured; + error = pcap_write (&dm->pcap_main); + if (error) + clib_error_report (error); + else + vlib_cli_output (vm, "saved to %s...", dm->pcap_filename); + } + + dm->tx_pcap_enable = 0; + } + else + { + vlib_cli_output (vm, "pcap tx capture already off..."); + errorFlag = 1; + break; + } + } + else if (unformat (line_input, "max %d", &max)) + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, + "can't change max value while pcap tx capture active..."); + errorFlag = 1; + break; + } + } + else if (unformat (line_input, "intfc %U", + unformat_vnet_sw_interface, dm->vnet_main, + &dm->pcap_sw_if_index)) + ; + + else if (unformat (line_input, "intfc any")) + { + dm->pcap_sw_if_index = 0; + } + else if (unformat (line_input, "file %s", &filename)) + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, + "can't change file while pcap tx capture active..."); + errorFlag = 1; + break; + } + + /* Brain-police user path input */ + if (strstr ((char *) filename, "..") + || index ((char *) filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + vlib_cli_output (vm, + "Hint: Only filename, do not enter directory structure."); + vec_free (filename); + errorFlag = 1; + break; + } + + chroot_filename = format (0, "/tmp/%s%c", filename, 0); + vec_free 
(filename); + } + else if (unformat (line_input, "status")) + { + if (dm->pcap_sw_if_index == 0) + { + vlib_cli_output (vm, "max is %d for any interface to file %s", + dm-> + pcap_pkts_to_capture ? dm->pcap_pkts_to_capture + : PCAP_DEF_PKT_TO_CAPTURE, + dm-> + pcap_filename ? dm->pcap_filename : (u8 *) + "/tmp/vpe.pcap"); + } + else + { + vlib_cli_output (vm, "max is %d for interface %U to file %s", + dm-> + pcap_pkts_to_capture ? dm->pcap_pkts_to_capture + : PCAP_DEF_PKT_TO_CAPTURE, + format_vnet_sw_if_index_name, dm->vnet_main, + dm->pcap_sw_if_index, + dm-> + pcap_filename ? dm->pcap_filename : (u8 *) + "/tmp/vpe.pcap"); + } + + if (dm->tx_pcap_enable == 0) + { + vlib_cli_output (vm, "pcap tx capture is off..."); + } + else + { + vlib_cli_output (vm, "pcap tx capture is on: %d of %d pkts...", + dm->pcap_main.n_packets_captured, + dm->pcap_main.n_packets_to_capture); + } + break; + } + + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + errorFlag = 1; + break; + } + } + unformat_free (line_input); + + + if (errorFlag == 0) + { + /* Since no error, save configured values. */ + if (chroot_filename) + { + if (dm->pcap_filename) + vec_free (dm->pcap_filename); + vec_add1 (chroot_filename, 0); + dm->pcap_filename = chroot_filename; + } + + if (max) + dm->pcap_pkts_to_capture = max; + + + if (enabled) + { + if (dm->pcap_filename == 0) + dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0); + + memset (&dm->pcap_main, 0, sizeof (dm->pcap_main)); + dm->pcap_main.file_name = (char *) dm->pcap_filename; + dm->pcap_main.n_packets_to_capture = PCAP_DEF_PKT_TO_CAPTURE; + if (dm->pcap_pkts_to_capture) + dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture; + + dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; + dm->tx_pcap_enable = 1; + vlib_cli_output (vm, "pcap tx capture on..."); + } + } + else if (chroot_filename) + vec_free (chroot_filename); + + + return error; +} + +/*? + * This command is used to start or stop a packet capture, or show + * the status of packet capture. + * + * This command has the following optional parameters: + * + * - on|off - Used to start or stop a packet capture. + * + * - max - Depth of local buffer. Once 'nn' number + * of packets have been received, buffer is flushed to file. Once another + * 'nn' number of packets have been received, buffer is flushed + * to file, overwriting previous write. If not entered, value defaults + * to 100. Can only be updated if packet capture is off. + * + * - intfc |any - Used to specify a given interface, + * or use 'any' to run packet capture on all interfaces. + * 'any' is the default if not provided. Settings from a previous + * packet capture are preserved, so 'any' can be used to reset + * the interface setting. + * + * - file - Used to specify the output filename. The file will + * be placed in the '/tmp' directory, so only the filename is + * supported. Directory should not be entered. If file already exists, file + * will be overwritten. If no filename is provided, '/tmp/vpe.pcap' + * will be used. Can only be updated if packet capture is off. + * + * - status - Displays the current status and configured attributes + * associated with a packet capture. If packet capture is in progress, + * 'status' also will return the number of packets currently in + * the local buffer. All additional attributes entered on command line + * with 'status' will be ingnored and not applied. 
+ * + * @cliexpar + * Example of how to display the status of a tx packet capture when off: + * @cliexstart{pcap tx trace status} + * max is 100, for any interface to file /tmp/vpe.pcap + * pcap tx capture is off... + * @cliexend + * Example of how to start a tx packet capture: + * @cliexstart{pcap tx trace on max 35 intfc GigabitEthernet0/8/0 file vppTest.pcap} + * pcap tx capture on... + * @cliexend + * Example of how to display the status of a tx packet capture in progress: + * @cliexstart{pcap tx trace status} + * max is 35, for interface GigabitEthernet0/8/0 to file /tmp/vppTest.pcap + * pcap tx capture is on: 20 of 35 pkts... + * @cliexend + * Example of how to stop a tx packet capture: + * @cliexstart{vppctl pcap tx trace off} + * captured 21 pkts... + * saved to /tmp/vppTest.pcap... + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (pcap_trace_command, static) = { + .path = "pcap tx trace", + .short_help = + "pcap tx trace [on|off] [max ] [intfc |any] [file ] [status]", + .function = pcap_trace_command_fn, +}; +/* *INDENT-ON* */ + + +static clib_error_t * +show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + struct rte_mempool *rmp; + int i; + + for (i = 0; i < vec_len (dpdk_main.pktmbuf_pools); i++) + { + rmp = dpdk_main.pktmbuf_pools[i]; + if (rmp) + { + unsigned count = rte_mempool_avail_count (rmp); + unsigned free_count = rte_mempool_in_use_count (rmp); + + vlib_cli_output (vm, + "name=\"%s\" available = %7d allocated = %7d total = %7d\n", + rmp->name, (u32) count, (u32) free_count, + (u32) (count + free_count)); + } + else + { + vlib_cli_output (vm, "rte_mempool is NULL (!)\n"); + } + } + return 0; +} + +/*? + * This command displays statistics of each DPDK mempool. + * + * @cliexpar + * Example of how to display DPDK buffer data: + * @cliexstart{show dpdk buffer} + * name="mbuf_pool_socket0" available = 15104 allocated = 1280 total = 16384 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_bufferr,static) = { + .path = "show dpdk buffer", + .short_help = "show dpdk buffer", + .function = show_dpdk_buffer, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static clib_error_t * +test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + static u32 *allocated_buffers; + u32 n_alloc = 0; + u32 n_free = 0; + u32 first, actual_alloc; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "allocate %d", &n_alloc)) + ; + else if (unformat (input, "free %d", &n_free)) + ; + else + break; + } + + if (n_free) + { + if (vec_len (allocated_buffers) < n_free) + return clib_error_return (0, "Can't free %d, only %d allocated", + n_free, vec_len (allocated_buffers)); + + first = vec_len (allocated_buffers) - n_free; + vlib_buffer_free (vm, allocated_buffers + first, n_free); + _vec_len (allocated_buffers) = first; + } + if (n_alloc) + { + first = vec_len (allocated_buffers); + vec_validate (allocated_buffers, + vec_len (allocated_buffers) + n_alloc - 1); + + actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first, + n_alloc); + _vec_len (allocated_buffers) = first + actual_alloc; + + if (actual_alloc < n_alloc) + vlib_cli_output (vm, "WARNING: only allocated %d buffers", + actual_alloc); + } + + vlib_cli_output (vm, "Currently %d buffers allocated", + vec_len (allocated_buffers)); + + if (allocated_buffers && vec_len (allocated_buffers) == 0) + vec_free (allocated_buffers); + + return 0; +} + +/*? 
+ * This command tests the allocation and freeing of DPDK buffers. + * If both 'allocate' and 'free' are entered on the + * same command, the 'free' is executed first. If no + * parameters are provided, this command display how many DPDK buffers + * the test command has allocated. + * + * @cliexpar + * @parblock + * + * Example of how to display how many DPDK buffer test command has allcoated: + * @cliexstart{test dpdk buffer} + * Currently 0 buffers allocated + * @cliexend + * + * Example of how to allocate DPDK buffers using the test command: + * @cliexstart{test dpdk buffer allocate 10} + * Currently 10 buffers allocated + * @cliexend + * + * Example of how to free DPDK buffers allocated by the test command: + * @cliexstart{test dpdk buffer free 10} + * Currently 0 buffers allocated + * @cliexend + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = { + .path = "test dpdk buffer", + .short_help = "test dpdk buffer [allocate ] [free ]", + .function = test_dpdk_buffer, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 nb_rx_desc = (u32) ~ 0; + u32 nb_tx_desc = (u32) ~ 0; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "tx %d", &nb_tx_desc)) + ; + else if (unformat (line_input, "rx %d", &nb_rx_desc)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + { + error = + clib_error_return (0, + "number of descriptors can be set only for " + "physical devices"); + goto done; + } + + if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) && + (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc)) + { + error = clib_error_return (0, "nothing changed"); + goto done; + } + + if (nb_rx_desc != (u32) ~ 0) + xd->nb_rx_desc = nb_rx_desc; + + if (nb_tx_desc != (u32) ~ 0) + xd->nb_tx_desc = nb_tx_desc; + + error = dpdk_port_setup (dm, xd); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command sets the number of DPDK 'rx' and + * 'tx' descriptors for the given physical interface. Use + * the command 'show hardware-interface' to display the + * current descriptor allocation. 
+ * + * @cliexpar + * Example of how to set the DPDK interface descriptors: + * @cliexcmd{set dpdk interface descriptors GigabitEthernet0/8/0 rx 512 tx 512} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = { + .path = "set dpdk interface descriptors", + .short_help = "set dpdk interface descriptors [rx ] [tx ]", + .function = set_dpdk_if_desc, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + int cpu; + + if (tm->n_vlib_mains == 1) + vlib_cli_output (vm, "All interfaces are handled by main thread"); + + for (cpu = 0; cpu < vec_len (dm->devices_by_cpu); cpu++) + { + if (cpu >= dm->input_cpu_first_index && + cpu < (dm->input_cpu_first_index + dm->input_cpu_count)) + vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, + vlib_worker_threads[cpu].name, + vlib_worker_threads[cpu].lcore_id); + + /* *INDENT-OFF* */ + vec_foreach(dq, dm->devices_by_cpu[cpu]) + { + u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; + vnet_hw_interface_t * hi = vnet_get_hw_interface(dm->vnet_main, hw_if_index); + vlib_cli_output(vm, " %v queue %u", hi->name, dq->queue_id); + } + /* *INDENT-ON* */ + } + return 0; +} + +/*? + * This command is used to display the thread and core each + * DPDK interface and queue is assigned too. + * + * @cliexpar + * Example of how to display the DPDK interface placement: + * @cliexstart{show dpdk interface placement} + * Thread 1 (vpp_wk_0 at lcore 1): + * GigabitEthernet0/8/0 queue 0 + * GigabitEthernet0/9/0 queue 0 + * Thread 2 (vpp_wk_1 at lcore 2): + * GigabitEthernet0/8/0 queue 1 + * GigabitEthernet0/9/0 queue 1 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_if_placement,static) = { + .path = "show dpdk interface placement", + .short_help = "show dpdk interface placement", + .function = show_dpdk_if_placement, +}; +/* *INDENT-ON* */ + +static int +dpdk_device_queue_sort (void *a1, void *a2) +{ + dpdk_device_and_queue_t *dq1 = a1; + dpdk_device_and_queue_t *dq2 = a2; + + if (dq1->device > dq2->device) + return 1; + else if (dq1->device < dq2->device) + return -1; + else if (dq1->queue_id > dq2->queue_id) + return 1; + else if (dq1->queue_id < dq2->queue_id) + return -1; + else + return 0; +} + +static clib_error_t * +set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 queue = (u32) 0; + u32 cpu = (u32) ~ 0; + int i; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "queue %d", &queue)) + ; + else if (unformat (line_input, "thread %d", &cpu)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + if (cpu < dm->input_cpu_first_index || + cpu >= (dm->input_cpu_first_index + dm->input_cpu_count)) + { + 
error = clib_error_return (0, "please specify valid thread id"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + for (i = 0; i < vec_len (dm->devices_by_cpu); i++) + { + /* *INDENT-OFF* */ + vec_foreach(dq, dm->devices_by_cpu[i]) + { + if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index && + queue == dq->queue_id) + { + if (cpu == i) /* nothing to do */ + goto done; + + vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]); + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->queue_id = queue; + dq->device = xd->device_index; + xd->cpu_socket_id_by_queue[queue] = + rte_lcore_to_socket_id(vlib_worker_threads[cpu].lcore_id); + + vec_sort_with_function(dm->devices_by_cpu[i], + dpdk_device_queue_sort); + + vec_sort_with_function(dm->devices_by_cpu[cpu], + dpdk_device_queue_sort); + + if (vec_len(dm->devices_by_cpu[i]) == 0) + vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, + VLIB_NODE_STATE_DISABLED); + + if (vec_len(dm->devices_by_cpu[cpu]) == 1) + vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + + goto done; + } + } + /* *INDENT-ON* */ + } + + error = clib_error_return (0, "not found"); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to assign a given interface, and optionally a + * given queue, to a different thread. This will not create a thread, + * so the thread must already exist. Use '/etc/vpp/startup.conf' + * for the initial thread creation. If the 'queue' is not provided, + * it defaults to 0. + * + * @cliexpar + * Example of how to display the DPDK interface placement: + * @cliexstart{show dpdk interface placement} + * Thread 1 (vpp_wk_0 at lcore 1): + * GigabitEthernet0/8/0 queue 0 + * GigabitEthernet0/9/0 queue 0 + * Thread 2 (vpp_wk_1 at lcore 2): + * GigabitEthernet0/8/0 queue 1 + * GigabitEthernet0/9/0 queue 1 + * @cliexend + * Example of how to assign a DPDK interface and queue to a thread: + * @cliexcmd{set dpdk interface placement GigabitEthernet0/8/0 queue 1 thread 1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { + .path = "set dpdk interface placement", + .short_help = "set dpdk interface placement [queue ] thread ", + .function = set_dpdk_if_placement, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + int cpu; + + if (tm->n_vlib_mains == 1) + vlib_cli_output (vm, "All interfaces are handled by main thread"); + + for (cpu = 0; cpu < vec_len (dm->devices_by_hqos_cpu); cpu++) + { + if (cpu >= dm->hqos_cpu_first_index && + cpu < (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) + vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, + vlib_worker_threads[cpu].name, + vlib_worker_threads[cpu].lcore_id); + + vec_foreach (dq, dm->devices_by_hqos_cpu[cpu]) + { + u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; + vnet_hw_interface_t *hi = + vnet_get_hw_interface (dm->vnet_main, hw_if_index); + vlib_cli_output (vm, " %v queue %u", hi->name, dq->queue_id); + } + } + return 0; +} + +/*? + * This command is used to display the thread and core each + * DPDK output interface and HQoS queue is assigned too. 
+ * + * @cliexpar + * Example of how to display the DPDK output interface and HQoS queue placement: + * @cliexstart{show dpdk interface hqos placement} + * Thread 1 (vpp_hqos-threads_0 at lcore 3): + * GigabitEthernet0/8/0 queue 0 + * Thread 2 (vpp_hqos-threads_1 at lcore 4): + * GigabitEthernet0/9/0 queue 0 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos_placement, static) = { + .path = "show dpdk interface hqos placement", + .short_help = "show dpdk interface hqos placement", + .function = show_dpdk_if_hqos_placement, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 cpu = (u32) ~ 0; + int i; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "thread %d", &cpu)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + return clib_error_return (0, "please specify valid interface name"); + + if (cpu < dm->hqos_cpu_first_index || + cpu >= (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) + { + error = clib_error_return (0, "please specify valid thread id"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + for (i = 0; i < vec_len (dm->devices_by_hqos_cpu); i++) + { + vec_foreach (dq, dm->devices_by_hqos_cpu[i]) + { + if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index) + { + if (cpu == i) /* nothing to do */ + goto done; + + vec_del1 (dm->devices_by_hqos_cpu[i], + dq - dm->devices_by_hqos_cpu[i]); + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->queue_id = 0; + dq->device = xd->device_index; + + vec_sort_with_function (dm->devices_by_hqos_cpu[i], + dpdk_device_queue_sort); + + vec_sort_with_function (dm->devices_by_hqos_cpu[cpu], + dpdk_device_queue_sort); + + goto done; + } + } + } + + error = clib_error_return (0, "not found"); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to assign a given DPDK output interface and + * HQoS queue to a different thread. This will not create a thread, + * so the thread must already exist. Use '/etc/vpp/startup.conf' + * for the initial thread creation. See @ref qos_doc for more details. 
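+ * Note that HQoS placement is per interface rather than per queue: the
+ * whole interface is moved to the target HQoS thread and its queue id is
+ * reset to 0.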
+ * + * @cliexpar + * Example of how to display the DPDK output interface and HQoS queue placement: + * @cliexstart{show dpdk interface hqos placement} + * Thread 1 (vpp_hqos-threads_0 at lcore 3): + * GigabitEthernet0/8/0 queue 0 + * Thread 2 (vpp_hqos-threads_1 at lcore 4): + * GigabitEthernet0/9/0 queue 0 + * @cliexend + * Example of how to assign a DPDK output interface and HQoS queue to a thread: + * @cliexcmd{set dpdk interface hqos placement GigabitEthernet0/8/0 thread 2} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_placement, static) = { + .path = "set dpdk interface hqos placement", + .short_help = "set dpdk interface hqos placement thread ", + .function = set_dpdk_if_hqos_placement, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 subport_id = (u32) ~ 0; + u32 pipe_id = (u32) ~ 0; + u32 profile_id = (u32) ~ 0; + int rv; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "subport %d", &subport_id)) + ; + else if (unformat (line_input, "pipe %d", &pipe_id)) + ; + else if (unformat (line_input, "profile %d", &profile_id)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = + rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, + profile_id); + if (rv) + { + error = clib_error_return (0, "pipe configuration failed"); + goto done; + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to change the profile associate with a HQoS pipe. The + * '' is zero based. Use the command + * 'show dpdk interface hqos' to display the content of each profile. + * See @ref qos_doc for more details. + * + * @note + * Currently there is not an API to create a new HQoS pipe profile. One is + * created by default in the code (search for 'hqos_pipe_params_default''). + * Additional profiles can be created in code and code recompiled. Then use this + * command to assign it. 
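+ *
+ * As a rough sketch (the profile name and the numbers below are illustrative
+ * only, not part of this code), an additional 2 Mbps profile
+ * (250000 bytes/second) could be defined alongside
+ * 'hqos_pipe_params_default' and appended to the per-device pipe profile
+ * vector before the port is configured:
+ *
+ *  static struct rte_sched_pipe_params hqos_pipe_params_2mbps = {
+ *    .tb_rate = 250000,
+ *    .tb_size = 1000000,
+ *    .tc_rate = { 250000, 250000, 250000, 250000 },
+ *    .tc_period = 40,
+ *    .wrr_weights = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ *  };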
+ * + * @cliexpar + * Example of how to assign a new profile to a HQoS pipe: + * @cliexcmd{set dpdk interface hqos pipe GigabitEthernet0/8/0 subport 0 pipe 2 profile 1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pipe, static) = +{ + .path = "set dpdk interface hqos pipe", + .short_help = "set dpdk interface hqos pipe subport pipe " + "profile ", + .function = set_dpdk_if_hqos_pipe, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = NULL; + u32 hw_if_index = (u32) ~ 0; + u32 subport_id = (u32) ~ 0; + struct rte_sched_subport_params p; + int rv; + clib_error_t *error = NULL; + u32 tb_rate = (u32) ~ 0; + u32 tb_size = (u32) ~ 0; + u32 tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = + { (u32) ~ 0, (u32) ~ 0, (u32) ~ 0, (u32) ~ 0 }; + u32 tc_period = (u32) ~ 0; + dpdk_device_config_t *devconf = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "subport %d", &subport_id)) + ; + else if (unformat (line_input, "rate %d", &tb_rate)) + ; + else if (unformat (line_input, "bktsize %d", &tb_size)) + ; + else if (unformat (line_input, "tc0 %d", &tc_rate[0])) + ; + else if (unformat (line_input, "tc1 %d", &tc_rate[1])) + ; + else if (unformat (line_input, "tc2 %d", &tc_rate[2])) + ; + else if (unformat (line_input, "tc3 %d", &tc_rate[3])) + ; + else if (unformat (line_input, "period %d", &tc_period)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + error = get_hqos (hw_if_index, subport_id, &xd, &devconf); + + if (error == NULL) + { + /* Copy the current values over to local structure. */ + memcpy (&p, &devconf->hqos.subport[subport_id], sizeof (p)); + + /* Update local structure with input values. */ + if (tb_rate != (u32) ~ 0) + { + p.tb_rate = tb_rate; + p.tc_rate[0] = tb_rate; + p.tc_rate[1] = tb_rate; + p.tc_rate[2] = tb_rate; + p.tc_rate[3] = tb_rate; + } + if (tb_size != (u32) ~ 0) + { + p.tb_size = tb_size; + } + if (tc_rate[0] != (u32) ~ 0) + { + p.tc_rate[0] = tc_rate[0]; + } + if (tc_rate[1] != (u32) ~ 0) + { + p.tc_rate[1] = tc_rate[1]; + } + if (tc_rate[2] != (u32) ~ 0) + { + p.tc_rate[2] = tc_rate[2]; + } + if (tc_rate[3] != (u32) ~ 0) + { + p.tc_rate[3] = tc_rate[3]; + } + if (tc_period != (u32) ~ 0) + { + p.tc_period = tc_period; + } + + /* Apply changes. */ + rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p); + if (rv) + { + error = clib_error_return (0, "subport configuration failed"); + goto done; + } + else + { + /* Successfully applied, so save of the input values. */ + memcpy (&devconf->hqos.subport[subport_id], &p, sizeof (p)); + } + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to set the subport level parameters such as token + * bucket rate (bytes per seconds), token bucket size (bytes), traffic class + * rates (bytes per seconds) and token update period (Milliseconds). + * + * By default, the 'rate' is set to 1250000000 bytes/second (10GbE + * rate) and each of the four traffic classes is set to 100% of the port rate. 
+ * If the 'rate' is updated by this command, all four traffic classes + * are assigned the same value. Each of the four traffic classes can be updated + * individually. + * + * @cliexpar + * Example of how modify the subport attributes for a 1GbE link: + * @cliexcmd{set dpdk interface hqos subport GigabitEthernet0/8/0 subport 0 rate 125000000} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_subport, static) = { + .path = "set dpdk interface hqos subport", + .short_help = "set dpdk interface hqos subport subport " + "[rate ] [bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] " + "[period ]", + .function = set_dpdk_if_hqos_subport, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 tc = (u32) ~ 0; + u32 queue = (u32) ~ 0; + u32 entry = (u32) ~ 0; + u32 val, i; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "entry %d", &entry)) + ; + else if (unformat (line_input, "tc %d", &tc)) + ; + else if (unformat (line_input, "queue %d", &queue)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + if (entry >= 64) + { + error = clib_error_return (0, "invalid entry"); + goto done; + } + if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + { + error = clib_error_return (0, "invalid traffic class"); + goto done; + } + if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + { + error = clib_error_return (0, "invalid traffic class queue"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + /* Should never happen, shut up Coverity warning */ + if (p == 0) + { + error = clib_error_return (0, "no worker registrations?"); + goto done; + } + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + for (i = 0; i < worker_thread_count; i++) + xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to set the traffic class translation table. The + * traffic class translation table is used to map 64 values (0-63) to one of + * four traffic class and one of four HQoS input queue. Use the 'show + * dpdk interface hqos' command to display the traffic class translation + * table. See @ref qos_doc for more details. + * + * This command has the following parameters: + * + * - - Used to specify the output interface. + * + * - entry - Mapped value (0-63) to assign traffic class and queue to. + * + * - tc - Traffic class (0-3) to be used by the provided mapped value. 
+ * + * - queue - HQoS input queue (0-3) to be used by the provided mapped value. + * + * @cliexpar + * Example of how modify the traffic class translation table: + * @cliexcmd{set dpdk interface hqos tctbl GigabitEthernet0/8/0 entry 16 tc 2 queue 2} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_tctbl, static) = { + .path = "set dpdk interface hqos tctbl", + .short_help = "set dpdk interface hqos tctbl entry tc queue ", + .function = set_dpdk_if_hqos_tctbl, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + clib_error_t *error = NULL; + + /* Device specific data */ + struct rte_eth_dev_info dev_info; + dpdk_device_config_t *devconf = 0; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + /* Should never happen, shut up Coverity warning */ + if (p == 0) + return clib_error_return (0, "no worker registrations?"); + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + /* Packet field configuration */ + u64 mask = (u64) ~ 0; + u32 id = (u32) ~ 0; + u32 offset = (u32) ~ 0; + + /* HQoS params */ + u32 n_subports_per_port, n_pipes_per_subport, tctbl_size; + + u32 i; + + /* Parse input arguments */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "id subport")) + id = 0; + else if (unformat (line_input, "id pipe")) + id = 1; + else if (unformat (line_input, "id tc")) + id = 2; + else if (unformat (line_input, "id %d", &id)) + ; + else if (unformat (line_input, "offset %d", &offset)) + ; + else if (unformat (line_input, "mask %llx", &mask)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + /* Get interface */ + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get (xd->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + if (devconf->hqos_enabled == 0) + { + vlib_cli_output (vm, "HQoS disabled for this interface"); + goto done; + } + + n_subports_per_port = devconf->hqos.port.n_subports_per_port; + n_pipes_per_subport = devconf->hqos.port.n_pipes_per_subport; + tctbl_size = RTE_DIM (devconf->hqos.tc_table); + + /* Validate packet field configuration: id, offset and mask */ + if (id >= 3) + { + error = 
clib_error_return (0, "invalid packet field id"); + goto done; + } + + switch (id) + { + case 0: + if (dpdk_hqos_validate_mask (mask, n_subports_per_port) != 0) + { + error = clib_error_return (0, "invalid subport ID mask " + "(n_subports_per_port = %u)", + n_subports_per_port); + goto done; + } + break; + case 1: + if (dpdk_hqos_validate_mask (mask, n_pipes_per_subport) != 0) + { + error = clib_error_return (0, "invalid pipe ID mask " + "(n_pipes_per_subport = %u)", + n_pipes_per_subport); + goto done; + } + break; + case 2: + default: + if (dpdk_hqos_validate_mask (mask, tctbl_size) != 0) + { + error = clib_error_return (0, "invalid TC table index mask " + "(TC table size = %u)", tctbl_size); + goto done; + } + } + + /* Propagate packet field configuration to all workers */ + for (i = 0; i < worker_thread_count; i++) + switch (id) + { + case 0: + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabpos = offset; + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabmask = mask; + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabshr = + __builtin_ctzll (mask); + break; + case 1: + xd->hqos_wt[worker_thread_first + i].hqos_field1_slabpos = offset; + xd->hqos_wt[worker_thread_first + i].hqos_field1_slabmask = mask; + xd->hqos_wt[worker_thread_first + i].hqos_field1_slabshr = + __builtin_ctzll (mask); + break; + case 2: + default: + xd->hqos_wt[worker_thread_first + i].hqos_field2_slabpos = offset; + xd->hqos_wt[worker_thread_first + i].hqos_field2_slabmask = mask; + xd->hqos_wt[worker_thread_first + i].hqos_field2_slabshr = + __builtin_ctzll (mask); + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to set the packet fields required for classifiying the + * incoming packet. As a result of classification process, packet field + * information will be mapped to 5 tuples (subport, pipe, traffic class, pipe, + * color) and stored in packet mbuf. + * + * This command has the following parameters: + * + * - - Used to specify the output interface. + * + * - id subport|pipe|tc - Classification occurs across three fields. + * This parameter indicates which of the three masks are being configured. Legacy + * code used 0-2 to represent these three fields, so 0-2 is still accepted. + * - subport|0 - Currently only one subport is supported, so only + * an empty mask is supported for the subport classification. + * - pipe|1 - Currently, 4096 pipes per subport are supported, so a + * 12-bit mask should be configure to map to the 0-4095 pipes. + * - tc|2 - The translation table (see 'set dpdk interface hqos + * tctbl' command) maps each value (0-63) into one of the 4 traffic classes + * per pipe. A 6-bit mask should be configure to map this field to a traffic class. + * + * - offset - Offset in the packet to apply the 64-bit mask for classification. + * The offset should be on an 8-byte boundary (0,8,16,24..). + * + * - mask - 64-bit mask to apply to packet at the given 'offset'. + * Bits must be contiguous and should not include '0x'. + * + * The default values for the 'pktfield' assumes Ethernet/IPv4/UDP packets with + * no VLAN. Adjust based on expected packet format and desired classification field. + * - 'subport' is always empty (offset 0 mask 0000000000000000) + * - By default, 'pipe' maps to the UDP payload bits 12 .. 
23 (offset 40 + * mask 0000000fff000000) + * - By default, 'tc' maps to the DSCP field in IP header (offset 48 mask + * 00000000000000fc) + * + * @cliexpar + * Example of how modify the 'pipe' classification filter to match VLAN: + * @cliexcmd{set dpdk interface hqos pktfield GigabitEthernet0/8/0 id pipe offset 8 mask 0000000000000FFF} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pktfield, static) = { + .path = "set dpdk interface hqos pktfield", + .short_help = "set dpdk interface hqos pktfield id subport|pipe|tc offset " + "mask ", + .function = set_dpdk_if_hqos_pktfield, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + dpdk_device_config_hqos_t *cfg; + dpdk_device_hqos_per_hqos_thread_t *ht; + dpdk_device_hqos_per_worker_thread_t *wk; + u32 *tctbl; + u32 hw_if_index = (u32) ~ 0; + u32 profile_id, subport_id, i; + struct rte_eth_dev_info dev_info; + dpdk_device_config_t *devconf = 0; + vlib_thread_registration_t *tr; + uword *p = 0; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify interface name!!"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get (xd->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + if (devconf->hqos_enabled == 0) + { + vlib_cli_output (vm, "HQoS disabled for this interface"); + goto done; + } + + /* Detect the set of worker threads */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + + /* Should never happen, shut up Coverity warning */ + if (p == 0) + { + error = clib_error_return (0, "no worker registrations?"); + goto done; + } + + tr = (vlib_thread_registration_t *) p[0]; + + cfg = &devconf->hqos; + ht = xd->hqos_ht; + wk = &xd->hqos_wt[tr->first_index]; + tctbl = wk->hqos_tc_table; + + vlib_cli_output (vm, " Thread:"); + vlib_cli_output (vm, " Input SWQ size = %u packets", cfg->swq_size); + vlib_cli_output (vm, " Enqueue burst size = %u packets", + ht->hqos_burst_enq); + vlib_cli_output (vm, " Dequeue burst size = %u packets", + ht->hqos_burst_deq); + + vlib_cli_output (vm, + " Packet field 0: slab position = %4u, slab bitmask = 0x%016llx (subport)", + wk->hqos_field0_slabpos, wk->hqos_field0_slabmask); + vlib_cli_output (vm, + " Packet field 1: slab position = %4u, slab bitmask = 0x%016llx (pipe)", + wk->hqos_field1_slabpos, 
wk->hqos_field1_slabmask); + vlib_cli_output (vm, + " Packet field 2: slab position = %4u, slab bitmask = 0x%016llx (tc)", + wk->hqos_field2_slabpos, wk->hqos_field2_slabmask); + vlib_cli_output (vm, + " Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)"); + vlib_cli_output (vm, + " [ 0 .. 15]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[0] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[0] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[1] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[1] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[2] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[2] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[3] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[3] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[4] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[4] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[5] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[5] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[6] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[6] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[7] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[7] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[8] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[8] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[9] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[9] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[10] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[10] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[11] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[11] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[12] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[12] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[13] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[13] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[14] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[14] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[15] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[15] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [16 .. 
31]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[16] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[16] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[17] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[17] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[18] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[18] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[19] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[19] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[20] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[20] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[21] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[21] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[22] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[22] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[23] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[23] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[24] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[24] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[25] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[25] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[26] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[26] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[27] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[27] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[28] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[28] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[29] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[29] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[30] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[30] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[31] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[31] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [32 .. 47]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[32] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[32] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[33] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[33] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[34] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[34] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[35] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[35] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[36] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[36] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[37] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[37] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[38] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[38] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[39] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[39] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[40] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[40] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[41] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[41] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[42] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[42] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[43] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[43] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[44] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[44] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[45] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[45] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[46] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[46] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[47] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[47] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [48 .. 
63]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[48] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[48] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[49] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[49] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[50] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[50] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[51] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[51] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[52] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[52] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[53] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[53] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[54] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[54] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[55] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[55] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[56] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[56] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[57] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[57] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[58] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[58] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[59] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[59] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[60] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[60] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[61] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[61] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[62] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[62] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[63] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[63] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, " Port:"); + vlib_cli_output (vm, " Rate = %u bytes/second", cfg->port.rate); + vlib_cli_output (vm, " MTU = %u bytes", cfg->port.mtu); + vlib_cli_output (vm, " Frame overhead = %u bytes", + cfg->port.frame_overhead); + vlib_cli_output (vm, " Number of subports = %u", + cfg->port.n_subports_per_port); + vlib_cli_output (vm, " Number of pipes per subport = %u", + cfg->port.n_pipes_per_subport); + vlib_cli_output (vm, + " Packet queue size: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u packets", + cfg->port.qsize[0], cfg->port.qsize[1], cfg->port.qsize[2], + cfg->port.qsize[3]); + vlib_cli_output (vm, " Number of pipe profiles = %u", + cfg->port.n_pipe_profiles); + + for (subport_id = 0; subport_id < vec_len (cfg->subport); subport_id++) + { + vlib_cli_output (vm, " Subport %u:", subport_id); + vlib_cli_output (vm, " Rate = %u bytes/second", + cfg->subport[subport_id].tb_rate); + vlib_cli_output (vm, " Token bucket size = %u bytes", + cfg->subport[subport_id].tb_size); + vlib_cli_output (vm, + " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", + cfg->subport[subport_id].tc_rate[0], + cfg->subport[subport_id].tc_rate[1], + cfg->subport[subport_id].tc_rate[2], + cfg->subport[subport_id].tc_rate[3]); + vlib_cli_output (vm, " TC period = %u milliseconds", + cfg->subport[subport_id].tc_period); + } + + for (profile_id = 0; profile_id < vec_len (cfg->pipe); profile_id++) + { + vlib_cli_output (vm, " Pipe profile %u:", profile_id); + vlib_cli_output (vm, " Rate = %u bytes/second", + cfg->pipe[profile_id].tb_rate); + vlib_cli_output (vm, " Token bucket size = %u bytes", + cfg->pipe[profile_id].tb_size); + vlib_cli_output (vm, + " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", + cfg->pipe[profile_id].tc_rate[0], + cfg->pipe[profile_id].tc_rate[1], + 
cfg->pipe[profile_id].tc_rate[2], + cfg->pipe[profile_id].tc_rate[3]); + vlib_cli_output (vm, " TC period = %u milliseconds", + cfg->pipe[profile_id].tc_period); +#ifdef RTE_SCHED_SUBPORT_TC_OV + vlib_cli_output (vm, " TC3 oversubscription_weight = %u", + cfg->pipe[profile_id].tc_ov_weight); +#endif + + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + { + vlib_cli_output (vm, + " TC%u WRR weights: Q0 = %u, Q1 = %u, Q2 = %u, Q3 = %u", + i, cfg->pipe[profile_id].wrr_weights[i * 4], + cfg->pipe[profile_id].wrr_weights[i * 4 + 1], + cfg->pipe[profile_id].wrr_weights[i * 4 + 2], + cfg->pipe[profile_id].wrr_weights[i * 4 + 3]); + } + } + +#ifdef RTE_SCHED_RED + vlib_cli_output (vm, " Weighted Random Early Detection (WRED):"); + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + { + vlib_cli_output (vm, " TC%u min: G = %u, Y = %u, R = %u", i, + cfg->port.red_params[i][e_RTE_METER_GREEN].min_th, + cfg->port.red_params[i][e_RTE_METER_YELLOW].min_th, + cfg->port.red_params[i][e_RTE_METER_RED].min_th); + + vlib_cli_output (vm, " TC%u max: G = %u, Y = %u, R = %u", i, + cfg->port.red_params[i][e_RTE_METER_GREEN].max_th, + cfg->port.red_params[i][e_RTE_METER_YELLOW].max_th, + cfg->port.red_params[i][e_RTE_METER_RED].max_th); + + vlib_cli_output (vm, + " TC%u inverted probability: G = %u, Y = %u, R = %u", + i, cfg->port.red_params[i][e_RTE_METER_GREEN].maxp_inv, + cfg->port.red_params[i][e_RTE_METER_YELLOW].maxp_inv, + cfg->port.red_params[i][e_RTE_METER_RED].maxp_inv); + + vlib_cli_output (vm, " TC%u weight: R = %u, Y = %u, R = %u", i, + cfg->port.red_params[i][e_RTE_METER_GREEN].wq_log2, + cfg->port.red_params[i][e_RTE_METER_YELLOW].wq_log2, + cfg->port.red_params[i][e_RTE_METER_RED].wq_log2); + } +#endif + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to display details of an output interface's HQoS + * settings. + * + * @cliexpar + * Example of how to display HQoS settings for an interfaces: + * @cliexstart{show dpdk interface hqos GigabitEthernet0/8/0} + * Thread: + * Input SWQ size = 4096 packets + * Enqueue burst size = 256 packets + * Dequeue burst size = 220 packets + * Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport) + * Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe) + * Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc) + * Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...) + * [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * [48 .. 
63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * Port: + * Rate = 1250000000 bytes/second + * MTU = 1514 bytes + * Frame overhead = 24 bytes + * Number of subports = 1 + * Number of pipes per subport = 4096 + * Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets + * Number of pipe profiles = 2 + * Subport 0: + * Rate = 1250000000 bytes/second + * Token bucket size = 1000000 bytes + * Traffic class rate: TC0 = 1250000000, TC1 = 1250000000, TC2 = 1250000000, TC3 = 1250000000 bytes/second + * TC period = 10 milliseconds + * Pipe profile 0: + * Rate = 305175 bytes/second + * Token bucket size = 1000000 bytes + * Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second + * TC period = 40 milliseconds + * TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos, static) = { + .path = "show dpdk interface hqos", + .short_help = "show dpdk interface hqos ", + .function = show_dpdk_if_hqos, +}; + +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; +#ifdef RTE_SCHED_COLLECT_STATS + dpdk_main_t *dm = &dpdk_main; + u32 hw_if_index = (u32) ~ 0; + u32 subport = (u32) ~ 0; + u32 pipe = (u32) ~ 0; + u32 tc = (u32) ~ 0; + u32 tc_q = (u32) ~ 0; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + uword *p = 0; + struct rte_eth_dev_info dev_info; + dpdk_device_config_t *devconf = 0; + u32 qindex; + struct rte_sched_queue_stats stats; + u16 qlen; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + + else if (unformat (line_input, "subport %d", &subport)) + ; + + else if (unformat (line_input, "pipe %d", &pipe)) + ; + + else if (unformat (line_input, "tc %d", &tc)) + ; + + else if (unformat (line_input, "tc_q %d", &tc_q)) + ; + + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify interface name!!"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get (xd->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + if (devconf->hqos_enabled == 0) + { + vlib_cli_output (vm, "HQoS disabled for this interface"); + goto done; + } + + /* + * Figure out which queue to query. cf rte_sched_port_qindex. (Not sure why + * that method isn't made public by DPDK - how _should_ we get the queue ID?) 
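+ *
+ * Worked example with the defaults shown by 'show dpdk interface hqos'
+ * (4096 pipes per subport, 4 traffic classes, 4 queues per class):
+ * subport 0, pipe 3181, tc 0, tc_q 0 yields
+ * ((0 * 4096 + 3181) * 4 + 0) * 4 + 0 = 50896.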
+ */ + qindex = subport * devconf->hqos.port.n_pipes_per_subport + pipe; + qindex = qindex * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc; + qindex = qindex * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q; + + if (rte_sched_queue_read_stats (xd->hqos_ht->hqos, qindex, &stats, &qlen) != + 0) + { + error = clib_error_return (0, "failed to read stats"); + goto done; + } + + vlib_cli_output (vm, "%=24s%=16s", "Stats Parameter", "Value"); + vlib_cli_output (vm, "%=24s%=16d", "Packets", stats.n_pkts); + vlib_cli_output (vm, "%=24s%=16d", "Packets dropped", stats.n_pkts_dropped); +#ifdef RTE_SCHED_RED + vlib_cli_output (vm, "%=24s%=16d", "Packets dropped (RED)", + stats.n_pkts_red_dropped); +#endif + vlib_cli_output (vm, "%=24s%=16d", "Bytes", stats.n_bytes); + vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped); + +#else + + /* Get a line of input */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + vlib_cli_output (vm, "RTE_SCHED_COLLECT_STATS disabled in DPDK"); + goto done; + +#endif + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to display statistics associated with a HQoS traffic class + * queue. + * + * @note + * Statistic collection by the scheduler is disabled by default in DPDK. In order to + * turn it on, add the following line to '../vpp/dpdk/Makefile': + * - $(call set,RTE_SCHED_COLLECT_STATS,y) + * + * @cliexpar + * Example of how to display statistics of HQoS a HQoS traffic class queue: + * @cliexstart{show dpdk hqos queue GigabitEthernet0/9/0 subport 0 pipe 3181 tc 0 tc_q 0} + * Stats Parameter Value + * Packets 140 + * Packets dropped 0 + * Bytes 8400 + * Bytes dropped 0 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = { + .path = "show dpdk hqos queue", + .short_help = "show dpdk hqos queue subport pipe tc tc_q ", + .function = show_dpdk_hqos_queue_stats, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_version_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ +#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c); + _("DPDK Version", "%s", rte_version ()); + _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str); +#undef _ + return 0; +} + +/*? + * This command is used to display the current DPDK version and + * the list of arguments passed to DPDK when started. + * + * @cliexpar + * Example of how to display how many DPDK buffer test command has allcoated: + * @cliexstart{show dpdk version} + * DPDK Version: DPDK 16.11.0 + * DPDK EAL init args: -c 1 -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -w 0000:00:08.0 -w 0000:00:09.0 --master-lcore 0 --socket-mem 256 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_vpe_version_command, static) = { + .path = "show dpdk version", + .short_help = "show dpdk version", + .function = show_dpdk_version_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +dpdk_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c new file mode 100644 index 00000000..50b26689 --- /dev/null +++ b/src/plugins/dpdk/device/device.c @@ -0,0 +1,852 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#define foreach_dpdk_tx_func_error \ + _(BAD_RETVAL, "DPDK tx function returned an error") \ + _(RING_FULL, "Tx packet drops (ring full)") \ + _(PKT_DROP, "Tx packet drops (dpdk tx failure)") \ + _(REPL_FAIL, "Tx packet drops (replication failure)") + +typedef enum +{ +#define _(f,s) DPDK_TX_FUNC_ERROR_##f, + foreach_dpdk_tx_func_error +#undef _ + DPDK_TX_FUNC_N_ERROR, +} dpdk_tx_func_error_t; + +static char *dpdk_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_dpdk_tx_func_error +#undef _ +}; + +clib_error_t * +dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address) +{ + int error; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + + error = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) address); + + if (error) + { + return clib_error_return (0, "mac address set failed: %d", error); + } + else + { + vec_reset_length (xd->default_mac_address); + vec_add (xd->default_mac_address, address, sizeof (address)); + return NULL; + } +} + +clib_error_t * +dpdk_set_mc_filter (vnet_hw_interface_t * hi, + struct ether_addr mc_addr_vec[], int naddr) +{ + int error; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + + error = rte_eth_dev_set_mc_addr_list (xd->device_index, mc_addr_vec, naddr); + + if (error) + { + return clib_error_return (0, "mc addr list failed: %d", error); + } + else + { + return NULL; + } +} + +struct rte_mbuf * +dpdk_replicate_packet_mb (vlib_buffer_t * b) +{ + dpdk_main_t *dm = &dpdk_main; + struct rte_mbuf **mbufs = 0, *s, *d; + u8 nb_segs; + unsigned socket_id = rte_socket_id (); + int i; + + ASSERT (dm->pktmbuf_pools[socket_id]); + s = rte_mbuf_from_vlib_buffer (b); + nb_segs = s->nb_segs; + vec_validate (mbufs, nb_segs - 1); + + if (rte_pktmbuf_alloc_bulk (dm->pktmbuf_pools[socket_id], mbufs, nb_segs)) + { + vec_free (mbufs); + return 0; + } + + d = mbufs[0]; + d->nb_segs = s->nb_segs; + d->data_len = s->data_len; + d->pkt_len = s->pkt_len; + d->data_off = s->data_off; + clib_memcpy (d->buf_addr, s->buf_addr, RTE_PKTMBUF_HEADROOM + s->data_len); + + for (i = 1; i < nb_segs; i++) + { + d->next = mbufs[i]; + d = mbufs[i]; + s = s->next; + d->data_len = s->data_len; + clib_memcpy (d->buf_addr, s->buf_addr, + RTE_PKTMBUF_HEADROOM + s->data_len); + } + + d = mbufs[0]; + vec_free (mbufs); + return d; +} + +static void +dpdk_tx_trace_buffer (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 buffer_index, vlib_buffer_t * buffer) +{ + vlib_main_t *vm = vlib_get_main (); + dpdk_tx_dma_trace_t *t0; + struct rte_mbuf *mb; + + mb = rte_mbuf_from_vlib_buffer (buffer); + + t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0])); + t0->queue_index = queue_id; + t0->device_index = xd->device_index; + t0->buffer_index = buffer_index; + clib_memcpy 
(&t0->mb, mb, sizeof (t0->mb)); + clib_memcpy (&t0->buffer, buffer, + sizeof (buffer[0]) - sizeof (buffer->pre_data)); + clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data, + sizeof (t0->buffer.pre_data)); +} + +static_always_inline void +dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b, + int maybe_multiseg) +{ + struct rte_mbuf *mb, *first_mb, *last_mb; + + /* buffer is coming from non-dpdk source so we need to init + rte_mbuf header */ + if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_EXT_HDR_VALID) == 0)) + { + vlib_buffer_t *b2 = b; + last_mb = mb = rte_mbuf_from_vlib_buffer (b2); + rte_pktmbuf_reset (mb); + while (maybe_multiseg && (b2->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + b2 = vlib_get_buffer (vm, b2->next_buffer); + mb = rte_mbuf_from_vlib_buffer (b2); + rte_pktmbuf_reset (mb); + } + } + + last_mb = first_mb = mb = rte_mbuf_from_vlib_buffer (b); + first_mb->nb_segs = 1; + mb->data_len = b->current_length; + mb->pkt_len = maybe_multiseg ? vlib_buffer_length_in_chain (vm, b) : + b->current_length; + mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; + + while (maybe_multiseg && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + b = vlib_get_buffer (vm, b->next_buffer); + mb = rte_mbuf_from_vlib_buffer (b); + last_mb->next = mb; + last_mb = mb; + mb->data_len = b->current_length; + mb->pkt_len = b->current_length; + mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; + first_mb->nb_segs++; + if (PREDICT_FALSE (b->n_add_refs)) + { + rte_mbuf_refcnt_update (mb, b->n_add_refs); + b->n_add_refs = 0; + } + } +} + +/* + * This function calls the dpdk's tx_burst function to transmit the packets + * on the tx_vector. It manages a lock per-device if the device does not + * support multiple queues. It returns the number of packets untransmitted + * on the tx_vector. If all packets are transmitted (the normal case), the + * function returns 0. + * + * The function assumes there is at least one packet on the tx_vector. + */ +static_always_inline + u32 tx_burst_vector_internal (vlib_main_t * vm, + dpdk_device_t * xd, + struct rte_mbuf **tx_vector) +{ + dpdk_main_t *dm = &dpdk_main; + u32 n_packets; + u32 tx_head; + u32 tx_tail; + u32 n_retry; + int rv; + int queue_id; + tx_ring_hdr_t *ring; + + ring = vec_header (tx_vector, sizeof (*ring)); + + n_packets = ring->tx_head - ring->tx_tail; + + tx_head = ring->tx_head % xd->nb_tx_desc; + + /* + * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to + * unpredictable results. + */ + ASSERT (n_packets > 0); + + /* + * Check for tx_vector overflow. If this fails it is a system configuration + * error. The ring should be sized big enough to handle the largest un-flowed + * off burst from a traffic manager. A larger size also helps performance + * a bit because it decreases the probability of having to issue two tx_burst + * calls due to a ring wrap. + */ + ASSERT (n_packets < xd->nb_tx_desc); + ASSERT (ring->tx_tail == 0); + + n_retry = 16; + queue_id = vm->cpu_index; + + do + { + /* start the burst at the tail */ + tx_tail = ring->tx_tail % xd->nb_tx_desc; + + /* + * This device only supports one TX queue, + * and we're running multi-threaded... 
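+ * Pick a queue as the thread index modulo the number of usable tx queues,
+ * then spin on that queue's lock, advancing to the next queue while it is
+ * busy; the lock is released again immediately after the burst.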
+ */ + if (PREDICT_FALSE (xd->lockp != 0)) + { + queue_id = queue_id % xd->tx_q_used; + while (__sync_lock_test_and_set (xd->lockp[queue_id], 1)) + /* zzzz */ + queue_id = (queue_id + 1) % xd->tx_q_used; + } + + if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */ + { + /* no wrap, transmit in one burst */ + dpdk_device_hqos_per_worker_thread_t *hqos = + &xd->hqos_wt[vm->cpu_index]; + + ASSERT (hqos->swq != NULL); + + dpdk_hqos_metadata_set (hqos, + &tx_vector[tx_tail], tx_head - tx_tail); + rv = rte_ring_sp_enqueue_burst (hqos->swq, + (void **) &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); + } + else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) + { + /* no wrap, transmit in one burst */ + rv = rte_eth_tx_burst (xd->device_index, + (uint16_t) queue_id, + &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); + } + else + { + ASSERT (0); + rv = 0; + } + + if (PREDICT_FALSE (xd->lockp != 0)) + *xd->lockp[queue_id] = 0; + + if (PREDICT_FALSE (rv < 0)) + { + // emit non-fatal message, bump counter + vnet_main_t *vnm = dm->vnet_main; + vnet_interface_main_t *im = &vnm->interface_main; + u32 node_index; + + node_index = vec_elt_at_index (im->hw_interfaces, + xd->vlib_hw_if_index)->tx_node_index; + + vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1); + clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index, + rv); + return n_packets; // untransmitted packets + } + ring->tx_tail += (u16) rv; + n_packets -= (uint16_t) rv; + } + while (rv && n_packets && (n_retry > 0)); + + return n_packets; +} + +static_always_inline void +dpdk_prefetch_buffer_by_index (vlib_main_t * vm, u32 bi) +{ + vlib_buffer_t *b; + struct rte_mbuf *mb; + b = vlib_get_buffer (vm, bi); + mb = rte_mbuf_from_vlib_buffer (b); + CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); +} + +static_always_inline void +dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp) +{ + dpdk_main_t *dm = &dpdk_main; + u32 my_cpu = vm->cpu_index; + struct rte_mbuf *mb_new; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0) + return; + + mb_new = dpdk_replicate_packet_mb (b); + if (PREDICT_FALSE (mb_new == 0)) + { + vlib_error_count (vm, node->node_index, + DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); + b->flags |= VLIB_BUFFER_REPL_FAIL; + } + else + *mbp = mb_new; + + vec_add1 (dm->recycle[my_cpu], bi); +} + +/* + * Transmits the packets on the frame to the interface associated with the + * node. It first copies packets on the frame to a tx_vector containing the + * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal + * which calls the dpdk tx_burst function. 
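+ *
+ * On the normal path the return value is the number of packets actually
+ * transmitted; packets still left after the burst are freed and charged to
+ * the interface tx-error counter (PKT_DROP), while a frame that would
+ * overflow the ring is dropped in full (RING_FULL).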
+ */ +static uword +dpdk_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * f) +{ + dpdk_main_t *dm = &dpdk_main; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance); + u32 n_packets = f->n_vectors; + u32 n_left; + u32 *from; + struct rte_mbuf **tx_vector; + u16 i; + u16 nb_tx_desc = xd->nb_tx_desc; + int queue_id; + u32 my_cpu; + u32 tx_pkts = 0; + tx_ring_hdr_t *ring; + u32 n_on_ring; + + my_cpu = vm->cpu_index; + + queue_id = my_cpu; + + tx_vector = xd->tx_vectors[queue_id]; + ring = vec_header (tx_vector, sizeof (*ring)); + + n_on_ring = ring->tx_head - ring->tx_tail; + from = vlib_frame_vector_args (f); + + ASSERT (n_packets <= VLIB_FRAME_SIZE); + + if (PREDICT_FALSE (n_on_ring + n_packets > nb_tx_desc)) + { + /* + * Overflowing the ring should never happen. + * If it does then drop the whole frame. + */ + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL, + n_packets); + + while (n_packets--) + { + u32 bi0 = from[n_packets]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer (b0); + rte_pktmbuf_free (mb0); + } + return n_on_ring; + } + + if (PREDICT_FALSE (dm->tx_pcap_enable)) + { + n_left = n_packets; + while (n_left > 0) + { + u32 bi0 = from[0]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + if (dm->pcap_sw_if_index == 0 || + dm->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_TX]) + pcap_add_buffer (&dm->pcap_main, vm, bi0, 512); + from++; + n_left--; + } + } + + from = vlib_frame_vector_args (f); + n_left = n_packets; + i = ring->tx_head % nb_tx_desc; + + while (n_left >= 8) + { + u32 bi0, bi1, bi2, bi3; + struct rte_mbuf *mb0, *mb1, *mb2, *mb3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 or_flags; + + dpdk_prefetch_buffer_by_index (vm, from[4]); + dpdk_prefetch_buffer_by_index (vm, from[5]); + dpdk_prefetch_buffer_by_index (vm, from[6]); + dpdk_prefetch_buffer_by_index (vm, from[7]); + + bi0 = from[0]; + bi1 = from[1]; + bi2 = from[2]; + bi3 = from[3]; + from += 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + or_flags = b0->flags | b1->flags | b2->flags | b3->flags; + + if (or_flags & VLIB_BUFFER_NEXT_PRESENT) + { + dpdk_validate_rte_mbuf (vm, b0, 1); + dpdk_validate_rte_mbuf (vm, b1, 1); + dpdk_validate_rte_mbuf (vm, b2, 1); + dpdk_validate_rte_mbuf (vm, b3, 1); + } + else + { + dpdk_validate_rte_mbuf (vm, b0, 0); + dpdk_validate_rte_mbuf (vm, b1, 0); + dpdk_validate_rte_mbuf (vm, b2, 0); + dpdk_validate_rte_mbuf (vm, b3, 0); + } + + mb0 = rte_mbuf_from_vlib_buffer (b0); + mb1 = rte_mbuf_from_vlib_buffer (b1); + mb2 = rte_mbuf_from_vlib_buffer (b2); + mb3 = rte_mbuf_from_vlib_buffer (b3); + + if (PREDICT_FALSE (or_flags & VLIB_BUFFER_RECYCLE)) + { + dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); + dpdk_buffer_recycle (vm, node, b1, bi1, &mb1); + dpdk_buffer_recycle (vm, node, b2, bi2, &mb2); + dpdk_buffer_recycle (vm, node, b3, bi3, &mb3); + + /* dont enqueue packets if replication failed as they must + be sent back to recycle */ + if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb0; + if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb1; + if (PREDICT_TRUE ((b2->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb2; + if (PREDICT_TRUE ((b3->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + 
tx_vector[i++ % nb_tx_desc] = mb3; + } + else + { + if (PREDICT_FALSE (i + 3 >= nb_tx_desc)) + { + tx_vector[i++ % nb_tx_desc] = mb0; + tx_vector[i++ % nb_tx_desc] = mb1; + tx_vector[i++ % nb_tx_desc] = mb2; + tx_vector[i++ % nb_tx_desc] = mb3; + i %= nb_tx_desc; + } + else + { + tx_vector[i++] = mb0; + tx_vector[i++] = mb1; + tx_vector[i++] = mb2; + tx_vector[i++] = mb3; + } + } + + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); + if (b1->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1); + if (b2->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi2, b2); + if (b3->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi3, b3); + } + + n_left -= 4; + } + while (n_left > 0) + { + u32 bi0; + struct rte_mbuf *mb0; + vlib_buffer_t *b0; + + bi0 = from[0]; + from++; + + b0 = vlib_get_buffer (vm, bi0); + + dpdk_validate_rte_mbuf (vm, b0, 1); + + mb0 = rte_mbuf_from_vlib_buffer (b0); + dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + if (b0->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); + + if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + { + tx_vector[i % nb_tx_desc] = mb0; + i++; + } + n_left--; + } + + /* account for additional packets in the ring */ + ring->tx_head += n_packets; + n_on_ring = ring->tx_head - ring->tx_tail; + + /* transmit as many packets as possible */ + n_packets = tx_burst_vector_internal (vm, xd, tx_vector); + + /* + * tx_pkts is the number of packets successfully transmitted + * This is the number originally on ring minus the number remaining on ring + */ + tx_pkts = n_on_ring - n_packets; + + { + /* If there is no callback then drop any non-transmitted packets */ + if (PREDICT_FALSE (n_packets)) + { + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + n_packets); + + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, + n_packets); + + while (n_packets--) + rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); + } + + /* Reset head/tail to avoid unnecessary wrap */ + ring->tx_head = 0; + ring->tx_tail = 0; + } + + /* Recycle replicated buffers */ + if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu]))) + { + vlib_buffer_free (vm, dm->recycle[my_cpu], + vec_len (dm->recycle[my_cpu])); + _vec_len (dm->recycle[my_cpu]) = 0; + } + + ASSERT (ring->tx_head >= ring->tx_tail); + + return tx_pkts; +} + +static void +dpdk_clear_hw_interface_counters (u32 instance) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, instance); + + /* + * Set the "last_cleared_stats" to the current stats, so that + * things appear to clear from a display perspective. 
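+   * The PMD counters themselves are not reset; code that formats the stats
+   * is expected to subtract this snapshot.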
+ */ + dpdk_update_counters (xd, vlib_time_now (dm->vlib_main)); + + clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof (xd->stats)); + clib_memcpy (xd->last_cleared_xstats, xd->xstats, + vec_len (xd->last_cleared_xstats) * + sizeof (xd->last_cleared_xstats[0])); + +} + +static clib_error_t * +dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hif->dev_instance); + int rv = 0; + + if (is_up) + { + f64 now = vlib_time_now (dm->vlib_main); + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) + { + rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + } + + if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) + rte_eth_promiscuous_enable (xd->device_index); + else + rte_eth_promiscuous_disable (xd->device_index); + + rte_eth_allmulticast_enable (xd->device_index); + xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP; + dpdk_update_counters (xd, now); + dpdk_update_link_state (xd, now); + } + else + { + xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP; + + rte_eth_allmulticast_disable (xd->device_index); + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); + rte_eth_dev_stop (xd->device_index); + + /* For bonded interface, stop slave links */ + if (xd->pmd == VNET_DPDK_PMD_BOND) + { + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16); + while (nlink >= 1) + { + u8 dpdk_port = slink[--nlink]; + rte_eth_dev_stop (dpdk_port); + } + } + } + + if (rv < 0) + clib_warning ("rte_eth_dev_%s error: %d", is_up ? 
"start" : "stop", rv); + + return /* no error */ 0; +} + +/* + * Dynamically redirect all pkts from a specific interface + * to the specified node + */ +static void +dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + dpdk_main_t *xm = &dpdk_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + xd->per_interface_next_index = node_index; + return; + } + + xd->per_interface_next_index = + vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index); +} + + +static clib_error_t * +dpdk_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + dpdk_main_t *xm = &dpdk_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + vnet_sw_interface_t *t = (vnet_sw_interface_t *) st; + int r, vlan_offload; + u32 prev_subifs = xd->num_subifs; + clib_error_t *err = 0; + + if (is_add) + xd->num_subifs++; + else if (xd->num_subifs) + xd->num_subifs--; + + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + goto done; + + /* currently we program VLANS only for IXGBE VF and I40E VF */ + if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF)) + goto done; + + if (t->sub.eth.flags.no_tags == 1) + goto done; + + if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1)) + { + xd->num_subifs = prev_subifs; + err = clib_error_return (0, "unsupported VLAN setup"); + goto done; + } + + vlan_offload = rte_eth_dev_get_vlan_offload (xd->device_index); + vlan_offload |= ETH_VLAN_FILTER_OFFLOAD; + + if ((r = rte_eth_dev_set_vlan_offload (xd->device_index, vlan_offload))) + { + xd->num_subifs = prev_subifs; + err = clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d", + xd->device_index, r); + goto done; + } + + + if ((r = + rte_eth_dev_vlan_filter (xd->device_index, t->sub.eth.outer_vlan_id, + is_add))) + { + xd->num_subifs = prev_subifs; + err = clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d", + xd->device_index, r); + goto done; + } + +done: + if (xd->num_subifs) + xd->flags |= DPDK_DEVICE_FLAG_HAVE_SUBIF; + else + xd->flags &= ~DPDK_DEVICE_FLAG_HAVE_SUBIF; + + return err; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (dpdk_device_class) = { + .name = "dpdk", + .tx_function = dpdk_interface_tx, + .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR, + .tx_function_error_strings = dpdk_tx_func_error_strings, + .format_device_name = format_dpdk_device_name, + .format_device = format_dpdk_device, + .format_tx_trace = format_dpdk_tx_dma_trace, + .clear_counters = dpdk_clear_hw_interface_counters, + .admin_up_down_function = dpdk_interface_admin_up_down, + .subif_add_del_function = dpdk_subif_add_del_function, + .rx_redirect_to_node = dpdk_set_interface_next_node, + .mac_addr_change_function = dpdk_set_mac_address, +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx) +/* *INDENT-ON* */ + +#define UP_DOWN_FLAG_EVENT 1 + +uword +admin_up_down_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + clib_error_t *error = 0; + uword event_type; + uword *event_data = 0; + u32 sw_if_index; + u32 flags; + + while (1) + { + vlib_process_wait_for_event (vm); + + event_type = vlib_process_get_events (vm, &event_data); + + dpdk_main.admin_up_down_in_progress = 1; + + switch (event_type) + { + case 
UP_DOWN_FLAG_EVENT: + { + if (vec_len (event_data) == 2) + { + sw_if_index = event_data[0]; + flags = event_data[1]; + error = + vnet_sw_interface_set_flags (vnet_get_main (), sw_if_index, + flags); + clib_error_report (error); + } + } + break; + } + + vec_reset_length (event_data); + + dpdk_main.admin_up_down_in_progress = 0; + + } + return 0; /* or not */ +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (admin_up_down_process_node,static) = { + .function = admin_up_down_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "admin-up-down-process", + .process_log2_n_stack_bytes = 17, // 256KB +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h new file mode 100644 index 00000000..2a1a6205 --- /dev/null +++ b/src/plugins/dpdk/device/dpdk.h @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_dpdk_h__ +#define __included_dpdk_h__ + +/* $$$$ We should rename always_inline -> clib_always_inline */ +#undef always_inline + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if CLIB_DEBUG > 0 +#define always_inline static inline +#else +#define always_inline static inline __attribute__ ((__always_inline__)) +#endif + +#include + +#define NB_MBUF (16<<10) + +extern vnet_device_class_t dpdk_device_class; +extern vlib_node_registration_t dpdk_input_node; +extern vlib_node_registration_t handoff_dispatch_node; + +#define foreach_dpdk_pmd \ + _ ("net_thunderx", THUNDERX) \ + _ ("net_e1000_em", E1000EM) \ + _ ("net_e1000_igb", IGB) \ + _ ("net_e1000_igb_vf", IGBVF) \ + _ ("net_ixgbe", IXGBE) \ + _ ("net_ixgbe_vf", IXGBEVF) \ + _ ("net_i40e", I40E) \ + _ ("net_i40e_vf", I40EVF) \ + _ ("net_virtio", VIRTIO) \ + _ ("net_enic", ENIC) \ + _ ("net_vmxnet3", VMXNET3) \ + _ ("AF_PACKET PMD", AF_PACKET) \ + _ ("rte_bond_pmd", BOND) \ + _ ("net_fm10k", FM10K) \ + _ ("net_cxgbe", CXGBE) \ + _ ("net_mlx5", MLX5) \ + _ ("net_dpaa2", DPAA2) + +typedef enum +{ + VNET_DPDK_PMD_NONE, +#define _(s,f) VNET_DPDK_PMD_##f, + foreach_dpdk_pmd +#undef _ + VNET_DPDK_PMD_UNKNOWN, /* must be last */ +} dpdk_pmd_t; + +typedef enum +{ + VNET_DPDK_PORT_TYPE_ETH_1G, + VNET_DPDK_PORT_TYPE_ETH_10G, + VNET_DPDK_PORT_TYPE_ETH_40G, + VNET_DPDK_PORT_TYPE_ETH_100G, + VNET_DPDK_PORT_TYPE_ETH_BOND, + VNET_DPDK_PORT_TYPE_ETH_SWITCH, + VNET_DPDK_PORT_TYPE_AF_PACKET, + VNET_DPDK_PORT_TYPE_UNKNOWN, +} dpdk_port_type_t; + +/* + * The header for the tx_vector in dpdk_device_t. + * Head and tail are indexes into the tx_vector and are of type + * u64 so they never overflow. 
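+ * A minimal sketch of the intended arithmetic, assuming nb_tx_desc is the
+ * ring size (as used by dpdk_interface_tx earlier in this patch):
+ *
+ *   n_on_ring = ring->tx_head - ring->tx_tail;   (packets queued, unsent)
+ *   slot      = ring->tx_head % nb_tx_desc;      (next descriptor index)
+ *
+ * Since both counters are free-running u64 values, the subtraction stays
+ * correct without any explicit wrap handling.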
+ */ +typedef struct +{ + u64 tx_head; + u64 tx_tail; +} tx_ring_hdr_t; + +typedef struct +{ + struct rte_ring *swq; + + u64 hqos_field0_slabmask; + u32 hqos_field0_slabpos; + u32 hqos_field0_slabshr; + u64 hqos_field1_slabmask; + u32 hqos_field1_slabpos; + u32 hqos_field1_slabshr; + u64 hqos_field2_slabmask; + u32 hqos_field2_slabpos; + u32 hqos_field2_slabshr; + u32 hqos_tc_table[64]; +} dpdk_device_hqos_per_worker_thread_t; + +typedef struct +{ + struct rte_ring **swq; + struct rte_mbuf **pkts_enq; + struct rte_mbuf **pkts_deq; + struct rte_sched_port *hqos; + u32 hqos_burst_enq; + u32 hqos_burst_deq; + u32 pkts_enq_len; + u32 swq_pos; + u32 flush_count; +} dpdk_device_hqos_per_hqos_thread_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + volatile u32 **lockp; + + /* Instance ID */ + u32 device_index; + + u32 vlib_hw_if_index; + u32 vlib_sw_if_index; + + /* next node index if we decide to steal the rx graph arc */ + u32 per_interface_next_index; + + /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */ + struct rte_mbuf ***tx_vectors; /* one per worker thread */ + struct rte_mbuf ***rx_vectors; + + /* vector of traced contexts, per device */ + u32 **d_trace_buffers; + + dpdk_pmd_t pmd:8; + i8 cpu_socket; + + u16 flags; +#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0) +#define DPDK_DEVICE_FLAG_PROMISC (1 << 1) +#define DPDK_DEVICE_FLAG_PMD (1 << 2) +#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3) +#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4) +#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5) +#define DPDK_DEVICE_FLAG_HQOS (1 << 6) + + u16 nb_tx_desc; + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); + + u8 *interface_name_suffix; + + /* number of sub-interfaces */ + u16 num_subifs; + + /* PMD related */ + u16 tx_q_used; + u16 rx_q_used; + u16 nb_rx_desc; + u16 *cpu_socket_id_by_queue; + struct rte_eth_conf port_conf; + struct rte_eth_txconf tx_conf; + + /* HQoS related */ + dpdk_device_hqos_per_worker_thread_t *hqos_wt; + dpdk_device_hqos_per_hqos_thread_t *hqos_ht; + + /* af_packet */ + u8 af_packet_port_id; + + struct rte_eth_link link; + f64 time_last_link_update; + + struct rte_eth_stats stats; + struct rte_eth_stats last_stats; + struct rte_eth_stats last_cleared_stats; + struct rte_eth_xstat *xstats; + struct rte_eth_xstat *last_cleared_xstats; + f64 time_last_stats_update; + dpdk_port_type_t port_type; + + /* mac address */ + u8 *default_mac_address; +} dpdk_device_t; + +#define DPDK_STATS_POLL_INTERVAL (10.0) +#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */ + +#define DPDK_LINK_POLL_INTERVAL (3.0) +#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */ + +typedef struct +{ + u32 device; + u16 queue_id; +} dpdk_device_and_queue_t; + +#ifndef DPDK_HQOS_DBG_BYPASS +#define DPDK_HQOS_DBG_BYPASS 0 +#endif + +#ifndef HQOS_FLUSH_COUNT_THRESHOLD +#define HQOS_FLUSH_COUNT_THRESHOLD 100000 +#endif + +typedef struct dpdk_device_config_hqos_t +{ + u32 hqos_thread; + u32 hqos_thread_valid; + + u32 swq_size; + u32 burst_enq; + u32 burst_deq; + + u32 pktfield0_slabpos; + u32 pktfield1_slabpos; + u32 pktfield2_slabpos; + u64 pktfield0_slabmask; + u64 pktfield1_slabmask; + u64 pktfield2_slabmask; + u32 tc_table[64]; + + struct rte_sched_port_params port; + struct rte_sched_subport_params *subport; + struct rte_sched_pipe_params *pipe; + uint32_t *pipe_map; +} dpdk_device_config_hqos_t; + +int dpdk_hqos_validate_mask (u64 mask, u32 n); +void dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * + hqos, u32 pipe_profile_id); +void 
dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos); +clib_error_t *dpdk_port_setup_hqos (dpdk_device_t * xd, + dpdk_device_config_hqos_t * hqos); +void dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, + struct rte_mbuf **pkts, u32 n_pkts); + +#define foreach_dpdk_device_config_item \ + _ (num_rx_queues) \ + _ (num_tx_queues) \ + _ (num_rx_desc) \ + _ (num_tx_desc) \ + _ (rss_fn) + +typedef struct +{ + vlib_pci_addr_t pci_addr; + u8 is_blacklisted; + u8 vlan_strip_offload; +#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0 +#define DPDK_DEVICE_VLAN_STRIP_OFF 1 +#define DPDK_DEVICE_VLAN_STRIP_ON 2 + +#define _(x) uword x; + foreach_dpdk_device_config_item +#undef _ + clib_bitmap_t * workers; + u32 hqos_enabled; + dpdk_device_config_hqos_t hqos; +} dpdk_device_config_t; + +typedef struct +{ + + /* Config stuff */ + u8 **eal_init_args; + u8 *eal_init_args_str; + u8 *uio_driver_name; + u8 no_multi_seg; + u8 enable_tcp_udp_checksum; + u8 cryptodev; + + /* Required config parameters */ + u8 coremask_set_manually; + u8 nchannels_set_manually; + u32 coremask; + u32 nchannels; + u32 num_mbufs; + u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */ + + /* + * format interface names ala xxxEthernet%d/%d/%d instead of + * xxxEthernet%x/%x/%x. + */ + u8 interface_name_format_decimal; + + /* per-device config */ + dpdk_device_config_t default_devconf; + dpdk_device_config_t *dev_confs; + uword *device_config_index_by_pci_addr; + +} dpdk_config_main_t; + +dpdk_config_main_t dpdk_config_main; + +typedef struct +{ + + /* Devices */ + dpdk_device_t *devices; + dpdk_device_and_queue_t **devices_by_cpu; + dpdk_device_and_queue_t **devices_by_hqos_cpu; + + /* per-thread recycle lists */ + u32 **recycle; + + /* buffer flags template, configurable to enable/disable tcp / udp cksum */ + u32 buffer_flags_template; + + /* vlib buffer free list, must be same size as an rte_mbuf */ + u32 vlib_buffer_free_list_index; + + /* Ethernet input node index */ + u32 ethernet_input_node_index; + + /* pcap tracing [only works if (CLIB_DEBUG > 0)] */ + int tx_pcap_enable; + pcap_main_t pcap_main; + u8 *pcap_filename; + u32 pcap_sw_if_index; + u32 pcap_pkts_to_capture; + + /* hashes */ + uword *dpdk_device_by_kni_port_id; + uword *vu_sw_if_index_by_listener_fd; + uword *vu_sw_if_index_by_sock_fd; + u32 *vu_inactive_interfaces_device_index; + + /* + * flag indicating that a posted admin up/down + * (via post_sw_interface_set_flags) is in progress + */ + u8 admin_up_down_in_progress; + + u8 use_rss; + + /* which cpus are running dpdk-input */ + int input_cpu_first_index; + int input_cpu_count; + + /* which cpus are running I/O TX */ + int hqos_cpu_first_index; + int hqos_cpu_count; + + /* control interval of dpdk link state and stat polling */ + f64 link_state_poll_interval; + f64 stat_poll_interval; + + /* Sleep for this many MS after each device poll */ + u32 poll_sleep; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + dpdk_config_main_t *conf; + + /* mempool */ + struct rte_mempool **pktmbuf_pools; + + /* API message ID base */ + u16 msg_id_base; +} dpdk_main_t; + +dpdk_main_t dpdk_main; + +typedef struct +{ + u32 buffer_index; + u16 device_index; + u8 queue_index; + struct rte_mbuf mb; + /* Copy of VLIB buffer; packet data stored in pre_data. 
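This
+     structure appears to mirror dpdk_rx_dma_trace_t below, minus the
+     data[] bytes kept for the rx hex dump.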
*/ + vlib_buffer_t buffer; +} dpdk_tx_dma_trace_t; + +typedef struct +{ + u32 buffer_index; + u16 device_index; + u16 queue_index; + struct rte_mbuf mb; + vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */ + u8 data[256]; /* First 256 data bytes, used for hexdump */ +} dpdk_rx_dma_trace_t; + +void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b); + +clib_error_t *dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address); + +clib_error_t *dpdk_set_mc_filter (vnet_hw_interface_t * hi, + struct ether_addr mc_addr_vec[], int naddr); + +void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd); + +clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); + +u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance); + +struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b); +struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); + +#define foreach_dpdk_error \ + _(NONE, "no error") \ + _(RX_PACKET_ERROR, "Rx packet errors") \ + _(RX_BAD_FCS, "Rx bad fcs") \ + _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \ + _(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \ + _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \ + _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error") + +typedef enum +{ +#define _(f,s) DPDK_ERROR_##f, + foreach_dpdk_error +#undef _ + DPDK_N_ERROR, +} dpdk_error_t; + +int dpdk_set_stat_poll_interval (f64 interval); +int dpdk_set_link_state_poll_interval (f64 interval); +void dpdk_update_link_state (dpdk_device_t * xd, f64 now); +void dpdk_device_lock_init (dpdk_device_t * xd); +void dpdk_device_lock_free (dpdk_device_t * xd); + +void dpdk_rx_trace (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 * buffers, uword n_buffers); + +#define EFD_OPERATION_LESS_THAN 0 +#define EFD_OPERATION_GREATER_OR_EQUAL 1 + +format_function_t format_dpdk_device_name; +format_function_t format_dpdk_device; +format_function_t format_dpdk_tx_dma_trace; +format_function_t format_dpdk_rx_dma_trace; +format_function_t format_dpdk_rte_mbuf; +format_function_t format_dpdk_rx_rte_mbuf; +unformat_function_t unformat_socket_mem; +clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn); +clib_error_t *unformat_hqos (unformat_input_t * input, + dpdk_device_config_hqos_t * hqos); + +uword +admin_up_down_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f); + +#endif /* __included_dpdk_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h new file mode 100644 index 00000000..dd40ff48 --- /dev/null +++ b/src/plugins/dpdk/device/dpdk_priv.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) +#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) + +#define DPDK_NB_RX_DESC_DEFAULT 1024 +#define DPDK_NB_TX_DESC_DEFAULT 1024 +#define DPDK_NB_RX_DESC_VIRTIO 256 +#define DPDK_NB_TX_DESC_VIRTIO 256 + +#define I40E_DEV_ID_SFP_XL710 0x1572 +#define I40E_DEV_ID_QSFP_A 0x1583 +#define I40E_DEV_ID_QSFP_B 0x1584 +#define I40E_DEV_ID_QSFP_C 0x1585 +#define I40E_DEV_ID_10G_BASE_T 0x1586 +#define I40E_DEV_ID_VF 0x154C + +/* These args appear by themselves */ +#define foreach_eal_double_hyphen_predicate_arg \ +_(no-shconf) \ +_(no-hpet) \ +_(no-huge) \ +_(vmware-tsc-map) + +#define foreach_eal_single_hyphen_mandatory_arg \ +_(coremask, c) \ +_(nchannels, n) \ + +#define foreach_eal_single_hyphen_arg \ +_(blacklist, b) \ +_(mem-alloc-request, m) \ +_(force-ranks, r) + +/* These args are preceeded by "--" and followed by a single string */ +#define foreach_eal_double_hyphen_arg \ +_(huge-dir) \ +_(proc-type) \ +_(file-prefix) \ +_(vdev) + +static inline void +dpdk_get_xstats (dpdk_device_t * xd) +{ + int len; + if ((len = rte_eth_xstats_get (xd->device_index, NULL, 0)) > 0) + { + vec_validate (xd->xstats, len - 1); + vec_validate (xd->last_cleared_xstats, len - 1); + + len = + rte_eth_xstats_get (xd->device_index, xd->xstats, + vec_len (xd->xstats)); + + ASSERT (vec_len (xd->xstats) == len); + ASSERT (vec_len (xd->last_cleared_xstats) == len); + + _vec_len (xd->xstats) = len; + _vec_len (xd->last_cleared_xstats) = len; + + } +} + + +static inline void +dpdk_update_counters (dpdk_device_t * xd, f64 now) +{ + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); + u32 my_cpu = os_get_cpu_number (); + u64 rxerrors, last_rxerrors; + + /* only update counters for PMD interfaces */ + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + return; + + xd->time_last_stats_update = now ? now : xd->time_last_stats_update; + clib_memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats)); + rte_eth_stats_get (xd->device_index, &xd->stats); + + /* maybe bump interface rx no buffer counter */ + if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_NO_BUF); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + xd->stats.rx_nombuf - + xd->last_stats.rx_nombuf); + } + + /* missed pkt counter */ + if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_MISS); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + xd->stats.imissed - + xd->last_stats.imissed); + } + rxerrors = xd->stats.ierrors; + last_rxerrors = xd->last_stats.ierrors; + + if (PREDICT_FALSE (rxerrors != last_rxerrors)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_ERROR); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + rxerrors - last_rxerrors); + } + + dpdk_get_xstats (xd); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/format.c b/src/plugins/dpdk/device/format.c new file mode 100644 index 00000000..25a8c5cb --- /dev/null +++ b/src/plugins/dpdk/device/format.c @@ -0,0 +1,754 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#define foreach_dpdk_counter \ + _ (tx_frames_ok, opackets) \ + _ (tx_bytes_ok, obytes) \ + _ (tx_errors, oerrors) \ + _ (rx_frames_ok, ipackets) \ + _ (rx_bytes_ok, ibytes) \ + _ (rx_errors, ierrors) \ + _ (rx_missed, imissed) \ + _ (rx_no_bufs, rx_nombuf) + +#define foreach_dpdk_q_counter \ + _ (rx_frames_ok, q_ipackets) \ + _ (tx_frames_ok, q_opackets) \ + _ (rx_bytes_ok, q_ibytes) \ + _ (tx_bytes_ok, q_obytes) \ + _ (rx_errors, q_errors) + +#define foreach_dpdk_rss_hf \ + _(ETH_RSS_FRAG_IPV4, "ipv4-frag") \ + _(ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \ + _(ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \ + _(ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \ + _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \ + _(ETH_RSS_IPV4, "ipv4") \ + _(ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \ + _(ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \ + _(ETH_RSS_FRAG_IPV6, "ipv6-frag") \ + _(ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \ + _(ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \ + _(ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \ + _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \ + _(ETH_RSS_L2_PAYLOAD, "l2-payload") \ + _(ETH_RSS_IPV6_EX, "ipv6-ex") \ + _(ETH_RSS_IPV6, "ipv6") + + +#define foreach_dpdk_rx_offload_caps \ + _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip") \ + _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ + _(DEV_RX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ + _(DEV_RX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ + _(DEV_RX_OFFLOAD_TCP_LRO , "rcp-lro") \ + _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip") + +#define foreach_dpdk_tx_offload_caps \ + _(DEV_TX_OFFLOAD_VLAN_INSERT, "vlan-insert") \ + _(DEV_TX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ + _(DEV_TX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ + _(DEV_TX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ + _(DEV_TX_OFFLOAD_SCTP_CKSUM , "sctp-cksum") \ + _(DEV_TX_OFFLOAD_TCP_TSO , "tcp-tso") \ + _(DEV_TX_OFFLOAD_UDP_TSO , "udp-tso") \ + _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \ + _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert") + +#define foreach_dpdk_pkt_rx_offload_flag \ + _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \ + _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \ + _ (PKT_RX_FDIR, "RX packet with FDIR infos") \ + _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \ + _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \ + _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \ + _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \ + _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. 
is valid") \ + _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \ + _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \ + _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped") + +#define foreach_dpdk_pkt_type \ + _ (L2, ETHER, "Ethernet packet") \ + _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \ + _ (L2, ETHER_ARP, "ARP packet") \ + _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet") \ + _ (L2, ETHER_NSH, "NSH (Network Service Header) packet") \ + _ (L2, ETHER_VLAN, "VLAN packet") \ + _ (L2, ETHER_QINQ, "QinQ packet") \ + _ (L3, IPV4, "IPv4 packet without extension headers") \ + _ (L3, IPV4_EXT, "IPv4 packet with extension headers") \ + _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \ + _ (L3, IPV6, "IPv6 packet without extension headers") \ + _ (L3, IPV6_EXT, "IPv6 packet with extension headers") \ + _ (L3, IPV6_EXT_UNKNOWN, "IPv6 packet with or without extension headers") \ + _ (L4, TCP, "TCP packet") \ + _ (L4, UDP, "UDP packet") \ + _ (L4, FRAG, "Fragmented IP packet") \ + _ (L4, SCTP, "SCTP (Stream Control Transmission Protocol) packet") \ + _ (L4, ICMP, "ICMP packet") \ + _ (L4, NONFRAG, "Non-fragmented IP packet") \ + _ (TUNNEL, GRE, "GRE tunneling packet") \ + _ (TUNNEL, VXLAN, "VXLAN tunneling packet") \ + _ (TUNNEL, NVGRE, "NVGRE Tunneling packet") \ + _ (TUNNEL, GENEVE, "GENEVE Tunneling packet") \ + _ (TUNNEL, GRENAT, "Teredo, VXLAN or GRE Tunneling packet") \ + _ (INNER_L2, ETHER, "Inner Ethernet packet") \ + _ (INNER_L2, ETHER_VLAN, "Inner Ethernet packet with VLAN") \ + _ (INNER_L3, IPV4, "Inner IPv4 packet without extension headers") \ + _ (INNER_L3, IPV4_EXT, "Inner IPv4 packet with extension headers") \ + _ (INNER_L3, IPV4_EXT_UNKNOWN, "Inner IPv4 packet with or without extension headers") \ + _ (INNER_L3, IPV6, "Inner IPv6 packet without extension headers") \ + _ (INNER_L3, IPV6_EXT, "Inner IPv6 packet with extension headers") \ + _ (INNER_L3, IPV6_EXT_UNKNOWN, "Inner IPv6 packet with or without extension headers") \ + _ (INNER_L4, TCP, "Inner TCP packet") \ + _ (INNER_L4, UDP, "Inner UDP packet") \ + _ (INNER_L4, FRAG, "Inner fagmented IP packet") \ + _ (INNER_L4, SCTP, "Inner SCTP (Stream Control Transmission Protocol) packet") \ + _ (INNER_L4, ICMP, "Inner ICMP packet") \ + _ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet") + +#define foreach_dpdk_pkt_tx_offload_flag \ + _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \ + _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \ + _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \ + _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. 
computed by NIC") \ + _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp") + +#define foreach_dpdk_pkt_offload_flag \ + foreach_dpdk_pkt_rx_offload_flag \ + foreach_dpdk_pkt_tx_offload_flag + +u8 * +format_dpdk_device_name (u8 * s, va_list * args) +{ + dpdk_main_t *dm = &dpdk_main; + char *devname_format; + char *device_name; + u32 i = va_arg (*args, u32); + struct rte_eth_dev_info dev_info; + u8 *ret; + + if (dm->conf->interface_name_format_decimal) + devname_format = "%s%d/%d/%d"; + else + devname_format = "%s%x/%x/%x"; + + switch (dm->devices[i].port_type) + { + case VNET_DPDK_PORT_TYPE_ETH_1G: + device_name = "GigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_10G: + device_name = "TenGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_40G: + device_name = "FortyGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_100G: + device_name = "HundredGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_BOND: + return format (s, "BondEthernet%d", dm->devices[i].device_index); + + case VNET_DPDK_PORT_TYPE_ETH_SWITCH: + device_name = "EthernetSwitch"; + break; + + case VNET_DPDK_PORT_TYPE_AF_PACKET: + rte_eth_dev_info_get (i, &dev_info); + return format (s, "af_packet%d", dm->devices[i].af_packet_port_id); + + default: + case VNET_DPDK_PORT_TYPE_UNKNOWN: + device_name = "UnknownEthernet"; + break; + } + + rte_eth_dev_info_get (i, &dev_info); + + if (dev_info.pci_dev) + ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus, + dev_info.pci_dev->addr.devid, + dev_info.pci_dev->addr.function); + else + ret = format (s, "%s%d", device_name, dm->devices[i].device_index); + + if (dm->devices[i].interface_name_suffix) + return format (ret, "/%s", dm->devices[i].interface_name_suffix); + return ret; +} + +static u8 * +format_dpdk_device_type (u8 * s, va_list * args) +{ + dpdk_main_t *dm = &dpdk_main; + char *dev_type; + u32 i = va_arg (*args, u32); + + switch (dm->devices[i].pmd) + { + case VNET_DPDK_PMD_E1000EM: + dev_type = "Intel 82540EM (e1000)"; + break; + + case VNET_DPDK_PMD_IGB: + dev_type = "Intel e1000"; + break; + + case VNET_DPDK_PMD_I40E: + dev_type = "Intel X710/XL710 Family"; + break; + + case VNET_DPDK_PMD_I40EVF: + dev_type = "Intel X710/XL710 Family VF"; + break; + + case VNET_DPDK_PMD_FM10K: + dev_type = "Intel FM10000 Family Ethernet Switch"; + break; + + case VNET_DPDK_PMD_IGBVF: + dev_type = "Intel e1000 VF"; + break; + + case VNET_DPDK_PMD_VIRTIO: + dev_type = "Red Hat Virtio"; + break; + + case VNET_DPDK_PMD_IXGBEVF: + dev_type = "Intel 82599 VF"; + break; + + case VNET_DPDK_PMD_IXGBE: + dev_type = "Intel 82599"; + break; + + case VNET_DPDK_PMD_ENIC: + dev_type = "Cisco VIC"; + break; + + case VNET_DPDK_PMD_CXGBE: + dev_type = "Chelsio T4/T5"; + break; + + case VNET_DPDK_PMD_MLX5: + dev_type = "Mellanox ConnectX-4 Family"; + break; + + case VNET_DPDK_PMD_VMXNET3: + dev_type = "VMware VMXNET3"; + break; + + case VNET_DPDK_PMD_AF_PACKET: + dev_type = "af_packet"; + break; + + case VNET_DPDK_PMD_BOND: + dev_type = "Ethernet Bonding"; + break; + + case VNET_DPDK_PMD_DPAA2: + dev_type = "NXP DPAA2 Mac"; + break; + + default: + case VNET_DPDK_PMD_UNKNOWN: + dev_type = "### UNKNOWN ###"; + break; + } + + return format (s, dev_type); +} + +static u8 * +format_dpdk_link_status (u8 * s, va_list * args) +{ + dpdk_device_t *xd = va_arg (*args, dpdk_device_t *); + struct rte_eth_link *l = &xd->link; + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); + + s = format (s, "%s 
", l->link_status ? "up" : "down"); + if (l->link_status) + { + u32 promisc = rte_eth_promiscuous_get (xd->device_index); + + s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ? + "full" : "half"); + s = format (s, "speed %u mtu %d %s\n", l->link_speed, + hi->max_packet_bytes, promisc ? " promisc" : ""); + } + else + s = format (s, "\n"); + + return s; +} + +#define _line_len 72 +#define _(v, str) \ +if (bitmap & v) { \ + if (format_get_indent (s) > next_split ) { \ + next_split += _line_len; \ + s = format(s,"\n%U", format_white_space, indent); \ + } \ + s = format(s, "%s ", str); \ +} + +static u8 * +format_dpdk_rss_hf_name (u8 * s, va_list * args) +{ + u64 bitmap = va_arg (*args, u64); + int next_split = _line_len; + int indent = format_get_indent (s); + + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_rss_hf return s; +} + +static u8 * +format_dpdk_rx_offload_caps (u8 * s, va_list * args) +{ + u32 bitmap = va_arg (*args, u32); + int next_split = _line_len; + int indent = format_get_indent (s); + + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_rx_offload_caps return s; +} + +static u8 * +format_dpdk_tx_offload_caps (u8 * s, va_list * args) +{ + u32 bitmap = va_arg (*args, u32); + int next_split = _line_len; + int indent = format_get_indent (s); + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_tx_offload_caps return s; +} + +#undef _line_len +#undef _ + +u8 * +format_dpdk_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance); + uword indent = format_get_indent (s); + f64 now = vlib_time_now (dm->vlib_main); + struct rte_eth_dev_info di; + + dpdk_update_counters (xd, now); + dpdk_update_link_state (xd, now); + + s = format (s, "%U\n%Ucarrier %U", + format_dpdk_device_type, xd->device_index, + format_white_space, indent + 2, format_dpdk_link_status, xd); + + rte_eth_dev_info_get (xd->device_index, &di); + + if (verbose > 1 && xd->flags & DPDK_DEVICE_FLAG_PMD) + { + struct rte_pci_device *pci; + struct rte_eth_rss_conf rss_conf; + int vlan_off; + int retval; + + rss_conf.rss_key = 0; + retval = rte_eth_dev_rss_hash_conf_get (xd->device_index, &rss_conf); + if (retval < 0) + clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval); + pci = di.pci_dev; + + if (pci) + s = + format (s, + "%Upci id: device %04x:%04x subsystem %04x:%04x\n" + "%Upci address: %04x:%02x:%02x.%02x\n", + format_white_space, indent + 2, pci->id.vendor_id, + pci->id.device_id, pci->id.subsystem_vendor_id, + pci->id.subsystem_device_id, format_white_space, indent + 2, + pci->addr.domain, pci->addr.bus, pci->addr.devid, + pci->addr.function); + s = + format (s, "%Umax rx packet len: %d\n", format_white_space, + indent + 2, di.max_rx_pktlen); + s = + format (s, "%Umax num of queues: rx %d tx %d\n", format_white_space, + indent + 2, di.max_rx_queues, di.max_tx_queues); + s = + format (s, "%Upromiscuous: unicast %s all-multicast %s\n", + format_white_space, indent + 2, + rte_eth_promiscuous_get (xd->device_index) ? "on" : "off", + rte_eth_promiscuous_get (xd->device_index) ? "on" : "off"); + vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); + s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n", + format_white_space, indent + 2, + vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? 
"on" : "off"); + s = format (s, "%Urx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_rx_offload_caps, di.rx_offload_capa); + s = format (s, "%Utx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_tx_offload_caps, di.tx_offload_capa); + s = format (s, "%Urss active: %U\n" + "%Urss supported: %U\n", + format_white_space, indent + 2, + format_dpdk_rss_hf_name, rss_conf.rss_hf, + format_white_space, indent + 2, + format_dpdk_rss_hf_name, di.flow_type_rss_offloads); + } + + s = format (s, "%Urx queues %d, rx desc %d, tx queues %d, tx desc %d\n", + format_white_space, indent + 2, + xd->rx_q_used, xd->nb_rx_desc, xd->tx_q_used, xd->nb_tx_desc); + + if (xd->cpu_socket > -1) + s = format (s, "%Ucpu socket %d\n", + format_white_space, indent + 2, xd->cpu_socket); + + /* $$$ MIB counters */ + { +#define _(N, V) \ + if ((xd->stats.V - xd->last_cleared_stats.V) != 0) { \ + s = format (s, "\n%U%-40U%16Ld", \ + format_white_space, indent + 2, \ + format_c_identifier, #N, \ + xd->stats.V - xd->last_cleared_stats.V); \ + } \ + + foreach_dpdk_counter +#undef _ + } + + u8 *xs = 0; + u32 i = 0; + struct rte_eth_xstat *xstat, *last_xstat; + struct rte_eth_xstat_name *xstat_names = 0; + int len = rte_eth_xstats_get_names (xd->device_index, NULL, 0); + vec_validate (xstat_names, len - 1); + rte_eth_xstats_get_names (xd->device_index, xstat_names, len); + + ASSERT (vec_len (xd->xstats) == vec_len (xd->last_cleared_xstats)); + + /* *INDENT-OFF* */ + vec_foreach_index(i, xd->xstats) + { + u64 delta = 0; + xstat = vec_elt_at_index(xd->xstats, i); + last_xstat = vec_elt_at_index(xd->last_cleared_xstats, i); + + delta = xstat->value - last_xstat->value; + if (verbose == 2 || (verbose && delta)) + { + /* format_c_identifier doesn't like c strings inside vector */ + u8 * name = format(0,"%s", xstat_names[i].name); + xs = format(xs, "\n%U%-38U%16Ld", + format_white_space, indent + 4, + format_c_identifier, name, delta); + vec_free(name); + } + } + /* *INDENT-ON* */ + + vec_free (xstat_names); + + if (xs) + { + s = format (s, "\n%Uextended stats:%v", + format_white_space, indent + 2, xs); + vec_free (xs); + } + + return s; +} + +u8 * +format_dpdk_tx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); + dpdk_tx_dma_trace_t *t = va_arg (*va, dpdk_tx_dma_trace_t *); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); + uword indent = format_get_indent (s); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + + s = format (s, "%U tx queue %d", + format_vnet_sw_interface_name, vnm, sw, t->queue_index); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U%U", format_white_space, indent, + format_ethernet_header_with_length, t->buffer.pre_data, + sizeof (t->buffer.pre_data)); + + return s; +} + +u8 * +format_dpdk_rx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); + dpdk_rx_dma_trace_t *t = va_arg (*va, dpdk_rx_dma_trace_t *); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); + format_function_t *f; + uword indent = 
format_get_indent (s); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + + s = format (s, "%U rx queue %d", + format_vnet_sw_interface_name, vnm, sw, t->queue_index); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U%U", + format_white_space, indent, + format_dpdk_rte_mbuf, &t->mb, &t->data); + + if (vm->trace_main.verbose) + { + s = format (s, "\n%UPacket Dump%s", format_white_space, indent + 2, + t->mb.data_len > sizeof (t->data) ? " (truncated)" : ""); + s = format (s, "\n%U%U", format_white_space, indent + 4, + format_hexdump, &t->data, + t->mb.data_len > + sizeof (t->data) ? sizeof (t->data) : t->mb.data_len); + } + f = node->format_buffer; + if (!f) + f = format_hex_bytes; + s = format (s, "\n%U%U", format_white_space, indent, + f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); + + return s; +} + + +static inline u8 * +format_dpdk_pkt_types (u8 * s, va_list * va) +{ + u32 *pkt_types = va_arg (*va, u32 *); + uword indent __attribute__ ((unused)) = format_get_indent (s) + 2; + + if (!*pkt_types) + return s; + + s = format (s, "Packet Types"); + +#define _(L, F, S) \ + if ((*pkt_types & RTE_PTYPE_##L##_MASK) == RTE_PTYPE_##L##_##F) \ + { \ + s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \ + "RTE_PTYPE_" #L "_" #F, RTE_PTYPE_##L##_##F, S); \ + } + + foreach_dpdk_pkt_type +#undef _ + return s; +} + +static inline u8 * +format_dpdk_pkt_offload_flags (u8 * s, va_list * va) +{ + u64 *ol_flags = va_arg (*va, u64 *); + uword indent = format_get_indent (s) + 2; + + if (!*ol_flags) + return s; + + s = format (s, "Packet Offload Flags"); + +#define _(F, S) \ + if (*ol_flags & F) \ + { \ + s = format (s, "\n%U%s (0x%04x) %s", \ + format_white_space, indent, #F, F, S); \ + } + + foreach_dpdk_pkt_offload_flag +#undef _ + return s; +} + +u8 * +format_dpdk_rte_mbuf_vlan (u8 * s, va_list * va) +{ + ethernet_vlan_header_tv_t *vlan_hdr = + va_arg (*va, ethernet_vlan_header_tv_t *); + + if (clib_net_to_host_u16 (vlan_hdr->type) == ETHERNET_TYPE_DOT1AD) + { + s = format (s, "%U 802.1q vlan ", + format_ethernet_vlan_tci, + clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); + vlan_hdr++; + } + + s = format (s, "%U", + format_ethernet_vlan_tci, + clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); + + return s; +} + +u8 * +format_dpdk_rte_mbuf (u8 * s, va_list * va) +{ + struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *); + ethernet_header_t *eth_hdr = va_arg (*va, ethernet_header_t *); + uword indent = format_get_indent (s) + 2; + + s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d" + "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x, data_off %d, phys_addr 0x%x" + "\n%Upacket_type 0x%x", + mb->port, mb->nb_segs, mb->pkt_len, + format_white_space, indent, + mb->buf_len, mb->data_len, mb->ol_flags, mb->data_off, + mb->buf_physaddr, format_white_space, indent, mb->packet_type); + + if (mb->ol_flags) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_offload_flags, &mb->ol_flags); + + if ((mb->ol_flags & PKT_RX_VLAN_PKT) && + ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0)) + { + ethernet_vlan_header_tv_t *vlan_hdr = + ((ethernet_vlan_header_tv_t *) & (eth_hdr->type)); + s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr); + } + + if (mb->packet_type) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_types, &mb->packet_type); + + return s; +} + +/* FIXME is this function used? 
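+   It is compiled out by the "#if 0" below, although the declaration of
+   unformat_socket_mem in dpdk.h is still present.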
*/ +#if 0 +uword +unformat_socket_mem (unformat_input_t * input, va_list * va) +{ + uword **r = va_arg (*va, uword **); + int i = 0; + u32 mem; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, ",")) + hash_set (*r, i, 1024); + else if (unformat (input, "%u,", &mem)) + hash_set (*r, i, mem); + else if (unformat (input, "%u", &mem)) + hash_set (*r, i, mem); + else + { + unformat_put_input (input); + goto done; + } + i++; + } + +done: + return 1; +} +#endif + +clib_error_t * +unformat_rss_fn (unformat_input_t * input, uword * rss_fn) +{ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (0) + ; +#undef _ +#define _(f, s) \ + else if (unformat (input, s)) \ + *rss_fn |= f; + + foreach_dpdk_rss_hf +#undef _ + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + } + return 0; +} + +clib_error_t * +unformat_hqos (unformat_input_t * input, dpdk_device_config_hqos_t * hqos) +{ + clib_error_t *error = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "hqos-thread %u", &hqos->hqos_thread)) + hqos->hqos_thread_valid = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + break; + } + } + + return error; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c new file mode 100644 index 00000000..8824d789 --- /dev/null +++ b/src/plugins/dpdk/device/node.c @@ -0,0 +1,674 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +static char *dpdk_error_strings[] = { +#define _(n,s) s, + foreach_dpdk_error +#undef _ +}; + +always_inline int +vlib_buffer_is_ip4 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); +} + +always_inline int +vlib_buffer_is_ip6 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); +} + +always_inline int +vlib_buffer_is_mpls (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); +} + +always_inline u32 +dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0) +{ + if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0))) + if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0)) + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + else + return VNET_DEVICE_INPUT_NEXT_IP4_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0))) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; + else + return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; +} + +always_inline int +dpdk_mbuf_is_vlan (struct rte_mbuf *mb) +{ + return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) == + RTE_PTYPE_L2_ETHER_VLAN; +} + +always_inline int +dpdk_mbuf_is_ip4 (struct rte_mbuf *mb) +{ + return RTE_ETH_IS_IPV4_HDR (mb->packet_type) != 0; +} + +always_inline int +dpdk_mbuf_is_ip6 (struct rte_mbuf *mb) +{ + return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0; +} + +always_inline u32 +dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0) +{ + if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb))) + return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb))) + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb))) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; + else + return dpdk_rx_next_from_etype (mb, b0); +} + +always_inline void +dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error) +{ + if (mb->ol_flags & PKT_RX_IP_CKSUM_BAD) + { + *error = DPDK_ERROR_IP_CHECKSUM_ERROR; + *next = VNET_DEVICE_INPUT_NEXT_DROP; + } + else + *error = DPDK_ERROR_NONE; +} + +void +dpdk_rx_trace (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 * buffers, uword n_buffers) +{ + vlib_main_t *vm = vlib_get_main (); + u32 *b, n_left; + u32 next0; + + n_left = n_buffers; + b = buffers; + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t *b0; + dpdk_rx_dma_trace_t *t0; + struct rte_mbuf *mb; + u8 error0; + + bi0 = b[0]; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + mb = rte_mbuf_from_vlib_buffer (b0); + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + next0 = xd->per_interface_next_index; + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + next0 = dpdk_rx_next_from_mb (mb, b0); + else + next0 = dpdk_rx_next_from_etype (mb, b0); + + dpdk_rx_error_from_mb (mb, &next0, &error0); + + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->queue_index = queue_id; + t0->device_index = xd->device_index; + t0->buffer_index = bi0; + + 
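/* Descriptive note: the four copies below snapshot everything the rx
+	   trace formatter needs: the rte_mbuf header, the vlib_buffer metadata
+	   (without pre_data), the start of the packet into pre_data, and the
+	   raw bytes later shown by format_dpdk_rx_dma_trace. */
+	 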
clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); + clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + clib_memcpy (t0->buffer.pre_data, b0->data, + sizeof (t0->buffer.pre_data)); + clib_memcpy (&t0->data, mb->buf_addr + mb->data_off, sizeof (t0->data)); + + b += 1; + } +} + +static inline u32 +dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id) +{ + u32 n_buffers; + u32 n_left; + u32 n_this_chunk; + + n_left = VLIB_FRAME_SIZE; + n_buffers = 0; + + if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) + { + while (n_left) + { + n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id, + xd->rx_vectors[queue_id] + + n_buffers, n_left); + n_buffers += n_this_chunk; + n_left -= n_this_chunk; + + /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */ + if (n_this_chunk < 32) + break; + } + } + else + { + ASSERT (0); + } + + return n_buffers; +} + + +static_always_inline void +dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, + struct rte_mbuf *mb, vlib_buffer_free_list_t * fl) +{ + u8 nb_seg = 1; + struct rte_mbuf *mb_seg = 0; + vlib_buffer_t *b_seg, *b_chain = 0; + mb_seg = mb->next; + b_chain = b; + + while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) + { + ASSERT (mb_seg != 0); + + b_seg = vlib_buffer_from_rte_mbuf (mb_seg); + vlib_buffer_init_for_free_list (b_seg, fl); + + ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + ASSERT (b_seg->current_data == 0); + + /* + * The driver (e.g. virtio) may not put the packet data at the start + * of the segment, so don't assume b_seg->current_data == 0 is correct. + */ + b_seg->current_data = + (mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data; + + b_seg->current_length = mb_seg->data_len; + b->total_length_not_including_first_buffer += mb_seg->data_len; + + b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT; + b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg); + + b_chain = b_seg; + mb_seg = mb_seg->next; + nb_seg++; + } +} + +static_always_inline void +dpdk_prefetch_buffer (struct rte_mbuf *mb) +{ + vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); + CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE); +} + +/* + * This function is used when there are no worker threads. + * The main thread performs IO and forwards the packets. 
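+ * Rough flow, as implemented below: burst-receive up to VLIB_FRAME_SIZE
+ * mbufs from the queue, optionally mark the first few for tracing, then
+ * convert mbufs to vlib buffers four at a time (with a scalar tail loop),
+ * choose a next node per packet, enqueue, and finally bump the combined
+ * rx counters.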
+ */ +static_always_inline u32 +dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, + vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id) +{ + u32 n_buffers; + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + u32 n_left_to_next, *to_next; + u32 mb_index; + vlib_main_t *vm = vlib_get_main (); + uword n_rx_bytes = 0; + u32 n_trace, trace_cnt __attribute__ ((unused)); + vlib_buffer_free_list_t *fl; + u32 buffer_flags_template; + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) + return 0; + + n_buffers = dpdk_rx_burst (dm, xd, queue_id); + + if (n_buffers == 0) + { + return 0; + } + + buffer_flags_template = dm->buffer_flags_template; + + vec_reset_length (xd->d_trace_buffers[cpu_index]); + trace_cnt = n_trace = vlib_get_trace_count (vm, node); + + if (n_trace > 0) + { + u32 n = clib_min (n_trace, n_buffers); + mb_index = 0; + + while (n--) + { + struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++]; + vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); + vec_add1 (xd->d_trace_buffers[cpu_index], + vlib_get_buffer_index (vm, b)); + } + } + + fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + mb_index = 0; + + while (n_buffers > 0) + { + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 bi0, next0, l3_offset0; + u32 bi1, next1, l3_offset1; + u32 bi2, next2, l3_offset2; + u32 bi3, next3, l3_offset3; + u8 error0, error1, error2, error3; + u64 or_ol_flags; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_buffers > 8 && n_left_to_next > 4) + { + struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; + struct rte_mbuf *mb1 = xd->rx_vectors[queue_id][mb_index + 1]; + struct rte_mbuf *mb2 = xd->rx_vectors[queue_id][mb_index + 2]; + struct rte_mbuf *mb3 = xd->rx_vectors[queue_id][mb_index + 3]; + + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 4]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 5]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 6]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 7]); + + if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) + { + if (PREDICT_FALSE (mb0->nb_segs > 1)) + dpdk_prefetch_buffer (mb0->next); + if (PREDICT_FALSE (mb1->nb_segs > 1)) + dpdk_prefetch_buffer (mb1->next); + if (PREDICT_FALSE (mb2->nb_segs > 1)) + dpdk_prefetch_buffer (mb2->next); + if (PREDICT_FALSE (mb3->nb_segs > 1)) + dpdk_prefetch_buffer (mb3->next); + } + + ASSERT (mb0); + ASSERT (mb1); + ASSERT (mb2); + ASSERT (mb3); + + or_ol_flags = (mb0->ol_flags | mb1->ol_flags | + mb2->ol_flags | mb3->ol_flags); + b0 = vlib_buffer_from_rte_mbuf (mb0); + b1 = vlib_buffer_from_rte_mbuf (mb1); + b2 = vlib_buffer_from_rte_mbuf (mb2); + b3 = vlib_buffer_from_rte_mbuf (mb3); + + vlib_buffer_init_for_free_list (b0, fl); + vlib_buffer_init_for_free_list (b1, fl); + vlib_buffer_init_for_free_list (b2, fl); + vlib_buffer_init_for_free_list (b3, fl); + + bi0 = vlib_get_buffer_index (vm, b0); + bi1 = vlib_get_buffer_index (vm, b1); + bi2 = vlib_get_buffer_index (vm, b2); + bi3 = vlib_get_buffer_index (vm, b3); + + to_next[0] = bi0; + to_next[1] = bi1; + to_next[2] = bi2; + to_next[3] = bi3; + to_next += 4; + n_left_to_next -= 4; + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + { + next0 = next1 = next2 = next3 = xd->per_interface_next_index; + } + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + { + next0 = dpdk_rx_next_from_mb (mb0, b0); + next1 = dpdk_rx_next_from_mb (mb1, b1); + next2 = dpdk_rx_next_from_mb (mb2, b2); + next3 = 
dpdk_rx_next_from_mb (mb3, b3); + } + else + { + next0 = dpdk_rx_next_from_etype (mb0, b0); + next1 = dpdk_rx_next_from_etype (mb1, b1); + next2 = dpdk_rx_next_from_etype (mb2, b2); + next3 = dpdk_rx_next_from_etype (mb3, b3); + } + + if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD)) + { + dpdk_rx_error_from_mb (mb0, &next0, &error0); + dpdk_rx_error_from_mb (mb1, &next1, &error1); + dpdk_rx_error_from_mb (mb2, &next2, &error2); + dpdk_rx_error_from_mb (mb3, &next3, &error3); + b0->error = node->errors[error0]; + b1->error = node->errors[error1]; + b2->error = node->errors[error2]; + b3->error = node->errors[error3]; + } + else + { + b0->error = b1->error = node->errors[DPDK_ERROR_NONE]; + b2->error = b3->error = node->errors[DPDK_ERROR_NONE]; + } + + l3_offset0 = device_input_next_node_advance[next0]; + l3_offset1 = device_input_next_node_advance[next1]; + l3_offset2 = device_input_next_node_advance[next2]; + l3_offset3 = device_input_next_node_advance[next3]; + + b0->current_data = l3_offset0 + mb0->data_off; + b1->current_data = l3_offset1 + mb1->data_off; + b2->current_data = l3_offset2 + mb2->data_off; + b3->current_data = l3_offset3 + mb3->data_off; + + b0->current_data -= RTE_PKTMBUF_HEADROOM; + b1->current_data -= RTE_PKTMBUF_HEADROOM; + b2->current_data -= RTE_PKTMBUF_HEADROOM; + b3->current_data -= RTE_PKTMBUF_HEADROOM; + + b0->current_length = mb0->data_len - l3_offset0; + b1->current_length = mb1->data_len - l3_offset1; + b2->current_length = mb2->data_len - l3_offset2; + b3->current_length = mb3->data_len - l3_offset3; + + b0->flags = buffer_flags_template; + b1->flags = buffer_flags_template; + b2->flags = buffer_flags_template; + b3->flags = buffer_flags_template; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b2)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b3)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b2)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b3)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + n_rx_bytes += mb0->pkt_len; + n_rx_bytes += mb1->pkt_len; + n_rx_bytes += mb2->pkt_len; + n_rx_bytes += mb3->pkt_len; + + /* Process subsequent segments of multi-segment packets */ + if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) + { + dpdk_process_subseq_segs (vm, b0, mb0, fl); + dpdk_process_subseq_segs (vm, b1, mb1, fl); + dpdk_process_subseq_segs (vm, b2, mb2, fl); + dpdk_process_subseq_segs (vm, b3, mb3, fl); + } + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); + + /* Do we have any driver RX features configured on the interface? 
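+	     If so, this call is expected to redirect next0..next3 into the
+	     configured feature chain before the packets are enqueued.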
*/ + vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index, + &next0, &next1, &next2, &next3, + b0, b1, b2, b3, + l3_offset0, l3_offset1, + l3_offset2, l3_offset3); + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + n_buffers -= 4; + mb_index += 4; + } + while (n_buffers > 0 && n_left_to_next > 0) + { + struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; + + ASSERT (mb0); + + b0 = vlib_buffer_from_rte_mbuf (mb0); + + /* Prefetch one next segment if it exists. */ + if (PREDICT_FALSE (mb0->nb_segs > 1)) + dpdk_prefetch_buffer (mb0->next); + + vlib_buffer_init_for_free_list (b0, fl); + + bi0 = vlib_get_buffer_index (vm, b0); + + to_next[0] = bi0; + to_next++; + n_left_to_next--; + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + next0 = xd->per_interface_next_index; + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + next0 = dpdk_rx_next_from_mb (mb0, b0); + else + next0 = dpdk_rx_next_from_etype (mb0, b0); + + dpdk_rx_error_from_mb (mb0, &next0, &error0); + b0->error = node->errors[error0]; + + l3_offset0 = device_input_next_node_advance[next0]; + + b0->current_data = l3_offset0; + b0->current_data += mb0->data_off - RTE_PKTMBUF_HEADROOM; + b0->current_length = mb0->data_len - l3_offset0; + + b0->flags = buffer_flags_template; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + n_rx_bytes += mb0->pkt_len; + + /* Process subsequent segments of multi-segment packets */ + dpdk_process_subseq_segs (vm, b0, mb0, fl); + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + /* Do we have any driver RX features configured on the interface? */ + vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0, + b0, l3_offset0); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + n_buffers--; + mb_index++; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0)) + { + dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index], + vec_len (xd->d_trace_buffers[cpu_index])); + vlib_set_trace_count (vm, node, n_trace - + vec_len (xd->d_trace_buffers[cpu_index])); + } + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); + + vnet_device_increment_rx_packets (cpu_index, mb_index); + + return mb_index; +} + +static inline void +poll_rate_limit (dpdk_main_t * dm) +{ + /* Limit the poll rate by sleeping for N msec between polls */ + if (PREDICT_FALSE (dm->poll_sleep != 0)) + { + struct timespec ts, tsrem; + + ts.tv_sec = 0; + ts.tv_nsec = 1000 * 1000 * dm->poll_sleep; /* 1ms */ + + while (nanosleep (&ts, &tsrem) < 0) + { + ts = tsrem; + } + } +} + +/** \brief Main DPDK input node + @node dpdk-input + + This is the main DPDK input node: across each assigned interface, + call rte_eth_rx_burst(...) or similar to obtain a vector of + packets to process. Handle early packet discard. Derive @c + vlib_buffer_t metadata from struct rte_mbuf metadata, + Depending on the resulting metadata: adjust b->current_data, + b->current_length and dispatch directly to + ip4-input-no-checksum, or ip6-input. 
Trace the packet if required. + + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param f vlib_frame_t input-node, not used. + + @par Graph mechanics: buffer metadata, next index usage + + @em Uses: + - struct rte_mbuf mb->ol_flags + - PKT_RX_IP_CKSUM_BAD + - RTE_ETH_IS_xxx_HDR(mb->packet_type) + - packet classification result + + @em Sets: + - b->error if the packet is to be dropped immediately + - b->current_data, b->current_length + - adjusted as needed to skip the L2 header in direct-dispatch cases + - vnet_buffer(b)->sw_if_index[VLIB_RX] + - rx interface sw_if_index + - vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0 + - required by ipX-lookup + - b->flags + - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc. + + Next Nodes: + - Static arcs to: error-drop, ethernet-input, + ip4-input-no-checksum, ip6-input, mpls-input + - per-interface redirection, controlled by + xd->per_interface_next_index +*/ + +static uword +dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + uword n_rx_packets = 0; + dpdk_device_and_queue_t *dq; + u32 cpu_index = os_get_cpu_number (); + + /* + * Poll all devices on this cpu for input/interrupts. + */ + /* *INDENT-OFF* */ + vec_foreach (dq, dm->devices_by_cpu[cpu_index]) + { + xd = vec_elt_at_index(dm->devices, dq->device); + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id); + } + /* *INDENT-ON* */ + + poll_rate_limit (dm); + + return n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_input_node) = { + .function = dpdk_input, + .type = VLIB_NODE_TYPE_INPUT, + .name = "dpdk-input", + .sibling_of = "device-input", + + /* Will be enabled if/when hardware is detected. */ + .state = VLIB_NODE_STATE_DISABLED, + + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_dpdk_rx_dma_trace, + + .n_errors = DPDK_N_ERROR, + .error_strings = dpdk_error_strings, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input); +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/dir.dox b/src/plugins/dpdk/dir.dox new file mode 100644 index 00000000..43e36753 --- /dev/null +++ b/src/plugins/dpdk/dir.dox @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Doxygen directory documentation */ + +/** +@dir +@brief DPDK Abstraction Layer. + +This directory contains the source code for the DPDK abstraction layer. + +*/ +/*? %%clicmd:group_label DPDK and pcap tx %% ?*/ +/*? %%syscfg:group_label DPDK and pcap tx %% ?*/ diff --git a/src/plugins/dpdk/hqos/hqos.c b/src/plugins/dpdk/hqos/hqos.c new file mode 100644 index 00000000..a288fca7 --- /dev/null +++ b/src/plugins/dpdk/hqos/hqos.c @@ -0,0 +1,775 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include /* enumerate all vlib messages */ + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +dpdk_main_t dpdk_main; + +/*** + * + * HQoS default configuration values + * + ***/ + +static dpdk_device_config_hqos_t hqos_params_default = { + .hqos_thread_valid = 0, + + .swq_size = 4096, + .burst_enq = 256, + .burst_deq = 220, + + /* + * Packet field to identify the subport. + * + * Default value: Since only one subport is defined by default (see below: + * n_subports_per_port = 1), the subport ID is hardcoded to 0. + */ + .pktfield0_slabpos = 0, + .pktfield0_slabmask = 0, + + /* + * Packet field to identify the pipe. + * + * Default value: Assuming Ethernet/IPv4/UDP packets, UDP payload bits 12 .. 23 + */ + .pktfield1_slabpos = 40, + .pktfield1_slabmask = 0x0000000FFF000000LLU, + + /* Packet field used as index into TC translation table to identify the traffic + * class and queue. 
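+   * The slab is read as a big-endian u64 starting at byte pktfield2_slabpos;
+   * with the default Ethernet/IPv4 framing the 0xFC mask picks out the DSCP
+   * bits of the IPv4 TOS byte, and the value is shifted right by the mask's
+   * trailing-zero count (see the BITFIELD() macro further down in this file).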
+ * + * Default value: Assuming Ethernet/IPv4 packets, IPv4 DSCP field + */ + .pktfield2_slabpos = 8, + .pktfield2_slabmask = 0x00000000000000FCLLU, + .tc_table = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + }, + + /* port */ + .port = { + .name = NULL, /* Set at init */ + .socket = 0, /* Set at init */ + .rate = 1250000000, /* Assuming 10GbE port */ + .mtu = 14 + 1500, /* Assuming Ethernet/IPv4 pkt (Ethernet FCS not included) */ + .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT, + .n_subports_per_port = 1, + .n_pipes_per_subport = 4096, + .qsize = {64, 64, 64, 64}, + .pipe_profiles = NULL, /* Set at config */ + .n_pipe_profiles = 1, + +#ifdef RTE_SCHED_RED + .red_params = { + /* Traffic Class 0 Colors Green / Yellow / Red */ + [0][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [0][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [0][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 1 - Colors Green / Yellow / Red */ + [1][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [1][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [1][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 2 - Colors Green / Yellow / Red */ + [2][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [2][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [2][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 3 - Colors Green / Yellow / Red */ + [3][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [3][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [3][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9} + }, +#endif /* RTE_SCHED_RED */ + }, +}; + +static struct rte_sched_subport_params hqos_subport_params_default = { + .tb_rate = 1250000000, /* 10GbE line rate (measured in bytes/second) */ + .tb_size = 1000000, + .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000}, + .tc_period = 10, +}; + +static struct rte_sched_pipe_params hqos_pipe_params_default = { + .tb_rate = 305175, /* 10GbE line rate divided by 4K pipes */ + .tb_size = 1000000, + .tc_rate = {305175, 305175, 305175, 305175}, + .tc_period = 40, +#ifdef RTE_SCHED_SUBPORT_TC_OV + .tc_ov_weight = 1, +#endif + .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, +}; + +/*** + * + * HQoS configuration + * + ***/ + +int +dpdk_hqos_validate_mask (u64 mask, u32 n) +{ + int count = __builtin_popcountll (mask); + int pos_lead = sizeof (u64) * 8 - __builtin_clzll (mask); + int pos_trail = __builtin_ctzll (mask); + int count_expected = __builtin_popcount (n - 1); + + /* Handle the exceptions */ + if (n == 0) + return -1; /* Error */ + + if ((mask == 0) && (n == 1)) + return 0; /* OK */ + + if (((mask == 0) && (n != 1)) || ((mask != 0) && (n == 1))) + return -2; /* Error */ + + /* Check that mask is contiguous */ + if ((pos_lead - pos_trail) != count) + return -3; /* Error */ + + /* Check that mask contains the expected number of bits set */ + if (count != count_expected) + return -4; /* Error */ + + return 0; /* OK */ +} + +void +dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * + hqos, u32 pipe_profile_id) +{ + memcpy (&hqos->pipe[pipe_profile_id], &hqos_pipe_params_default, + sizeof 
(hqos_pipe_params_default)); +} + +void +dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos) +{ + struct rte_sched_subport_params *subport_params; + struct rte_sched_pipe_params *pipe_params; + u32 *pipe_map; + u32 i; + + memcpy (hqos, &hqos_params_default, sizeof (hqos_params_default)); + + /* pipe */ + vec_add2 (hqos->pipe, pipe_params, hqos->port.n_pipe_profiles); + + for (i = 0; i < vec_len (hqos->pipe); i++) + memcpy (&pipe_params[i], + &hqos_pipe_params_default, sizeof (hqos_pipe_params_default)); + + hqos->port.pipe_profiles = hqos->pipe; + + /* subport */ + vec_add2 (hqos->subport, subport_params, hqos->port.n_subports_per_port); + + for (i = 0; i < vec_len (hqos->subport); i++) + memcpy (&subport_params[i], + &hqos_subport_params_default, + sizeof (hqos_subport_params_default)); + + /* pipe profile */ + vec_add2 (hqos->pipe_map, + pipe_map, + hqos->port.n_subports_per_port * hqos->port.n_pipes_per_subport); + + for (i = 0; i < vec_len (hqos->pipe_map); i++) + pipe_map[i] = 0; +} + +/*** + * + * HQoS init + * + ***/ + +clib_error_t * +dpdk_port_setup_hqos (dpdk_device_t * xd, dpdk_device_config_hqos_t * hqos) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + char name[32]; + u32 subport_id, i; + int rv; + + /* Detect the set of worker threads */ + int worker_thread_first = 0; + int worker_thread_count = 0; + + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + vlib_thread_registration_t *tr = + p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + worker_thread_first = tr->first_index; + worker_thread_count = tr->count; + } + + /* Allocate the per-thread device data array */ + vec_validate_aligned (xd->hqos_wt, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + memset (xd->hqos_wt, 0, tm->n_vlib_mains * sizeof (xd->hqos_wt[0])); + + vec_validate_aligned (xd->hqos_ht, 0, CLIB_CACHE_LINE_BYTES); + memset (xd->hqos_ht, 0, sizeof (xd->hqos_ht[0])); + + /* Allocate space for one SWQ per worker thread in the I/O TX thread data structure */ + vec_validate (xd->hqos_ht->swq, worker_thread_count); + + /* SWQ */ + for (i = 0; i < worker_thread_count + 1; i++) + { + u32 swq_flags = RING_F_SP_ENQ | RING_F_SC_DEQ; + + snprintf (name, sizeof (name), "SWQ-worker%u-to-device%u", i, + xd->device_index); + xd->hqos_ht->swq[i] = + rte_ring_create (name, hqos->swq_size, xd->cpu_socket, swq_flags); + if (xd->hqos_ht->swq[i] == NULL) + return clib_error_return (0, + "SWQ-worker%u-to-device%u: rte_ring_create err", + i, xd->device_index); + } + + /* + * HQoS + */ + + /* HQoS port */ + snprintf (name, sizeof (name), "HQoS%u", xd->device_index); + hqos->port.name = strdup (name); + if (hqos->port.name == NULL) + return clib_error_return (0, "HQoS%u: strdup err", xd->device_index); + + hqos->port.socket = rte_eth_dev_socket_id (xd->device_index); + if (hqos->port.socket == SOCKET_ID_ANY) + hqos->port.socket = 0; + + xd->hqos_ht->hqos = rte_sched_port_config (&hqos->port); + if (xd->hqos_ht->hqos == NULL) + return clib_error_return (0, "HQoS%u: rte_sched_port_config err", + xd->device_index); + + /* HQoS subport */ + for (subport_id = 0; subport_id < hqos->port.n_subports_per_port; + subport_id++) + { + u32 pipe_id; + + rv = + rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, + &hqos->subport[subport_id]); + if (rv) + return clib_error_return (0, + "HQoS%u subport %u: rte_sched_subport_config err (%d)", + xd->device_index, subport_id, rv); + + /* HQoS pipe */ + for (pipe_id = 0; pipe_id < hqos->port.n_pipes_per_subport; pipe_id++) + 
{ + u32 pos = subport_id * hqos->port.n_pipes_per_subport + pipe_id; + u32 profile_id = hqos->pipe_map[pos]; + + rv = + rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, + profile_id); + if (rv) + return clib_error_return (0, + "HQoS%u subport %u pipe %u: rte_sched_pipe_config err (%d)", + xd->device_index, subport_id, pipe_id, + rv); + } + } + + /* Set up per-thread device data for the I/O TX thread */ + xd->hqos_ht->hqos_burst_enq = hqos->burst_enq; + xd->hqos_ht->hqos_burst_deq = hqos->burst_deq; + vec_validate (xd->hqos_ht->pkts_enq, 2 * hqos->burst_enq - 1); + vec_validate (xd->hqos_ht->pkts_deq, hqos->burst_deq - 1); + xd->hqos_ht->pkts_enq_len = 0; + xd->hqos_ht->swq_pos = 0; + xd->hqos_ht->flush_count = 0; + + /* Set up per-thread device data for each worker thread */ + for (i = 0; i < worker_thread_count + 1; i++) + { + u32 tid; + if (i) + tid = worker_thread_first + (i - 1); + else + tid = i; + + xd->hqos_wt[tid].swq = xd->hqos_ht->swq[i]; + xd->hqos_wt[tid].hqos_field0_slabpos = hqos->pktfield0_slabpos; + xd->hqos_wt[tid].hqos_field0_slabmask = hqos->pktfield0_slabmask; + xd->hqos_wt[tid].hqos_field0_slabshr = + __builtin_ctzll (hqos->pktfield0_slabmask); + xd->hqos_wt[tid].hqos_field1_slabpos = hqos->pktfield1_slabpos; + xd->hqos_wt[tid].hqos_field1_slabmask = hqos->pktfield1_slabmask; + xd->hqos_wt[tid].hqos_field1_slabshr = + __builtin_ctzll (hqos->pktfield1_slabmask); + xd->hqos_wt[tid].hqos_field2_slabpos = hqos->pktfield2_slabpos; + xd->hqos_wt[tid].hqos_field2_slabmask = hqos->pktfield2_slabmask; + xd->hqos_wt[tid].hqos_field2_slabshr = + __builtin_ctzll (hqos->pktfield2_slabmask); + memcpy (xd->hqos_wt[tid].hqos_tc_table, hqos->tc_table, + sizeof (hqos->tc_table)); + } + + return 0; +} + +/*** + * + * HQoS run-time + * + ***/ +/* + * dpdk_hqos_thread - Contains the main loop of an HQoS thread. 
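+ * In the normal (non-bypass) mode, each iteration drains the per-worker
+ * software rings (SWQs) for the current device, enqueues the packets into the
+ * HQoS (rte_sched) port, then dequeues scheduled packets and transmits them
+ * on the device TX queue with rte_eth_tx_burst ().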
+ * + * w + * Information for the current thread + */ +static_always_inline void +dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + u32 cpu_index = vm->cpu_index; + u32 dev_pos; + + dev_pos = 0; + while (1) + { + vlib_worker_thread_barrier_check (); + + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + if (dev_pos >= n_devs) + dev_pos = 0; + + dpdk_device_and_queue_t *dq = + vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); + + dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; + u32 device_index = xd->device_index; + u16 queue_id = dq->queue_id; + + struct rte_mbuf **pkts_enq = hqos->pkts_enq; + u32 pkts_enq_len = hqos->pkts_enq_len; + u32 swq_pos = hqos->swq_pos; + u32 n_swq = vec_len (hqos->swq), i; + u32 flush_count = hqos->flush_count; + + for (i = 0; i < n_swq; i++) + { + /* Get current SWQ for this device */ + struct rte_ring *swq = hqos->swq[swq_pos]; + + /* Read SWQ burst to packet buffer of this device */ + pkts_enq_len += rte_ring_sc_dequeue_burst (swq, + (void **) + &pkts_enq[pkts_enq_len], + hqos->hqos_burst_enq); + + /* Get next SWQ for this device */ + swq_pos++; + if (swq_pos >= n_swq) + swq_pos = 0; + hqos->swq_pos = swq_pos; + + /* HWQ TX enqueue when burst available */ + if (pkts_enq_len >= hqos->hqos_burst_enq) + { + u32 n_pkts = rte_eth_tx_burst (device_index, + (uint16_t) queue_id, + pkts_enq, + (uint16_t) pkts_enq_len); + + for (; n_pkts < pkts_enq_len; n_pkts++) + rte_pktmbuf_free (pkts_enq[n_pkts]); + + pkts_enq_len = 0; + flush_count = 0; + break; + } + } + if (pkts_enq_len) + { + flush_count++; + if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + } + } + hqos->pkts_enq_len = pkts_enq_len; + hqos->flush_count = flush_count; + + /* Advance to next device */ + dev_pos++; + } +} + +static_always_inline void +dpdk_hqos_thread_internal (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + u32 cpu_index = vm->cpu_index; + u32 dev_pos; + + dev_pos = 0; + while (1) + { + vlib_worker_thread_barrier_check (); + + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + if (PREDICT_FALSE (n_devs == 0)) + { + dev_pos = 0; + continue; + } + if (dev_pos >= n_devs) + dev_pos = 0; + + dpdk_device_and_queue_t *dq = + vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); + + dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; + u32 device_index = xd->device_index; + u16 queue_id = dq->queue_id; + + struct rte_mbuf **pkts_enq = hqos->pkts_enq; + struct rte_mbuf **pkts_deq = hqos->pkts_deq; + u32 pkts_enq_len = hqos->pkts_enq_len; + u32 swq_pos = hqos->swq_pos; + u32 n_swq = vec_len (hqos->swq), i; + u32 flush_count = hqos->flush_count; + + /* + * SWQ dequeue and HQoS enqueue for current device + */ + for (i = 0; i < n_swq; i++) + { + /* Get current SWQ for this device */ + struct rte_ring *swq = hqos->swq[swq_pos]; + + /* Read SWQ burst to packet buffer of this device */ + pkts_enq_len += rte_ring_sc_dequeue_burst (swq, + (void **) + &pkts_enq[pkts_enq_len], + hqos->hqos_burst_enq); + + /* Get next SWQ for this device */ + swq_pos++; + if (swq_pos >= n_swq) + swq_pos = 0; + hqos->swq_pos = swq_pos; + + /* HQoS enqueue when burst available */ + if (pkts_enq_len >= hqos->hqos_burst_enq) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, 
pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + break; + } + } + if (pkts_enq_len) + { + flush_count++; + if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + } + } + hqos->pkts_enq_len = pkts_enq_len; + hqos->flush_count = flush_count; + + /* + * HQoS dequeue and HWQ TX enqueue for current device + */ + { + u32 pkts_deq_len, n_pkts; + + pkts_deq_len = rte_sched_port_dequeue (hqos->hqos, + pkts_deq, + hqos->hqos_burst_deq); + + for (n_pkts = 0; n_pkts < pkts_deq_len;) + n_pkts += rte_eth_tx_burst (device_index, + (uint16_t) queue_id, + &pkts_deq[n_pkts], + (uint16_t) (pkts_deq_len - n_pkts)); + } + + /* Advance to next device */ + dev_pos++; + } +} + +void +dpdk_hqos_thread (vlib_worker_thread_t * w) +{ + vlib_main_t *vm; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + + vm = vlib_get_main (); + + ASSERT (vm->cpu_index == os_get_cpu_number ()); + + clib_time_init (&vm->clib_time); + clib_mem_set_heap (w->thread_mheap); + + /* Wait until the dpdk init sequence is complete */ + while (tm->worker_thread_release == 0) + vlib_worker_thread_barrier_check (); + + if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0) + return + clib_error + ("current I/O TX thread does not have any devices assigned to it"); + + if (DPDK_HQOS_DBG_BYPASS) + dpdk_hqos_thread_internal_hqos_dbg_bypass (vm); + else + dpdk_hqos_thread_internal (vm); +} + +void +dpdk_hqos_thread_fn (void *arg) +{ + vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; + vlib_worker_thread_init (w); + dpdk_hqos_thread (w); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_THREAD (hqos_thread_reg, static) = +{ + .name = "hqos-threads", + .short_name = "hqos-threads", + .function = dpdk_hqos_thread_fn, +}; +/* *INDENT-ON* */ + +/* + * HQoS run-time code to be called by the worker threads + */ +#define BITFIELD(byte_array, slab_pos, slab_mask, slab_shr) \ +({ \ + u64 slab = *((u64 *) &byte_array[slab_pos]); \ + u64 val = (rte_be_to_cpu_64(slab) & slab_mask) >> slab_shr; \ + val; \ +}) + +#define RTE_SCHED_PORT_HIERARCHY(subport, pipe, traffic_class, queue, color) \ + ((((u64) (queue)) & 0x3) | \ + ((((u64) (traffic_class)) & 0x3) << 2) | \ + ((((u64) (color)) & 0x3) << 4) | \ + ((((u64) (subport)) & 0xFFFF) << 16) | \ + ((((u64) (pipe)) & 0xFFFFFFFF) << 32)) + +void +dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, + struct rte_mbuf **pkts, u32 n_pkts) +{ + u32 i; + + for (i = 0; i < (n_pkts & (~0x3)); i += 4) + { + struct rte_mbuf *pkt0 = pkts[i]; + struct rte_mbuf *pkt1 = pkts[i + 1]; + struct rte_mbuf *pkt2 = pkts[i + 2]; + struct rte_mbuf *pkt3 = pkts[i + 3]; + + u8 *pkt0_data = rte_pktmbuf_mtod (pkt0, u8 *); + u8 *pkt1_data = rte_pktmbuf_mtod (pkt1, u8 *); + u8 *pkt2_data = rte_pktmbuf_mtod (pkt2, u8 *); + u8 *pkt3_data = rte_pktmbuf_mtod (pkt3, u8 *); + + u64 pkt0_subport = BITFIELD (pkt0_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt0_pipe = BITFIELD (pkt0_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt0_dscp = BITFIELD (pkt0_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt0_tc = hqos->hqos_tc_table[pkt0_dscp & 0x3F] >> 2; + u32 pkt0_tc_q = hqos->hqos_tc_table[pkt0_dscp & 0x3F] & 0x3; + + u64 pkt1_subport = BITFIELD (pkt1_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + 
hqos->hqos_field0_slabshr); + u64 pkt1_pipe = BITFIELD (pkt1_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt1_dscp = BITFIELD (pkt1_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt1_tc = hqos->hqos_tc_table[pkt1_dscp & 0x3F] >> 2; + u32 pkt1_tc_q = hqos->hqos_tc_table[pkt1_dscp & 0x3F] & 0x3; + + u64 pkt2_subport = BITFIELD (pkt2_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt2_pipe = BITFIELD (pkt2_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt2_dscp = BITFIELD (pkt2_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt2_tc = hqos->hqos_tc_table[pkt2_dscp & 0x3F] >> 2; + u32 pkt2_tc_q = hqos->hqos_tc_table[pkt2_dscp & 0x3F] & 0x3; + + u64 pkt3_subport = BITFIELD (pkt3_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt3_pipe = BITFIELD (pkt3_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt3_dscp = BITFIELD (pkt3_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt3_tc = hqos->hqos_tc_table[pkt3_dscp & 0x3F] >> 2; + u32 pkt3_tc_q = hqos->hqos_tc_table[pkt3_dscp & 0x3F] & 0x3; + + u64 pkt0_sched = RTE_SCHED_PORT_HIERARCHY (pkt0_subport, + pkt0_pipe, + pkt0_tc, + pkt0_tc_q, + 0); + u64 pkt1_sched = RTE_SCHED_PORT_HIERARCHY (pkt1_subport, + pkt1_pipe, + pkt1_tc, + pkt1_tc_q, + 0); + u64 pkt2_sched = RTE_SCHED_PORT_HIERARCHY (pkt2_subport, + pkt2_pipe, + pkt2_tc, + pkt2_tc_q, + 0); + u64 pkt3_sched = RTE_SCHED_PORT_HIERARCHY (pkt3_subport, + pkt3_pipe, + pkt3_tc, + pkt3_tc_q, + 0); + + pkt0->hash.sched.lo = pkt0_sched & 0xFFFFFFFF; + pkt0->hash.sched.hi = pkt0_sched >> 32; + pkt1->hash.sched.lo = pkt1_sched & 0xFFFFFFFF; + pkt1->hash.sched.hi = pkt1_sched >> 32; + pkt2->hash.sched.lo = pkt2_sched & 0xFFFFFFFF; + pkt2->hash.sched.hi = pkt2_sched >> 32; + pkt3->hash.sched.lo = pkt3_sched & 0xFFFFFFFF; + pkt3->hash.sched.hi = pkt3_sched >> 32; + } + + for (; i < n_pkts; i++) + { + struct rte_mbuf *pkt = pkts[i]; + + u8 *pkt_data = rte_pktmbuf_mtod (pkt, u8 *); + + u64 pkt_subport = BITFIELD (pkt_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt_pipe = BITFIELD (pkt_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt_dscp = BITFIELD (pkt_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt_tc = hqos->hqos_tc_table[pkt_dscp & 0x3F] >> 2; + u32 pkt_tc_q = hqos->hqos_tc_table[pkt_dscp & 0x3F] & 0x3; + + u64 pkt_sched = RTE_SCHED_PORT_HIERARCHY (pkt_subport, + pkt_pipe, + pkt_tc, + pkt_tc_q, + 0); + + pkt->hash.sched.lo = pkt_sched & 0xFFFFFFFF; + pkt->hash.sched.hi = pkt_sched >> 32; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/hqos/qos_doc.md b/src/plugins/dpdk/hqos/qos_doc.md new file mode 100644 index 00000000..7c064246 --- /dev/null +++ b/src/plugins/dpdk/hqos/qos_doc.md @@ -0,0 +1,411 @@ +# QoS Hierarchical Scheduler {#qos_doc} + +The Quality-of-Service (QoS) scheduler performs egress-traffic management by +prioritizing the transmission of the packets of different type services and +subcribers based on the Service 
Level Agreements (SLAs). The QoS scheduler can
+be enabled on one or more NIC output interfaces depending upon the
+requirement.
+
+
+## Overview
+
+The QoS scheduler supports a number of scheduling and shaping levels which
+construct a hierarchical tree. The first level in the hierarchy is the port
+(i.e. the physical interface), which constitutes the root node of the tree.
+The subsequent level is the subport, which represents a group of
+users/subscribers. The individual user/subscriber is represented by the pipe
+at the next level. Each user can have different traffic types based on the
+criteria of specific loss rate, jitter, and latency. These traffic types are
+represented at the traffic-class level in the form of different traffic
+classes. The last level contains a number of queues, which are grouped
+together to host the packets of the specific traffic class.
+
+The QoS scheduler implementation requires flow classification, enqueue and
+dequeue operations. The flow classification is a mandatory stage for HQoS,
+where incoming packets are classified by mapping the packet field information
+to a 5-tuple (HQoS subport, pipe, traffic class, queue within traffic class,
+and color) and storing that information in the mbuf sched field. The enqueue
+operation uses this information to determine the queue for storing the
+packet, and at this stage, if the specific queue is full, QoS drops the
+packet. The dequeue operation consists of scheduling the packet based on its
+length and available credits, and handing over the scheduled packet to the
+output interface.
+
+For more information on the QoS scheduler, please refer to the DPDK
+Programmer's Guide:
+http://dpdk.org/doc/guides/prog_guide/qos_framework.html
+
+
+### QoS Scheduler Parameters
+
+The following illustrates the default HQoS configuration for each 10GbE
+output port:
+
+Single subport (subport 0):
+  - Subport rate set to 100% of port rate
+  - Each of the 4 traffic classes has rate set to 100% of port rate
+
+4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration:
+  - Pipe rate set to 1/4K of port rate
+  - Each of the 4 traffic classes has rate set to 100% of pipe rate
+  - Within each traffic class, the byte-level WRR weights for the 4 queues are set to 1:1:1:1
+
+
+#### Port configuration
+
+```
+port {
+  rate 1250000000           /* Assuming 10GbE port */
+  frame_overhead 24         /* Overhead fields per Ethernet frame:
+                             * 7B (Preamble) +
+                             * 1B (Start of Frame Delimiter (SFD)) +
+                             * 4B (Frame Check Sequence (FCS)) +
+                             * 12B (Inter Frame Gap (IFG))
+                             */
+  mtu 1522                  /* Assuming Ethernet/IPv4 pkt (FCS not included) */
+  n_subports_per_port 1     /* Number of subports per output interface */
+  n_pipes_per_subport 4096  /* Number of pipes (users/subscribers) */
+  queue_sizes 64 64 64 64   /* Packet queue size for each traffic class.
+                             * All queues within the same pipe traffic class
+                             * have the same size. Queues from different
+                             * pipes serving the same traffic class have
+                             * the same size.
*/ +} +``` + + +#### Subport configuration + +``` +subport 0 { + tb_rate 1250000000 /* Subport level token bucket rate (bytes per second) */ + tb_size 1000000 /* Subport level token bucket size (bytes) */ + tc0_rate 1250000000 /* Subport level token bucket rate for traffic class 0 (bytes per second) */ + tc1_rate 1250000000 /* Subport level token bucket rate for traffic class 1 (bytes per second) */ + tc2_rate 1250000000 /* Subport level token bucket rate for traffic class 2 (bytes per second) */ + tc3_rate 1250000000 /* Subport level token bucket rate for traffic class 3 (bytes per second) */ + tc_period 10 /* Time interval for refilling the token bucket associated with traffic class (Milliseconds) */ + pipe 0 4095 profile 0 /* pipes (users/subscribers) configured with pipe profile 0 */ +} +``` + + +#### Pipe configuration + +``` +pipe_profile 0 { + tb_rate 305175 /* Pipe level token bucket rate (bytes per second) */ + tb_size 1000000 /* Pipe level token bucket size (bytes) */ + tc0_rate 305175 /* Pipe level token bucket rate for traffic class 0 (bytes per second) */ + tc1_rate 305175 /* Pipe level token bucket rate for traffic class 1 (bytes per second) */ + tc2_rate 305175 /* Pipe level token bucket rate for traffic class 2 (bytes per second) */ + tc3_rate 305175 /* Pipe level token bucket rate for traffic class 3 (bytes per second) */ + tc_period 40 /* Time interval for refilling the token bucket associated with traffic class at pipe level (Milliseconds) */ + tc3_oversubscription_weight 1 /* Weight traffic class 3 oversubscription */ + tc0_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 0 */ + tc1_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 1 */ + tc2_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 2 */ + tc3_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 3 */ +} +``` + + +#### Random Early Detection (RED) parameters per traffic class and color (Green / Yellow / Red) + +``` +red { + tc0_wred_min 48 40 32 /* Minimum threshold for traffic class 0 queue (min_th) in number of packets */ + tc0_wred_max 64 64 64 /* Maximum threshold for traffic class 0 queue (max_th) in number of packets */ + tc0_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 0 queue (maxp = 1 / maxp_inv) */ + tc0_wred_weight 9 9 9 /* Traffic Class 0 queue weight */ + tc1_wred_min 48 40 32 /* Minimum threshold for traffic class 1 queue (min_th) in number of packets */ + tc1_wred_max 64 64 64 /* Maximum threshold for traffic class 1 queue (max_th) in number of packets */ + tc1_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 1 queue (maxp = 1 / maxp_inv) */ + tc1_wred_weight 9 9 9 /* Traffic Class 1 queue weight */ + tc2_wred_min 48 40 32 /* Minimum threshold for traffic class 2 queue (min_th) in number of packets */ + tc2_wred_max 64 64 64 /* Maximum threshold for traffic class 2 queue (max_th) in number of packets */ + tc2_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 2 queue (maxp = 1 / maxp_inv) */ + tc2_wred_weight 9 9 9 /* Traffic Class 2 queue weight */ + tc3_wred_min 48 40 32 /* Minimum threshold for traffic class 3 queue (min_th) in number of packets */ + tc3_wred_max 64 64 64 /* Maximum threshold for traffic class 3 queue (max_th) in number of packets */ + tc3_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 3 queue (maxp = 1 / maxp_inv) */ + tc3_wred_weight 9 9 9 /* Traffic Class 3 queue weight */ +} +``` + + 
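+
+#### Example: building the default hierarchy with librte_sched
+
+The sketch below is illustrative only and is not part of VPP: it feeds the
+default values documented above directly into DPDK's librte_sched API
+(rte_sched_port_config(), rte_sched_subport_config(),
+rte_sched_pipe_config()), which is what dpdk_port_setup_hqos() in
+hqos/hqos.c does with the same defaults. The function name hqos_port_sketch
+and the minimal error handling are invented for the example.
+
+```
+#include <rte_sched.h>
+
+/* Illustrative sketch: one port, one subport, 4K pipes all using pipe
+ * profile 0, configured with the default values listed above. */
+static struct rte_sched_port *
+hqos_port_sketch (int socket_id)
+{
+  static struct rte_sched_pipe_params pipe_profile = {
+    .tb_rate = 305175,          /* ~= 1250000000 bytes/s / 4096 pipes */
+    .tb_size = 1000000,
+    .tc_rate = {305175, 305175, 305175, 305175},
+    .tc_period = 40,
+    .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+  };
+  struct rte_sched_port_params port_params = {
+    .name = "HQoS-sketch",
+    .socket = socket_id,
+    .rate = 1250000000,         /* 10GbE port, bytes/second */
+    .mtu = 1522,                /* as in the port configuration above */
+    .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
+    .n_subports_per_port = 1,
+    .n_pipes_per_subport = 4096,
+    .qsize = {64, 64, 64, 64},
+    .pipe_profiles = &pipe_profile,
+    .n_pipe_profiles = 1,
+  };
+  struct rte_sched_subport_params subport_params = {
+    .tb_rate = 1250000000,      /* subport runs at 100% of port rate */
+    .tb_size = 1000000,
+    .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
+    .tc_period = 10,
+  };
+  struct rte_sched_port *port = rte_sched_port_config (&port_params);
+  int pipe_id;
+
+  if (port == NULL)
+    return NULL;
+  if (rte_sched_subport_config (port, 0, &subport_params) != 0)
+    return NULL;
+  for (pipe_id = 0; pipe_id < 4096; pipe_id++)
+    rte_sched_pipe_config (port, 0, pipe_id, 0); /* all pipes -> profile 0 */
+  return port;
+}
+```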
+### DPDK QoS Scheduler Integration in VPP
+
+The Hierarchical Quality-of-Service (HQoS) scheduler object can be seen as
+part of the logical NIC output interface. To enable HQoS on a specific output
+interface, the VPP startup.conf file has to be configured accordingly. The
+output interface that requires HQoS should have the "hqos" parameter
+specified in the dpdk section. Another optional parameter, "hqos-thread", can
+be used to associate the output interface with a specific HQoS thread. In the
+cpu section of the config file, "corelist-hqos-threads" is used to assign
+logical CPU cores to run the HQoS threads. An HQoS thread can run multiple
+HQoS objects, each associated with a different output interface. Instead of
+writing packets to the NIC TX queue directly, the worker threads write them
+to software queues. The HQoS threads read the software queues and enqueue the
+packets into the HQoS objects, as well as dequeue packets from the HQoS
+objects and write them to the NIC output interfaces. The worker threads need
+to be able to send packets to any output interface; therefore, each HQoS
+object associated with a NIC output interface should have one software queue
+per worker thread.
+
+The following illustrates a sample startup configuration file with 4 worker
+threads feeding 2 HQoS threads, each handling the QoS scheduler for one
+output interface.
+
+```
+dpdk {
+  socket-mem 16384,16384
+
+  dev 0000:02:00.0 {
+    num-rx-queues 2
+    hqos
+  }
+  dev 0000:06:00.0 {
+    num-rx-queues 2
+    hqos
+  }
+
+  num-mbufs 1000000
+}
+
+cpu {
+  main-core 0
+  corelist-workers 1, 2, 3, 4
+  corelist-hqos-threads 5, 6
+}
+```
+
+
+### QoS scheduler CLI Commands
+
+Each QoS scheduler instance is initialised with the default parameters
+required to configure the HQoS port, subport, pipe and queues. Some of the
+parameters can be re-configured at run-time through CLI commands.
+
+
+#### Configuration
+
+The following commands can be used to configure QoS scheduler parameters.
+
+The command below sets the subport level parameters such as token bucket
+rate (bytes per second), token bucket size (bytes), traffic class rates
+(bytes per second) and token update period (milliseconds).
+
+```
+set dpdk interface hqos subport <interface> subport <subport_id> [rate <n>]
+    [bktsize <n>] [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]
+```
+
+The following command sets the pipe profile.
+
+```
+set dpdk interface hqos pipe <interface> subport <subport_id> pipe <pipe_id>
+    profile <profile_id>
+```
+
+The following command assigns a QoS scheduler instance to a specific thread.
+
+```
+set dpdk interface hqos placement <interface> thread <n>
+```
+
+The command below sets the packet fields required for classifying the
+incoming packet. As a result of the classification process, the packet field
+information is mapped to a 5-tuple (subport, pipe, traffic class, queue
+within traffic class, color) and stored in the packet mbuf.
+
+```
+set dpdk interface hqos pktfield <interface> id subport|pipe|tc offset <n>
+    mask <hex-mask>
+```
+
+The DSCP table entries used for identifying the traffic class and queue can
+be set using the command below:
+
+```
+set dpdk interface hqos tctbl <interface> entry <n> tc <n> queue <n>
+```
+
+
+#### Show Command
+
+The QoS Scheduler configuration can be displayed using the command below.
+ +``` + vpp# show dpdk interface hqos TenGigabitEthernet2/0/0 + Thread: + Input SWQ size = 4096 packets + Enqueue burst size = 256 packets + Dequeue burst size = 220 packets + Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport) + Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe) + Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc) + Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...) + [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + [48 .. 63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + Port: + Rate = 1250000000 bytes/second + MTU = 1514 bytes + Frame overhead = 24 bytes + Number of subports = 1 + Number of pipes per subport = 4096 + Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets + Number of pipe profiles = 1 + Subport 0: + Rate = 120000000 bytes/second + Token bucket size = 1000000 bytes + Traffic class rate: TC0 = 120000000, TC1 = 120000000, TC2 = 120000000, TC3 = 120000000 bytes/second + TC period = 10 milliseconds + Pipe profile 0: + Rate = 305175 bytes/second + Token bucket size = 1000000 bytes + Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second + TC period = 40 milliseconds + TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 +``` + +The QoS Scheduler placement over the logical cpu cores can be displayed using +below command. + +``` + vpp# show dpdk interface hqos placement + Thread 5 (vpp_hqos-threads_0 at lcore 5): + TenGigabitEthernet2/0/0 queue 0 + Thread 6 (vpp_hqos-threads_1 at lcore 6): + TenGigabitEthernet4/0/1 queue 0 +``` + + +### QoS Scheduler Binary APIs + +This section explans the available binary APIs for configuring QoS scheduler +parameters in run-time. + +The following API can be used to set the pipe profile of a pipe that belongs +to a given subport: + +``` +sw_interface_set_dpdk_hqos_pipe rx | sw_if_index + subport pipe profile +``` + +The data structures used for set the pipe profile parameter are as follows; + +``` + /** \\brief DPDK interface HQoS pipe profile set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param pipe - pipe ID within its subport + @param profile - pipe profile ID + */ + define sw_interface_set_dpdk_hqos_pipe { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 pipe; + u32 profile; + }; + + /** \\brief DPDK interface HQoS pipe profile set reply + @param context - sender context, to match reply w/ request + @param retval - request return code + */ + define sw_interface_set_dpdk_hqos_pipe_reply { + u32 context; + i32 retval; + }; +``` + +The following API can be used to set the subport level parameters, for +example- token bucket rate (bytes per seconds), token bucket size (bytes), +traffic class rate (bytes per seconds) and tokens update period. 
+ +``` +sw_interface_set_dpdk_hqos_subport rx | sw_if_index + subport [rate ] [bktsize ] + [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ] +``` + +The data structures used for set the subport level parameter are as follows; + +``` + /** \\brief DPDK interface HQoS subport parameters set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param tb_rate - subport token bucket rate (measured in bytes/second) + @param tb_size - subport token bucket size (measured in credits) + @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) + @param tc_period - enforcement period for rates (measured in milliseconds) + */ + define sw_interface_set_dpdk_hqos_subport { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 tb_rate; + u32 tb_size; + u32 tc_rate[4]; + u32 tc_period; + }; + + /** \\brief DPDK interface HQoS subport parameters set reply + @param context - sender context, to match reply w/ request + @param retval - request return code + */ + define sw_interface_set_dpdk_hqos_subport_reply { + u32 context; + i32 retval; + }; +``` + +The following API can be used set the DSCP table entry. The DSCP table have +64 entries to map the packet DSCP field onto traffic class and hqos input +queue. + +``` +sw_interface_set_dpdk_hqos_tctbl rx | sw_if_index + entry tc queue +``` + +The data structures used for setting DSCP table entries are given below. + +``` + /** \\brief DPDK interface HQoS tctbl entry set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param entry - entry index ID + @param tc - traffic class (0 .. 3) + @param queue - traffic class queue (0 .. 3) + */ + define sw_interface_set_dpdk_hqos_tctbl { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 entry; + u32 tc; + u32 queue; + }; + + /** \\brief DPDK interface HQoS tctbl entry set reply + @param context - sender context, to match reply w/ request + @param retval - request return code + */ + define sw_interface_set_dpdk_hqos_tctbl_reply { + u32 context; + i32 retval; + }; +``` diff --git a/src/plugins/dpdk/init.c b/src/plugins/dpdk/init.c new file mode 100755 index 00000000..e009ef3e --- /dev/null +++ b/src/plugins/dpdk/init.c @@ -0,0 +1,2074 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +dpdk_main_t dpdk_main; + +#include +#include + +/* define message IDs */ +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include +#undef vl_api_version + +/* Macro to finish up custom dump fns */ +#define FINISH \ + vec_add1 (s, 0); \ + vl_print (handle, (char *)s); \ + vec_free (s); \ + return handle; + +#include + +static void + vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + u32 pipe = ntohl (mp->pipe); + u32 profile = ntohl (mp->profile); + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = format (s, "subport %u pipe %u profile %u ", + ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile)); + + FINISH; +} + +static void + vl_api_sw_interface_set_dpdk_hqos_subport_t_handler + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + struct rte_sched_subport_params p; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + p.tb_rate = ntohl (mp->tb_rate); + p.tb_size = ntohl (mp->tb_size); + p.tc_rate[0] = ntohl (mp->tc_rate[0]); + p.tc_rate[1] = ntohl (mp->tc_rate[1]); + p.tc_rate[2] = ntohl (mp->tc_rate[2]); + p.tc_rate[3] = ntohl (mp->tc_rate[3]); + p.tc_period = ntohl (mp->tc_period); + + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = + format (s, + "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u", + ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size), + ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]), + ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]), + ntohl (mp->tc_period)); + + FINISH; +} + +static void + vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 entry = ntohl (mp->entry); + u32 tc = ntohl (mp->tc); + u32 queue = ntohl (mp->queue); + u32 val, i; 
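+
+  /* The code below validates tc/queue against the librte_sched limits and
+   * then writes the combined table entry
+   * (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue) into the hqos_tc_table
+   * of every worker thread for this interface. */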
+ + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + { + clib_warning ("invalid traffic class !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + { + clib_warning ("invalid queue !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + + if (p == 0) + { + clib_warning ("worker thread registration AWOL !!"); + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto done; + } + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + for (i = 0; i < worker_thread_count; i++) + xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; + + BAD_SW_IF_INDEX_LABEL; +done: + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = format (s, "entry %u tc %u queue %u", + ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue)); + + FINISH; +} + +#define foreach_dpdk_plugin_api_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) + +/* Set up the API message handling tables */ +static clib_error_t * +dpdk_plugin_api_hookup (vlib_main_t * vm) +{ + dpdk_main_t *dm __attribute__ ((unused)) = &dpdk_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_dpdk_plugin_api_msg; +#undef _ + return 0; +} + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (dpdk_main_t * dm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + dm->msg_id_base); + foreach_vl_msg_name_crc_dpdk; +#undef _ +} + +// TODO +/* +static void plugin_custom_dump_configure (dpdk_main_t * dm) +{ +#define _(n,f) dm->api_main->msg_print_handlers \ + [VL_API_##n + dm->msg_id_base] \ + = (void *) vl_api_##f##_t_print; + foreach_dpdk_plugin_api_msg; +#undef _ +} +*/ +/* force linker to link functions used by vlib and declared weak */ +void *vlib_weakly_linked_functions[] = { + &rte_pktmbuf_init, + &rte_pktmbuf_pool_init, +}; + +#define LINK_STATE_ELOGS 0 + +#define DEFAULT_HUGE_DIR "/run/vpp/hugepages" +#define VPP_RUN_DIR "/run/vpp" + +/* Port configuration, mildly modified Intel app values */ + +static struct rte_eth_conf port_conf_template = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +clib_error_t * 
+dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) +{ + int rv; + int j; + + ASSERT (os_get_cpu_number () == 0); + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); + rte_eth_dev_stop (xd->device_index); + } + + rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, + xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + /* Set up one TX-queue per worker thread */ + for (j = 0; j < xd->tx_q_used; j++) + { + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + xd->cpu_socket, &xd->tx_conf); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + SOCKET_ID_ANY, &xd->tx_conf); + if (rv < 0) + break; + } + + if (rv < 0) + return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", + xd->device_index, rv); + + for (j = 0; j < xd->rx_q_used; j++) + { + + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + xd->cpu_socket, 0, + dm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + SOCKET_ID_ANY, 0, + dm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); + if (rv < 0) + return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", + xd->device_index, rv); + } + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + int rv; + rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + if (rv < 0) + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } + return 0; +} + +static u32 +dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + u32 old = 0; + + if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags)) + { + old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0; + + if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) + xd->flags |= DPDK_DEVICE_FLAG_PROMISC; + else + xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC; + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) + rte_eth_promiscuous_enable (xd->device_index); + else + rte_eth_promiscuous_disable (xd->device_index); + } + } + else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) + { + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_ENIC) + { + struct rte_eth_dev_info dev_info; + + /* + * Restore mtu to what has been set by CIMC in the firmware cfg. 
+ */ + rte_eth_dev_info_get (xd->device_index, &dev_info); + hi->max_packet_bytes = dev_info.max_rx_pktlen; + + vlib_cli_output (vlib_get_main (), + "Cisco VIC mtu can only be changed " + "using CIMC then rebooting the server!"); + } + else + { + int rv; + + xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + rte_eth_dev_stop (xd->device_index); + + rv = rte_eth_dev_configure + (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + vlib_cli_output (vlib_get_main (), + "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + int rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + if (rv < 0) + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } + } + } + return old; +} + +void +dpdk_device_lock_init (dpdk_device_t * xd) +{ + int q; + vec_validate (xd->lockp, xd->tx_q_used - 1); + for (q = 0; q < xd->tx_q_used; q++) + { + xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); + } +} + +void +dpdk_device_lock_free (dpdk_device_t * xd) +{ + int q; + + for (q = 0; q < vec_len (xd->lockp); q++) + clib_mem_free ((void *) xd->lockp[q]); + vec_free (xd->lockp); + xd->lockp = 0; +} + +static clib_error_t * +dpdk_lib_init (dpdk_main_t * dm) +{ + u32 nports; + u32 nb_desc = 0; + int i; + clib_error_t *error; + vlib_main_t *vm = vlib_get_main (); + vlib_thread_main_t *tm = vlib_get_thread_main (); + vnet_sw_interface_t *sw; + vnet_hw_interface_t *hi; + dpdk_device_t *xd; + vlib_pci_addr_t last_pci_addr; + u32 last_pci_addr_port = 0; + vlib_thread_registration_t *tr, *tr_hqos; + uword *p, *p_hqos; + + u32 next_cpu = 0, next_hqos_cpu = 0; + u8 af_packet_port_id = 0; + last_pci_addr.as_u32 = ~0; + + dm->input_cpu_first_index = 0; + dm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + dm->input_cpu_first_index = tr->first_index; + dm->input_cpu_count = tr->count; + } + + vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + dm->hqos_cpu_first_index = 0; + dm->hqos_cpu_count = 0; + + /* find out which cpus will be used for I/O TX */ + p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads"); + tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0; + + if (tr_hqos && tr_hqos->count > 0) + { + dm->hqos_cpu_first_index = tr_hqos->first_index; + dm->hqos_cpu_count = tr_hqos->count; + } + + vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + nports = rte_eth_dev_count (); + if (nports < 1) + { + clib_warning ("DPDK drivers found no ports..."); + } + + if (CLIB_DEBUG > 0) + clib_warning ("DPDK drivers found %d ports...", nports); + + /* + * All buffers are all allocated from the same rte_mempool. + * Thus they all have the same number of data bytes. 
+ */ + dm->vlib_buffer_free_list_index = + vlib_buffer_get_or_create_free_list (vm, + VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, + "dpdk rx"); + + if (dm->conf->enable_tcp_udp_checksum) + dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT + | IP_BUFFER_L4_CHECKSUM_COMPUTED); + + for (i = 0; i < nports; i++) + { + u8 addr[6]; + u8 vlan_strip = 0; + int j; + struct rte_eth_dev_info dev_info; + clib_error_t *rv; + struct rte_eth_link l; + dpdk_device_config_t *devconf = 0; + vlib_pci_addr_t pci_addr; + uword *p = 0; + + rte_eth_dev_info_get (i, &dev_info); + if (dev_info.pci_dev) /* bonded interface has no pci info */ + { + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + p = + hash_get (dm->conf->device_config_index_by_pci_addr, + pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + /* Create vnet interface */ + vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); + xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; + xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; + xd->cpu_socket = (i8) rte_eth_dev_socket_id (i); + + /* Handle interface naming for devices with multiple ports sharing same PCI ID */ + if (dev_info.pci_dev) + { + struct rte_eth_dev_info di = { 0 }; + rte_eth_dev_info_get (i + 1, &di); + if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 && + memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr, + sizeof (struct rte_pci_addr)) == 0) + { + xd->interface_name_suffix = format (0, "0"); + last_pci_addr.as_u32 = pci_addr.as_u32; + last_pci_addr_port = i; + } + else if (pci_addr.as_u32 == last_pci_addr.as_u32) + { + xd->interface_name_suffix = + format (0, "%u", i - last_pci_addr_port); + } + else + { + last_pci_addr.as_u32 = ~0; + } + } + else + last_pci_addr.as_u32 = ~0; + + clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, + sizeof (struct rte_eth_txconf)); + if (dm->conf->no_multi_seg) + { + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 0; + } + else + { + xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 1; + xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG; + } + + clib_memcpy (&xd->port_conf, &port_conf_template, + sizeof (struct rte_eth_conf)); + + xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains); + + if (devconf->num_tx_queues > 0 + && devconf->num_tx_queues < xd->tx_q_used) + xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues); + + if (devconf->num_rx_queues > 1 && dm->use_rss == 0) + { + dm->use_rss = 1; + } + + if (devconf->num_rx_queues > 1 + && dev_info.max_rx_queues >= devconf->num_rx_queues) + { + xd->rx_q_used = devconf->num_rx_queues; + xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + if (devconf->rss_fn == 0) + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = + ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; + else + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; + } + else + xd->rx_q_used = 1; + + xd->flags |= DPDK_DEVICE_FLAG_PMD; + + /* workaround for drivers not setting driver_name */ + if ((!dev_info.driver_name) && (dev_info.pci_dev)) + dev_info.driver_name = dev_info.pci_dev->driver->driver.name; + + ASSERT (dev_info.driver_name); + + if (!xd->pmd) + { + + +#define _(s,f) else if (dev_info.driver_name && \ + !strcmp(dev_info.driver_name, s)) \ + xd->pmd = VNET_DPDK_PMD_##f; + if (0) + ; + 
foreach_dpdk_pmd +#undef _ + else + xd->pmd = VNET_DPDK_PMD_UNKNOWN; + + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; + xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; + + switch (xd->pmd) + { + /* 1G adapters */ + case VNET_DPDK_PMD_E1000EM: + case VNET_DPDK_PMD_IGB: + case VNET_DPDK_PMD_IGBVF: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + break; + + /* 10G adapters */ + case VNET_DPDK_PMD_IXGBE: + case VNET_DPDK_PMD_IXGBEVF: + case VNET_DPDK_PMD_THUNDERX: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case VNET_DPDK_PMD_DPAA2: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + + /* Cisco VIC */ + case VNET_DPDK_PMD_ENIC: + rte_eth_link_get_nowait (i, &l); + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + if (l.link_speed == 40000) + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + else + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + + /* Intel Fortville */ + case VNET_DPDK_PMD_I40E: + case VNET_DPDK_PMD_I40EVF: + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + + switch (dev_info.pci_dev->id.device_id) + { + case I40E_DEV_ID_10G_BASE_T: + case I40E_DEV_ID_SFP_XL710: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case I40E_DEV_ID_QSFP_A: + case I40E_DEV_ID_QSFP_B: + case I40E_DEV_ID_QSFP_C: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case I40E_DEV_ID_VF: + rte_eth_link_get_nowait (i, &l); + xd->port_type = l.link_speed == 10000 ? + VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; + break; + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + + case VNET_DPDK_PMD_CXGBE: + switch (dev_info.pci_dev->id.device_id) + { + case 0x540d: /* T580-CR */ + case 0x5410: /* T580-LP-cr */ + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case 0x5403: /* T540-CR */ + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + + case VNET_DPDK_PMD_MLX5: + { + char *pn_100g[] = { "MCX415A-CCAT", "MCX416A-CCAT", 0 }; + char *pn_40g[] = { "MCX413A-BCAT", "MCX414A-BCAT", + "MCX415A-BCAT", "MCX416A-BCAT", "MCX4131A-BCAT", 0 + }; + char *pn_10g[] = { "MCX4111A-XCAT", "MCX4121A-XCAT", 0 }; + + vlib_pci_device_t *pd = vlib_get_pci_device (&pci_addr); + u8 *pn = 0; + char **c; + int found = 0; + pn = format (0, "%U%c", + format_vlib_pci_vpd, pd->vpd_r, "PN", 0); + + if (!pn) + break; + + c = pn_100g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_100G; + break; + } + c++; + } + + c = pn_40g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + } + c++; + } + + c = pn_10g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + } + c++; + } + + vec_free (pn); + } + + break; + /* Intel Red Rock Canyon */ + case VNET_DPDK_PMD_FM10K: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; + break; + + /* virtio */ + case VNET_DPDK_PMD_VIRTIO: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO; + xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO; + break; + + /* vmxnet3 */ + case VNET_DPDK_PMD_VMXNET3: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + break; + + case VNET_DPDK_PMD_AF_PACKET: + xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET; + 
xd->af_packet_port_id = af_packet_port_id++; + break; + + case VNET_DPDK_PMD_BOND: + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; + break; + + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + + if (devconf->num_rx_desc) + xd->nb_rx_desc = devconf->num_rx_desc; + + if (devconf->num_tx_desc) + xd->nb_tx_desc = devconf->num_tx_desc; + } + + /* + * Ensure default mtu is not > the mtu read from the hardware. + * Otherwise rte_eth_dev_configure() will fail and the port will + * not be available. + */ + if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen) + { + /* + * This device does not support the platforms's max frame + * size. Use it's advertised mru instead. + */ + xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen; + } + else + { + xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES; + + /* + * Some platforms do not account for Ethernet FCS (4 bytes) in + * MTU calculations. To interop with them increase mru but only + * if the device's settings can support it. + */ + if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) && + xd->port_conf.rxmode.hw_strip_crc) + { + /* + * Allow additional 4 bytes (for Ethernet FCS). These bytes are + * stripped by h/w and so will not consume any buffer memory. + */ + xd->port_conf.rxmode.max_rx_pkt_len += 4; + } + } + + if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + clib_memcpy (addr + 2, &rnd, sizeof (rnd)); + addr[0] = 2; + addr[1] = 0xfe; + } + else + rte_eth_macaddr_get (i, (struct ether_addr *) addr); + + if (xd->tx_q_used < tm->n_vlib_mains) + dpdk_device_lock_init (xd); + + xd->device_index = xd - dm->devices; + ASSERT (i == xd->device_index); + xd->per_interface_next_index = ~0; + + /* assign interface to input thread */ + dpdk_device_and_queue_t *dq; + int q; + + if (devconf->workers) + { + int i; + q = 0; + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, devconf->workers, ({ + int cpu = dm->input_cpu_first_index + i; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; + vec_validate(xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q++; + })); + /* *INDENT-ON* */ + } + else + for (q = 0; q < xd->rx_q_used; q++) + { + int cpu = dm->input_cpu_first_index + next_cpu; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; + + /* + * numa node for worker thread handling this queue + * needed for taking buffers from the right mempool + */ + vec_validate (xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore); + + /* + * construct vector of (device,queue) pairs for each worker thread + */ + vec_add2 (dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q; + + next_cpu++; + if (next_cpu == dm->input_cpu_count) + next_cpu = 0; + } + + + if (devconf->hqos_enabled) + { + xd->flags |= DPDK_DEVICE_FLAG_HQOS; + + if (devconf->hqos.hqos_thread_valid) + { + int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; + + if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count) + return clib_error_return (0, "invalid HQoS thread index"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + } + else + { + int cpu = dm->hqos_cpu_first_index + next_hqos_cpu; + + if (dm->hqos_cpu_count == 0) + return 
clib_error_return (0, "no HQoS threads available"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + + next_hqos_cpu++; + if (next_hqos_cpu == dm->hqos_cpu_count) + next_hqos_cpu = 0; + + devconf->hqos.hqos_thread_valid = 1; + devconf->hqos.hqos_thread = cpu; + } + } + + vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < tm->n_vlib_mains; j++) + { + vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc, + sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } + + vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < xd->rx_q_used; j++) + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } + + vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + + rv = dpdk_port_setup (dm, xd); + + if (rv) + return rv; + + if (devconf->hqos_enabled) + { + rv = dpdk_port_setup_hqos (xd, &devconf->hqos); + if (rv) + return rv; + } + + /* count the number of descriptors used for this device */ + nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; + + error = ethernet_register_interface + (dm->vnet_main, dpdk_device_class.index, xd->device_index, + /* ethernet address */ addr, + &xd->vlib_hw_if_index, dpdk_flag_change); + if (error) + return error; + + sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); + xd->vlib_sw_if_index = sw->sw_if_index; + hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); + + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_ENIC) + { + /* + * Initialize mtu to what has been set by CIMC in the firmware cfg. 
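+ * VLAN tag stripping is also enabled on these ports unless it was
+ * explicitly disabled in the device configuration.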
+ */ + hi->max_packet_bytes = dev_info.max_rx_pktlen; + if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF) + vlan_strip = 1; /* remove vlan tag from VIC port by default */ + else + clib_warning ("VLAN strip disabled for interface\n"); + } + else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON) + vlan_strip = 1; + + if (vlan_strip) + { + int vlan_off; + vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); + vlan_off |= ETH_VLAN_STRIP_OFFLOAD; + xd->port_conf.rxmode.hw_vlan_strip = vlan_off; + if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0) + clib_warning ("VLAN strip enabled for interface\n"); + else + clib_warning ("VLAN strip cannot be supported by interface\n"); + } + + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = + xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); + + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); + } + + if (nb_desc > dm->conf->num_mbufs) + clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", + dm->conf->num_mbufs, nb_desc); + + return 0; +} + +static void +dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) +{ + vlib_pci_main_t *pm = &pci_main; + clib_error_t *error; + vlib_pci_device_t *d; + u8 *pci_addr = 0; + int num_whitelisted = vec_len (conf->dev_confs); + + /* *INDENT-OFF* */ + pool_foreach (d, pm->pci_devs, ({ + dpdk_device_config_t * devconf = 0; + vec_reset_length (pci_addr); + pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); + + if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO) + continue; + + if (num_whitelisted) + { + uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32); + + if (!p) + continue; + + devconf = pool_elt_at_index (conf->dev_confs, p[0]); + } + + /* virtio */ + if (d->vendor_id == 0x1af4 && d->device_id == 0x1000) + ; + /* vmxnet3 */ + else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0) + ; + /* all Intel devices */ + else if (d->vendor_id == 0x8086) + ; + /* Cisco VIC */ + else if (d->vendor_id == 0x1137 && d->device_id == 0x0043) + ; + /* Chelsio T4/T5 */ + else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000) + ; + else + { + clib_warning ("Unsupported Ethernet PCI device 0x%04x:0x%04x found " + "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id, + pci_addr); + continue; + } + + error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name); + + if (error) + { + if (devconf == 0) + { + pool_get (conf->dev_confs, devconf); + hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32, + devconf - conf->dev_confs); + devconf->pci_addr.as_u32 = d->bus_address.as_u32; + } + devconf->is_blacklisted = 1; + clib_error_report (error); + } + })); + /* *INDENT-ON* */ + vec_free (pci_addr); +} + +static clib_error_t * +dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, + unformat_input_t * input, u8 is_default) +{ + clib_error_t *error = 0; + uword *p; + dpdk_device_config_t *devconf; + unformat_input_t sub_input; + + if (is_default) + { + devconf = &conf->default_devconf; + } + else + { + p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32); + + if (!p) + { + pool_get (conf->dev_confs, devconf); + hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32, + devconf - conf->dev_confs); + } + else + return clib_error_return (0, + "duplicate configuration for PCI address %U", + format_vlib_pci_addr, &pci_addr); + } + + 
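+  /* Remember the PCI address and initialize the HQoS settings to their
+   * defaults before parsing any per-device options below. */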
devconf->pci_addr.as_u32 = pci_addr.as_u32; + devconf->hqos_enabled = 0; + dpdk_device_config_hqos_default (&devconf->hqos); + + if (!input) + return 0; + + unformat_skip_white_space (input); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues)) + ; + else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues)) + ; + else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc)) + ; + else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc)) + ; + else if (unformat (input, "workers %U", unformat_bitmap_list, + &devconf->workers)) + ; + else + if (unformat + (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input)) + { + error = unformat_rss_fn (&sub_input, &devconf->rss_fn); + if (error) + break; + } + else if (unformat (input, "vlan-strip-offload off")) + devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF; + else if (unformat (input, "vlan-strip-offload on")) + devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON; + else + if (unformat + (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input)) + { + devconf->hqos_enabled = 1; + error = unformat_hqos (&sub_input, &devconf->hqos); + if (error) + break; + } + else if (unformat (input, "hqos")) + { + devconf->hqos_enabled = 1; + } + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + break; + } + } + + if (error) + return error; + + if (devconf->workers && devconf->num_rx_queues == 0) + devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers); + else if (devconf->workers && + clib_bitmap_count_set_bits (devconf->workers) != + devconf->num_rx_queues) + error = + clib_error_return (0, + "%U: number of worker threadds must be " + "equal to number of rx queues", format_vlib_pci_addr, + &pci_addr); + + return error; +} + +static clib_error_t * +dpdk_config (vlib_main_t * vm, unformat_input_t * input) +{ + clib_error_t *error = 0; + dpdk_main_t *dm = &dpdk_main; + dpdk_config_main_t *conf = &dpdk_config_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_config_t *devconf; + vlib_pci_addr_t pci_addr; + unformat_input_t sub_input; + u8 *s, *tmp = 0; + u8 *rte_cmd = 0, *ethname = 0; + u32 log_level; + int ret, i; + int num_whitelisted = 0; + u8 no_pci = 0; + u8 no_huge = 0; + u8 huge_dir = 0; + u8 file_prefix = 0; + u8 *socket_mem = 0; + + conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + /* Prime the pump */ + if (unformat (input, "no-hugetlb")) + { + vec_add1 (conf->eal_init_args, (u8 *) "no-huge"); + no_huge = 1; + } + + else if (unformat (input, "enable-tcp-udp-checksum")) + conf->enable_tcp_udp_checksum = 1; + + else if (unformat (input, "decimal-interface-names")) + conf->interface_name_format_decimal = 1; + + else if (unformat (input, "no-multi-seg")) + conf->no_multi_seg = 1; + + else if (unformat (input, "enable-cryptodev")) + conf->cryptodev = 1; + + else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input, + &sub_input)) + { + error = + dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input, + 1); + + if (error) + return error; + } + else + if (unformat + (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, + unformat_vlib_cli_sub_input, &sub_input)) + { + error = dpdk_device_config (conf, pci_addr, &sub_input, 0); + + if (error) + return error; + + num_whitelisted++; + } + else if (unformat (input, "dev %U", 
unformat_vlib_pci_addr, &pci_addr)) + { + error = dpdk_device_config (conf, pci_addr, 0, 0); + + if (error) + return error; + + num_whitelisted++; + } + else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) + ; + else if (unformat (input, "kni %d", &conf->num_kni)) + ; + else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) + ; + else if (unformat (input, "socket-mem %s", &socket_mem)) + ; + else if (unformat (input, "no-pci")) + { + no_pci = 1; + tmp = format (0, "--no-pci%c", 0); + vec_add1 (conf->eal_init_args, tmp); + } + else if (unformat (input, "poll-sleep %d", &dm->poll_sleep)) + ; + +#define _(a) \ + else if (unformat(input, #a)) \ + { \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + } + foreach_eal_double_hyphen_predicate_arg +#undef _ +#define _(a) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + if (!strncmp(#a, "huge-dir", 8)) \ + huge_dir = 1; \ + else if (!strncmp(#a, "file-prefix", 11)) \ + file_prefix = 1; \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + if (!strncmp(#a, "vdev", 4)) \ + if (strstr((char*)s, "af_packet")) \ + clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \ + vec_add1 (conf->eal_init_args, s); \ + } + foreach_eal_double_hyphen_arg +#undef _ +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (conf->eal_init_args, s); \ + } + foreach_eal_single_hyphen_arg +#undef _ +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (conf->eal_init_args, s); \ + conf->a##_set_manually = 1; \ + } + foreach_eal_single_hyphen_mandatory_arg +#undef _ + else if (unformat (input, "default")) + ; + + else if (unformat_skip_white_space (input)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (!conf->uio_driver_name) + conf->uio_driver_name = format (0, "uio_pci_generic%c", 0); + + /* + * Use 1G huge pages if available. + */ + if (!no_huge && !huge_dir) + { + u32 x, *mem_by_socket = 0; + uword c = 0; + u8 use_1g = 1; + u8 use_2m = 1; + u8 less_than_1g = 1; + int rv; + + umount (DEFAULT_HUGE_DIR); + + /* Process "socket-mem" parameter value */ + if (vec_len (socket_mem)) + { + unformat_input_t in; + unformat_init_vector (&in, socket_mem); + while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT) + { + if (unformat (&in, "%u,", &x)) + ; + else if (unformat (&in, "%u", &x)) + ; + else if (unformat (&in, ",")) + x = 0; + else + break; + + vec_add1 (mem_by_socket, x); + + if (x > 1023) + less_than_1g = 0; + } + /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... 
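+ * so the pointer is cleared right after and the string is rebuilt
+ * later from mem_by_socket.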
*/ + unformat_free (&in); + socket_mem = 0; + } + else + { + /* *INDENT-OFF* */ + clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( + { + vec_validate(mem_by_socket, c); + mem_by_socket[c] = 256; /* default per-socket mem */ + } + )); + /* *INDENT-ON* */ + } + + /* check if available enough 1GB pages for each socket */ + /* *INDENT-OFF* */ + clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( + { + int pages_avail, page_size, mem; + + vec_validate(mem_by_socket, c); + mem = mem_by_socket[c]; + + page_size = 1024; + pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); + + if (pages_avail < 0 || page_size * pages_avail < mem) + use_1g = 0; + + page_size = 2; + pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); + + if (pages_avail < 0 || page_size * pages_avail < mem) + use_2m = 0; + })); + /* *INDENT-ON* */ + + if (mem_by_socket == 0) + { + error = clib_error_return (0, "mem_by_socket NULL"); + goto done; + } + _vec_len (mem_by_socket) = c + 1; + + /* regenerate socket_mem string */ + vec_foreach_index (x, mem_by_socket) + socket_mem = format (socket_mem, "%s%u", + socket_mem ? "," : "", mem_by_socket[x]); + socket_mem = format (socket_mem, "%c", 0); + + vec_free (mem_by_socket); + + rv = mkdir (VPP_RUN_DIR, 0755); + if (rv && errno != EEXIST) + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + VPP_RUN_DIR, errno); + goto done; + } + + rv = mkdir (DEFAULT_HUGE_DIR, 0755); + if (rv && errno != EEXIST) + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + DEFAULT_HUGE_DIR, errno); + goto done; + } + + if (use_1g && !(less_than_1g && use_2m)) + { + rv = + mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); + } + else if (use_2m) + { + rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); + } + else + { + return clib_error_return (0, "not enough free huge pages"); + } + + if (rv) + { + error = clib_error_return (0, "mount failed %d", errno); + goto done; + } + + tmp = format (0, "--huge-dir%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); + vec_add1 (conf->eal_init_args, tmp); + if (!file_prefix) + { + tmp = format (0, "--file-prefix%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "vpp%c", 0); + vec_add1 (conf->eal_init_args, tmp); + } + } + + vec_free (rte_cmd); + vec_free (ethname); + + if (error) + return error; + + /* I'll bet that -c and -n must be the first and second args... 
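+ * hence the vec_insert() calls below, which place them at fixed
+ * positions in eal_init_args instead of appending them.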
*/ + if (!conf->coremask_set_manually) + { + vlib_thread_registration_t *tr; + uword *coremask = 0; + int i; + + /* main thread core */ + coremask = clib_bitmap_set (coremask, tm->main_lcore, 1); + + for (i = 0; i < vec_len (tm->registrations); i++) + { + tr = tm->registrations[i]; + coremask = clib_bitmap_or (coremask, tr->coremask); + } + + vec_insert (conf->eal_init_args, 2, 1); + conf->eal_init_args[1] = (u8 *) "-c"; + tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); + conf->eal_init_args[2] = tmp; + clib_bitmap_free (coremask); + } + + if (!conf->nchannels_set_manually) + { + vec_insert (conf->eal_init_args, 2, 3); + conf->eal_init_args[3] = (u8 *) "-n"; + tmp = format (0, "%d", conf->nchannels); + conf->eal_init_args[4] = tmp; + } + + if (no_pci == 0 && geteuid () == 0) + dpdk_bind_devices_to_uio (conf); + +#define _(x) \ + if (devconf->x == 0 && conf->default_devconf.x > 0) \ + devconf->x = conf->default_devconf.x ; + + /* *INDENT-OFF* */ + pool_foreach (devconf, conf->dev_confs, ({ + + /* default per-device config items */ + foreach_dpdk_device_config_item + + /* add DPDK EAL whitelist/blacklist entry */ + if (num_whitelisted > 0 && devconf->is_blacklisted == 0) + { + tmp = format (0, "-w%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + else if (num_whitelisted == 0 && devconf->is_blacklisted != 0) + { + tmp = format (0, "-b%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + })); + /* *INDENT-ON* */ + +#undef _ + + /* set master-lcore */ + tmp = format (0, "--master-lcore%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%u%c", tm->main_lcore, 0); + vec_add1 (conf->eal_init_args, tmp); + + /* set socket-mem */ + tmp = format (0, "--socket-mem%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%s%c", socket_mem, 0); + vec_add1 (conf->eal_init_args, tmp); + + /* NULL terminate the "argv" vector, in case of stupidity */ + vec_add1 (conf->eal_init_args, 0); + _vec_len (conf->eal_init_args) -= 1; + + /* Set up DPDK eal and packet mbuf pool early. */ + + log_level = (CLIB_DEBUG > 0) ? 
RTE_LOG_DEBUG : RTE_LOG_NOTICE; + + rte_set_log_level (log_level); + + vm = vlib_get_main (); + + /* make copy of args as rte_eal_init tends to mess up with arg array */ + for (i = 1; i < vec_len (conf->eal_init_args); i++) + conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", + conf->eal_init_args[i]); + + ret = + rte_eal_init (vec_len (conf->eal_init_args), + (char **) conf->eal_init_args); + + /* lazy umount hugepages */ + umount2 (DEFAULT_HUGE_DIR, MNT_DETACH); + + if (ret < 0) + return clib_error_return (0, "rte_eal_init returned %d", ret); + + /* Dump the physical memory layout prior to creating the mbuf_pool */ + fprintf (stdout, "DPDK physical memory layout:\n"); + rte_dump_physmem_layout (stdout); + + /* main thread 1st */ + error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); + if (error) + return error; + + for (i = 0; i < RTE_MAX_LCORE; i++) + { + error = vlib_buffer_pool_create (vm, conf->num_mbufs, + rte_lcore_to_socket_id (i)); + if (error) + return error; + } + +done: + return error; +} + +VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk"); + +void +dpdk_update_link_state (dpdk_device_t * xd, f64 now) +{ + vnet_main_t *vnm = vnet_get_main (); + struct rte_eth_link prev_link = xd->link; + u32 hw_flags = 0; + u8 hw_flags_chg = 0; + + /* only update link state for PMD interfaces */ + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + return; + + xd->time_last_link_update = now ? now : xd->time_last_link_update; + memset (&xd->link, 0, sizeof (xd->link)); + rte_eth_link_get_nowait (xd->device_index, &xd->link); + + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + ELOG_TYPE_DECLARE (e) = + { + .format = + "update-link-state: sw_if_index %d, admin_up %d," + "old link_state %d new link_state %d",.format_args = "i4i1i1i1",}; + + struct + { + u32 sw_if_index; + u8 admin_up; + u8 old_link_state; + u8 new_link_state; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0; + ed->old_link_state = (u8) + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); + ed->new_link_state = (u8) xd->link.link_status; + } + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) && + ((xd->link.link_status != 0) ^ + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) + { + hw_flags_chg = 1; + hw_flags |= (xd->link.link_status ? 
VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + } + + if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) + { + hw_flags_chg = 1; + switch (xd->link.link_duplex) + { + case ETH_LINK_HALF_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; + break; + case ETH_LINK_FULL_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; + break; + default: + break; + } + } + if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) + { + hw_flags_chg = 1; + switch (xd->link.link_speed) + { + case ETH_SPEED_NUM_10M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; + break; + case ETH_SPEED_NUM_100M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; + break; + case ETH_SPEED_NUM_1G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; + break; + case ETH_SPEED_NUM_10G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; + break; + case ETH_SPEED_NUM_40G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; + break; + case 0: + break; + default: + clib_warning ("unknown link speed %d", xd->link.link_speed); + break; + } + } + if (hw_flags_chg) + { + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + + ELOG_TYPE_DECLARE (e) = + { + .format = + "update-link-state: sw_if_index %d, new flags %d",.format_args + = "i4i4",}; + + struct + { + u32 sw_if_index; + u32 flags; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->flags = hw_flags; + } + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); + } +} + +static uword +dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + clib_error_t *error; + vnet_main_t *vnm = vnet_get_main (); + dpdk_main_t *dm = &dpdk_main; + ethernet_main_t *em = ðernet_main; + dpdk_device_t *xd; + vlib_thread_main_t *tm = vlib_get_thread_main (); + int i; + + error = dpdk_lib_init (dm); + + /* + * Turn on the input node if we found some devices to drive + * and we're not running worker threads or i/o threads + */ + + if (error == 0 && vec_len (dm->devices) > 0) + { + if (tm->n_vlib_mains == 1) + vlib_node_set_state (vm, dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + else + for (i = 0; i < tm->n_vlib_mains; i++) + if (vec_len (dm->devices_by_cpu[i]) > 0) + vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + } + + if (error) + clib_error_report (error); + + tm->worker_thread_release = 1; + + f64 now = vlib_time_now (vm); + vec_foreach (xd, dm->devices) + { + dpdk_update_link_state (xd, now); + } + + { + /* + * Extra set up for bond interfaces: + * 1. Setup MACs for bond interfaces and their slave links which was set + * in dpdk_port_setup() but needs to be done again here to take effect. + * 2. Set up info for bond interface related CLI support. 
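+ * Both steps apply only to ports whose driver reports itself as
+ * rte_bond_pmd.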
+ */ + int nports = rte_eth_dev_count (); + if (nports > 0) + { + for (i = 0; i < nports; i++) + { + struct rte_eth_dev_info dev_info; + rte_eth_dev_info_get (i, &dev_info); + if (!dev_info.driver_name) + dev_info.driver_name = dev_info.pci_dev->driver->driver.name; + + ASSERT (dev_info.driver_name); + if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0) + { + u8 addr[6]; + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get (i, slink, 16); + if (nlink > 0) + { + vnet_hw_interface_t *bhi; + ethernet_interface_t *bei; + int rv; + + /* Get MAC of 1st slave link */ + rte_eth_macaddr_get (slink[0], + (struct ether_addr *) addr); + /* Set MAC of bounded interface to that of 1st slave link */ + rv = + rte_eth_bond_mac_address_set (i, + (struct ether_addr *) + addr); + if (rv < 0) + clib_warning ("Failed to set MAC address"); + + /* Populate MAC of bonded interface in VPP hw tables */ + bhi = + vnet_get_hw_interface (vnm, + dm->devices[i].vlib_hw_if_index); + bei = + pool_elt_at_index (em->interfaces, bhi->hw_instance); + clib_memcpy (bhi->hw_address, addr, 6); + clib_memcpy (bei->address, addr, 6); + /* Init l3 packet size allowed on bonded interface */ + bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES; + bhi->max_l3_packet_bytes[VLIB_RX] = + bhi->max_l3_packet_bytes[VLIB_TX] = + ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t); + while (nlink >= 1) + { /* for all slave links */ + int slave = slink[--nlink]; + dpdk_device_t *sdev = &dm->devices[slave]; + vnet_hw_interface_t *shi; + vnet_sw_interface_t *ssi; + /* Add MAC to all slave links except the first one */ + if (nlink) + rte_eth_dev_mac_addr_add (slave, + (struct ether_addr *) + addr, 0); + /* Set slaves bitmap for bonded interface */ + bhi->bond_info = + clib_bitmap_set (bhi->bond_info, + sdev->vlib_hw_if_index, 1); + /* Set slave link flags on slave interface */ + shi = + vnet_get_hw_interface (vnm, sdev->vlib_hw_if_index); + ssi = + vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index); + shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; + ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; + + /* Set l3 packet size allowed as the lowest of slave */ + if (bhi->max_l3_packet_bytes[VLIB_RX] > + shi->max_l3_packet_bytes[VLIB_RX]) + bhi->max_l3_packet_bytes[VLIB_RX] = + bhi->max_l3_packet_bytes[VLIB_TX] = + shi->max_l3_packet_bytes[VLIB_RX]; + + /* Set max packet size allowed as the lowest of slave */ + if (bhi->max_packet_bytes > shi->max_packet_bytes) + bhi->max_packet_bytes = shi->max_packet_bytes; + } + } + } + } + } + } + + while (1) + { + /* + * check each time through the loop in case intervals are changed + */ + f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ? 
+ dm->link_state_poll_interval : dm->stat_poll_interval; + + vlib_process_wait_for_event_or_clock (vm, min_wait); + + if (dm->admin_up_down_in_progress) + /* skip the poll if an admin up down is in progress (on any interface) */ + continue; + + vec_foreach (xd, dm->devices) + { + f64 now = vlib_time_now (vm); + if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) + dpdk_update_counters (xd, now); + if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) + dpdk_update_link_state (xd, now); + + } + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_process_node,static) = { + .function = dpdk_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "dpdk-process", + .process_log2_n_stack_bytes = 17, +}; +/* *INDENT-ON* */ + +int +dpdk_set_stat_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_STATS_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.stat_poll_interval = interval; + + return 0; +} + +int +dpdk_set_link_state_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_LINK_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.link_state_poll_interval = interval; + + return 0; +} + +clib_error_t * +dpdk_init (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_node_t *ei; + clib_error_t *error = 0; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + /* verify that structs are cacheline aligned */ + STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0, + "Cache line marker must be 1st element in dpdk_device_t"); + STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) == + CLIB_CACHE_LINE_BYTES, + "Data in cache line 0 is bigger than cache line size"); + STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, + "Cache line marker must be 1st element in frame_queue_trace_t"); + + u8 *name; + name = format (0, "dpdk_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + dm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + vec_free (name); + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main (); + dm->conf = &dpdk_config_main; + + error = dpdk_plugin_api_hookup (vm); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (dm, &api_main); + +// TODO +// plugin_custom_dump_configure (dm); + + ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); + if (ei == 0) + return clib_error_return (0, "ethernet-input node AWOL"); + + dm->ethernet_input_node_index = ei->index; + + dm->conf->nchannels = 4; + dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; + vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); + + dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword)); + + /* $$$ use n_thread_stacks since it's known-good at this point */ + vec_validate (dm->recycle, tm->n_thread_stacks - 1); + + /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... 
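+ * dpdk_lib_init() strips these two flags from the template again when
+ * "enable-tcp-udp-checksum" is configured.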
*/ + dm->buffer_flags_template = + (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID + | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); + + dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; + dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; + + /* init CLI */ + if ((error = vlib_call_init_function (vm, dpdk_cli_init))) + return error; + + return error; +} + +VLIB_INIT_FUNCTION (dpdk_init); + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/cli.c b/src/plugins/dpdk/ipsec/cli.c new file mode 100644 index 00000000..40cee39b --- /dev/null +++ b/src/plugins/dpdk/ipsec/cli.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +static void +dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) +{ + dpdk_config_main_t *conf = &dpdk_config_main; + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 i, skip_master; + + if (!conf->cryptodev) + { + vlib_cli_output (vm, "DPDK Cryptodev support is disabled\n"); + return; + } + + if (detail_display) + vlib_cli_output (vm, "worker\t%10s\t%15s\tdir\tdev\tqp\n", + "cipher", "auth"); + else + vlib_cli_output (vm, "worker\tcrypto device id(type)\n"); + + skip_master = vlib_num_workers () > 0; + + for (i = 0; i < tm->n_vlib_mains; i++) + { + uword key, data; + u32 cpu_index = vlib_mains[i]->cpu_index; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + u8 *s = 0; + + if (skip_master) + { + skip_master = 0; + continue; + } + + if (!detail_display) + { + i32 last_cdev = -1; + crypto_qp_data_t *qpd; + + s = format (s, "%u\t", cpu_index); + + /* *INDENT-OFF* */ + vec_foreach (qpd, cwm->qp_data) + { + u32 dev_id = qpd->dev_id; + + if ((u16) last_cdev != dev_id) + { + struct rte_cryptodev_info cdev_info; + + rte_cryptodev_info_get (dev_id, &cdev_info); + + s = format(s, "%u(%s)\t", dev_id, cdev_info.feature_flags & + RTE_CRYPTODEV_FF_HW_ACCELERATED ? "HW" : "SW"); + } + last_cdev = dev_id; + } + /* *INDENT-ON* */ + vlib_cli_output (vm, "%s", s); + } + else + { + char cipher_str[15], auth_str[15]; + struct rte_cryptodev_capabilities cap; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; + /* *INDENT-OFF* */ + hash_foreach (key, data, cwm->algo_qp_map, + ({ + cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER; + cap.sym.cipher.algo = p_key->cipher_algo; + check_algo_is_supported (&cap, cipher_str); + cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AUTH; + cap.sym.auth.algo = p_key->auth_algo; + check_algo_is_supported (&cap, auth_str); + vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n", + vlib_mains[i]->cpu_index, cipher_str, auth_str, + p_key->is_outbound ? 
"out" : "in", + cwm->qp_data[data].dev_id, + cwm->qp_data[data].qp_id); + })); + /* *INDENT-ON* */ + } + } +} + +static clib_error_t * +lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u16 detail = 0; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "verbose")) + detail = 1; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + dpdk_ipsec_show_mapping (vm, detail); + +done: + unformat_free (line_input); + + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (lcore_cryptodev_map, static) = { + .path = "show crypto device mapping", + .short_help = + "show cryptodev device mapping ", + .function = lcore_cryptodev_map_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/crypto_node.c b/src/plugins/dpdk/ipsec/crypto_node.c new file mode 100644 index 00000000..dc3452b2 --- /dev/null +++ b/src/plugins/dpdk/ipsec/crypto_node.c @@ -0,0 +1,215 @@ +/* + *------------------------------------------------------------------ + * crypto_node.c - DPDK Cryptodev input node + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include +#include +#include + +#include +#include +#include + +#define foreach_dpdk_crypto_input_next \ + _(DROP, "error-drop") \ + _(ENCRYPT_POST, "dpdk-esp-encrypt-post") \ + _(DECRYPT_POST, "dpdk-esp-decrypt-post") + +typedef enum +{ +#define _(f,s) DPDK_CRYPTO_INPUT_NEXT_##f, + foreach_dpdk_crypto_input_next +#undef _ + DPDK_CRYPTO_INPUT_N_NEXT, +} dpdk_crypto_input_next_t; + +#define foreach_dpdk_crypto_input_error \ + _(DQ_COPS, "Crypto ops dequeued") \ + _(COP_FAILED, "Crypto op failed") + +typedef enum +{ +#define _(f,s) DPDK_CRYPTO_INPUT_ERROR_##f, + foreach_dpdk_crypto_input_error +#undef _ + DPDK_CRYPTO_INPUT_N_ERROR, +} dpdk_crypto_input_error_t; + +static char *dpdk_crypto_input_error_strings[] = { +#define _(n, s) s, + foreach_dpdk_crypto_input_error +#undef _ +}; + +vlib_node_registration_t dpdk_crypto_input_node; + +typedef struct +{ + u32 cdev; + u32 qp; + u32 status; + u32 sa_idx; + u32 next_index; +} dpdk_crypto_input_trace_t; + +static u8 * +format_dpdk_crypto_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + dpdk_crypto_input_trace_t *t = va_arg (*args, dpdk_crypto_input_trace_t *); + + s = format (s, "dpdk_crypto: cryptodev-id %u queue-pair %u next-index %d", + t->cdev, t->qp, t->next_index); + + s = format (s, " status %u sa-idx %u\n", t->status, t->sa_idx); + + return s; +} + +static_always_inline u32 +dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, + crypto_qp_data_t * qpd) +{ + u32 n_deq, *to_next = 0, next_index, n_cops, def_next_index; + struct rte_crypto_op **cops = qpd->cops; + + if (qpd->inflights == 0) + return 0; + + if (qpd->is_outbound) + def_next_index = DPDK_CRYPTO_INPUT_NEXT_ENCRYPT_POST; + else + def_next_index = DPDK_CRYPTO_INPUT_NEXT_DECRYPT_POST; + + n_cops = rte_cryptodev_dequeue_burst (qpd->dev_id, qpd->qp_id, + cops, VLIB_FRAME_SIZE); + n_deq = n_cops; + next_index = def_next_index; + + qpd->inflights -= n_cops; + ASSERT (qpd->inflights >= 0); + + while (n_cops > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_cops > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + struct rte_crypto_op *cop; + struct rte_crypto_sym_op *sym_cop; + + cop = cops[0]; + cops += 1; + n_cops -= 1; + n_left_to_next -= 1; + + next0 = def_next_index; + + if (PREDICT_FALSE (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) + { + next0 = DPDK_CRYPTO_INPUT_NEXT_DROP; + vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, + DPDK_CRYPTO_INPUT_ERROR_COP_FAILED, + 1); + } + cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; + + sym_cop = (struct rte_crypto_sym_op *) (cop + 1); + b0 = vlib_buffer_from_rte_mbuf (sym_cop->m_src); + bi0 = vlib_get_buffer_index (vm, b0); + + to_next[0] = bi0; + to_next += 1; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_next_frame (vm, node, next0); + dpdk_crypto_input_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->cdev = qpd->dev_id; + tr->qp = qpd->qp_id; + tr->status = cop->status; + tr->next_index = next0; + tr->sa_idx = vnet_buffer (b0)->ipsec.sad_index; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + crypto_free_cop (qpd, qpd->cops, n_deq); + + 
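+  /* DQ_COPS ("Crypto ops dequeued") is used as a per-node statistic
+   * rather than a real error; it accounts for everything pulled off
+   * this queue pair. */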
vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, + DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq); + return n_deq; +} + +static uword +dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + crypto_qp_data_t *qpd; + u32 n_deq = 0; + + /* *INDENT-OFF* */ + vec_foreach (qpd, cwm->qp_data) + n_deq += dpdk_crypto_dequeue(vm, node, qpd); + /* *INDENT-ON* */ + + return n_deq; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_crypto_input_node) = +{ + .function = dpdk_crypto_input_fn, + .name = "dpdk-crypto-input", + .format_trace = format_dpdk_crypto_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, + .n_errors = DPDK_CRYPTO_INPUT_N_ERROR, + .error_strings = dpdk_crypto_input_error_strings, + .n_next_nodes = DPDK_CRYPTO_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [DPDK_CRYPTO_INPUT_NEXT_##s] = n, + foreach_dpdk_crypto_input_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_crypto_input_node, dpdk_crypto_input_fn) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/dir.dox b/src/plugins/dpdk/ipsec/dir.dox new file mode 100644 index 00000000..ffebfc4d --- /dev/null +++ b/src/plugins/dpdk/ipsec/dir.dox @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + @dir vnet/vnet/devices/dpdk/ipsec + @brief IPSec ESP encrypt/decrypt using DPDK Cryptodev API +*/ diff --git a/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md b/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md new file mode 100644 index 00000000..fed2fe0e --- /dev/null +++ b/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md @@ -0,0 +1,86 @@ +# VPP IPSec implementation using DPDK Cryptodev API {#dpdk_crypto_ipsec_doc} + +This document is meant to contain all related information about implementation and usability. + + +## VPP IPsec with DPDK Cryptodev + +DPDK Cryptodev is an asynchronous crypto API that supports both Hardware and Software implementations (for more details refer to [DPDK Cryptography Device Library documentation](http://dpdk.org/doc/guides/prog_guide/cryptodev_lib.html)). + +When DPDK support is enabled and there are enough Cryptodev resources for all workers, the node graph is reconfigured by adding and changing default next nodes. + +The following nodes are added: +* dpdk-crypto-input : polling input node, basically dequeuing from crypto devices. +* dpdk-esp-encrypt : internal node. +* dpdk-esp-decrypt : internal node. +* dpdk-esp-encrypt-post : internal node. +* dpdk-esp-decrypt-post : internal node. 
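+
+Once the graph has been reconfigured, the nodes listed above appear in the
+regular node listings, which is a quick way to confirm that enough Cryptodev
+resources were found (exact output varies between VPP versions):
+
+    show vlib graph
+    show runtime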
+ +Set new default next nodes: +* for esp encryption: esp-encrypt -> dpdk-esp-encrypt +* for esp decryption: esp-decrypt -> dpdk-esp-decrypt + + +### How to enable VPP IPSec with DPDK Cryptodev support + +DPDK Cryptodev is supported in DPDK enabled VPP. +By default, only HW Cryptodev is supported but needs to be explicetly enabled with the following config option: + +``` +dpdk { + enable-cryptodev +} +``` + +To enable SW Cryptodev support (AESNI-MB-PMD and GCM-PMD), we need the following env option: + + vpp_uses_dpdk_cryptodev_sw=yes + +A couple of ways to achive this: +* uncomment/add it in the platforms config (ie. build-data/platforms/vpp.mk) +* set the option when building vpp (ie. make vpp_uses_dpdk_cryptodev_sw=yes build-release) + +When enabling SW Cryptodev support, it means that you need to pre-build the required crypto libraries needed by those SW Cryptodev PMDs. + + +### Crypto Resources allocation + +VPP allocates crypto resources based on a best effort approach: +* first allocate Hardware crypto resources, then Software. +* if there are not enough crypto resources for all workers, the graph node is not modifed, therefore the default VPP IPsec implementation based in OpenSSL is used. The following message is displayed: + + 0: dpdk_ipsec_init: not enough cryptodevs for ipsec + + +### Configuration example + +To enable DPDK Cryptodev the user just need to provide the startup.conf option +as mentioned previously. + +Example startup.conf: + +``` +dpdk { + socket-mem 1024,1024 + num-mbufs 131072 + dev 0000:81:00.0 + dev 0000:81:00.1 + enable-cryptodev + dev 0000:85:01.0 + dev 0000:85:01.1 + vdev cryptodev_aesni_mb_pmd,socket_id=1 + vdev cryptodev_aesni_mb_pmd,socket_id=1 +} +``` + +In the above configuration: +* 0000:85:01.0 and 0000:85:01.1 are crypto BDFs and they require the same driver binding as DPDK Ethernet devices but they do not support any extra configuration options. +* Two AESNI-MB Software Cryptodev PMDs are created in NUMA node 1. + +For further details refer to [DPDK Crypto Device Driver documentation](http://dpdk.org/doc/guides/cryptodevs/index.html) + +### Operational data + +The following CLI command displays the Cryptodev/Worker mapping: + + show crypto device mapping [verbose] diff --git a/src/plugins/dpdk/ipsec/esp.h b/src/plugins/dpdk/ipsec/esp.h new file mode 100644 index 00000000..320295b1 --- /dev/null +++ b/src/plugins/dpdk/ipsec/esp.h @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __DPDK_ESP_H__ +#define __DPDK_ESP_H__ + +#include +#include +#include + +typedef struct +{ + enum rte_crypto_cipher_algorithm algo; + u8 key_len; + u8 iv_len; +} dpdk_esp_crypto_alg_t; + +typedef struct +{ + enum rte_crypto_auth_algorithm algo; + u8 trunc_size; +} dpdk_esp_integ_alg_t; + +typedef struct +{ + dpdk_esp_crypto_alg_t *esp_crypto_algs; + dpdk_esp_integ_alg_t *esp_integ_algs; +} dpdk_esp_main_t; + +dpdk_esp_main_t dpdk_esp_main; + +static_always_inline void +dpdk_esp_init () +{ + dpdk_esp_main_t *em = &dpdk_esp_main; + dpdk_esp_integ_alg_t *i; + dpdk_esp_crypto_alg_t *c; + + vec_validate (em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1); + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 16; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 24; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 32; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_GCM_128]; + c->algo = RTE_CRYPTO_CIPHER_AES_GCM; + c->key_len = 16; + c->iv_len = 8; + + vec_validate (em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1); + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96]; + i->algo = RTE_CRYPTO_AUTH_SHA1_HMAC; + i->trunc_size = 12; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96]; + i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + i->trunc_size = 12; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128]; + i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + i->trunc_size = 16; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192]; + i->algo = RTE_CRYPTO_AUTH_SHA384_HMAC; + i->trunc_size = 24; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256]; + i->algo = RTE_CRYPTO_AUTH_SHA512_HMAC; + i->trunc_size = 32; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_AES_GCM_128]; + i->algo = RTE_CRYPTO_AUTH_AES_GCM; + i->trunc_size = 16; +} + +static_always_inline int +translate_crypto_algo (ipsec_crypto_alg_t crypto_algo, + struct rte_crypto_sym_xform *cipher_xform) +{ + switch (crypto_algo) + { + case IPSEC_CRYPTO_ALG_NONE: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL; + break; + case IPSEC_CRYPTO_ALG_AES_CBC_128: + case IPSEC_CRYPTO_ALG_AES_CBC_192: + case IPSEC_CRYPTO_ALG_AES_CBC_256: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; + break; + case IPSEC_CRYPTO_ALG_AES_GCM_128: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM; + break; + default: + return -1; + } + + cipher_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + + return 0; +} + +static_always_inline int +translate_integ_algo (ipsec_integ_alg_t integ_alg, + struct rte_crypto_sym_xform *auth_xform, int use_esn) +{ + switch (integ_alg) + { + case IPSEC_INTEG_ALG_NONE: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_NULL; + auth_xform->auth.digest_length = 0; + break; + case IPSEC_INTEG_ALG_SHA1_96: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC; + auth_xform->auth.digest_length = 12; + break; + case IPSEC_INTEG_ALG_SHA_256_96: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + auth_xform->auth.digest_length = 12; + break; + case IPSEC_INTEG_ALG_SHA_256_128: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + auth_xform->auth.digest_length = 16; + break; + case IPSEC_INTEG_ALG_SHA_384_192: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA384_HMAC; + auth_xform->auth.digest_length = 24; + break; + case IPSEC_INTEG_ALG_SHA_512_256: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA512_HMAC; + 
auth_xform->auth.digest_length = 32; + break; + case IPSEC_INTEG_ALG_AES_GCM_128: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM; + auth_xform->auth.digest_length = 16; + auth_xform->auth.add_auth_data_length = use_esn ? 12 : 8; + break; + default: + return -1; + } + + auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; + + return 0; +} + +static_always_inline int +create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, + u8 is_outbound) +{ + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + struct rte_crypto_sym_xform cipher_xform = { 0 }; + struct rte_crypto_sym_xform auth_xform = { 0 }; + struct rte_crypto_sym_xform *xfs; + uword key = 0, *data; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; + + if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + sa->crypto_key_len -= 4; + clib_memcpy (&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4); + } + else + { + u32 seed = (u32) clib_cpu_time_now (); + sa->salt = random_u32 (&seed); + } + + cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; + cipher_xform.cipher.key.data = sa->crypto_key; + cipher_xform.cipher.key.length = sa->crypto_key_len; + + auth_xform.type = RTE_CRYPTO_SYM_XFORM_AUTH; + auth_xform.auth.key.data = sa->integ_key; + auth_xform.auth.key.length = sa->integ_key_len; + + if (translate_crypto_algo (sa->crypto_alg, &cipher_xform) < 0) + return -1; + p_key->cipher_algo = cipher_xform.cipher.algo; + + if (translate_integ_algo (sa->integ_alg, &auth_xform, sa->use_esn) < 0) + return -1; + p_key->auth_algo = auth_xform.auth.algo; + + if (is_outbound) + { + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; + cipher_xform.next = &auth_xform; + xfs = &cipher_xform; + } + else + { + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; + auth_xform.next = &cipher_xform; + xfs = &auth_xform; + } + + p_key->is_outbound = is_outbound; + + data = hash_get (cwm->algo_qp_map, key); + if (!data) + return -1; + + sa_sess->sess = + rte_cryptodev_sym_session_create (cwm->qp_data[*data].dev_id, xfs); + + if (!sa_sess->sess) + return -1; + + sa_sess->qp_index = (u8) * data; + + return 0; +} + +#endif /* __DPDK_ESP_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/esp_decrypt.c b/src/plugins/dpdk/ipsec/esp_decrypt.c new file mode 100644 index 00000000..286e03f8 --- /dev/null +++ b/src/plugins/dpdk/ipsec/esp_decrypt.c @@ -0,0 +1,594 @@ +/* + * esp_decrypt.c : IPSec ESP Decrypt node using DPDK Cryptodev + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define foreach_esp_decrypt_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(IP6_INPUT, "ip6-input") + +#define _(v, s) ESP_DECRYPT_NEXT_##v, +typedef enum { + foreach_esp_decrypt_next +#undef _ + ESP_DECRYPT_N_NEXT, +} esp_decrypt_next_t; + +#define foreach_esp_decrypt_error \ + _(RX_PKTS, "ESP pkts received") \ + _(DECRYPTION_FAILED, "ESP decryption failed") \ + _(REPLAY, "SA replayed packet") \ + _(NOT_IP, "Not IP packet (dropped)") \ + _(ENQ_FAIL, "Enqueue failed (buffer full)") \ + _(NO_CRYPTODEV, "Cryptodev not configured") \ + _(BAD_LEN, "Invalid ciphertext length") \ + _(UNSUPPORTED, "Cipher/Auth not supported") + + +typedef enum { +#define _(sym,str) ESP_DECRYPT_ERROR_##sym, + foreach_esp_decrypt_error +#undef _ + ESP_DECRYPT_N_ERROR, +} esp_decrypt_error_t; + +static char * esp_decrypt_error_strings[] = { +#define _(sym,string) string, + foreach_esp_decrypt_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_decrypt_node; + +typedef struct { + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; +} esp_decrypt_trace_t; + +/* packet trace format function */ +static u8 * format_esp_decrypt_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *); + + s = format (s, "esp: crypto %U integrity %U", + format_ipsec_crypto_alg, t->crypto_alg, + format_ipsec_integ_alg, t->integ_alg); + return s; +} + +static uword +dpdk_esp_decrypt_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next, next_index; + ipsec_main_t *im = &ipsec_main; + u32 cpu_index = os_get_cpu_number(); + dpdk_crypto_main_t * dcm = &dpdk_crypto_main; + dpdk_esp_main_t * em = &dpdk_esp_main; + u32 i; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (PREDICT_FALSE(!dcm->workers_main)) + { + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_NO_CRYPTODEV, n_left_from); + vlib_buffer_free(vm, from, n_left_from); + return n_left_from; + } + + crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index); + u32 n_qps = vec_len(cwm->qp_data); + struct rte_crypto_op ** cops_to_enq[n_qps]; + u32 n_cop_qp[n_qps], * bi_to_enq[n_qps]; + + for (i = 0; i < n_qps; i++) + { + bi_to_enq[i] = cwm->qp_data[i].bi; + cops_to_enq[i] = cwm->qp_data[i].cops; + } + + memset(n_cop_qp, 0, n_qps * sizeof(u32)); + + crypto_alloc_cops(); + + next_index = ESP_DECRYPT_NEXT_DROP; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, sa_index0 = ~0, seq, icv_size, iv_size; + vlib_buffer_t * b0; + esp_header_t * esp0; + ipsec_sa_t * sa0; + struct rte_mbuf * mb0 = 0; + const int BLOCK_SIZE = 16; + crypto_sa_session_t * sa_sess; + void * sess; + u16 qp_index; + struct rte_crypto_op * cop = 0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + esp0 = vlib_buffer_get_current (b0); + + sa_index0 = vnet_buffer(b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, sa_index0); + + seq = clib_host_to_net_u32(esp0->seq); + + /* anti-replay check */ + if (sa0->use_anti_replay) + { + int rv = 0; + + if (PREDICT_TRUE(sa0->use_esn)) + rv = 
esp_replay_check_esn(sa0, seq); + else + rv = esp_replay_check(sa0, seq); + + if (PREDICT_FALSE(rv)) + { + clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_REPLAY, 1); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + } + + sa0->total_data_size += b0->current_length; + + if (PREDICT_FALSE(sa0->integ_alg == IPSEC_INTEG_ALG_NONE) || + PREDICT_FALSE(sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE)) + { + clib_warning ("SPI %u : only cipher + auth supported", sa0->spi); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_UNSUPPORTED, 1); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0); + + if (PREDICT_FALSE(!sa_sess->sess)) + { + int ret = create_sym_sess(sa0, sa_sess, 0); + + if (PREDICT_FALSE (ret)) + { + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + } + + sess = sa_sess->sess; + qp_index = sa_sess->qp_index; + + ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); + cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); + ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); + + cops_to_enq[qp_index][0] = cop; + cops_to_enq[qp_index] += 1; + n_cop_qp[qp_index] += 1; + bi_to_enq[qp_index][0] = bi0; + bi_to_enq[qp_index] += 1; + + rte_crypto_op_attach_sym_session(cop, sess); + + icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + + /* Convert vlib buffer to mbuf */ + mb0 = rte_mbuf_from_vlib_buffer(b0); + mb0->data_len = b0->current_length; + mb0->pkt_len = b0->current_length; + mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; + + /* Outer IP header has already been stripped */ + u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) - + iv_size - icv_size; + + if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0)) + { + clib_warning ("payload %u not multiple of %d\n", + payload_len, BLOCK_SIZE); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_BAD_LEN, 1); + vec_add (vec_elt (cwm->qp_data, qp_index).free_cops, &cop, 1); + bi_to_enq[qp_index] -= 1; + cops_to_enq[qp_index] -= 1; + n_cop_qp[qp_index] -= 1; + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1); + + sym_cop->m_src = mb0; + sym_cop->cipher.data.offset = sizeof (esp_header_t) + iv_size; + sym_cop->cipher.data.length = payload_len; + + u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t)); + dpdk_cop_priv_t * priv = (dpdk_cop_priv_t *)(sym_cop + 1); + + if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + dpdk_gcm_cnt_blk *icb = &priv->cb; + icb->salt = sa0->salt; + clib_memcpy(icb->iv, iv, 8); + icb->cnt = clib_host_to_net_u32(1); + sym_cop->cipher.iv.data = (u8 *)icb; + sym_cop->cipher.iv.phys_addr = cop->phys_addr + + (uintptr_t)icb - (uintptr_t)cop; + sym_cop->cipher.iv.length = 16; + + u8 *aad = priv->aad; + clib_memcpy(aad, iv - sizeof(esp_header_t), 8); + sym_cop->auth.aad.data = aad; + sym_cop->auth.aad.phys_addr = cop->phys_addr + + (uintptr_t)aad - (uintptr_t)cop; + if (sa0->use_esn) + { + *((u32*)&aad[8]) = sa0->seq_hi; + sym_cop->auth.aad.length = 12; + } + else + { + sym_cop->auth.aad.length = 8; + } + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, + 
rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.length = icv_size; + + } + else + { + sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(mb0, void*, + sizeof (esp_header_t)); + sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + sizeof (esp_header_t)); + sym_cop->cipher.iv.length = iv_size; + + if (sa0->use_esn) + { + dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1); + u8* payload_end = rte_pktmbuf_mtod_offset( + mb0, u8*, sizeof(esp_header_t) + iv_size + payload_len); + + clib_memcpy (priv->icv, payload_end, icv_size); + *((u32*) payload_end) = sa0->seq_hi; + sym_cop->auth.data.offset = 0; + sym_cop->auth.data.length = sizeof(esp_header_t) + iv_size + + payload_len + sizeof(sa0->seq_hi); + sym_cop->auth.digest.data = priv->icv; + sym_cop->auth.digest.phys_addr = cop->phys_addr + + (uintptr_t) priv->icv - (uintptr_t) cop; + sym_cop->auth.digest.length = icv_size; + } + else + { + sym_cop->auth.data.offset = 0; + sym_cop->auth.data.length = sizeof(esp_header_t) + + iv_size + payload_len; + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.length = icv_size; + } + } + +trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_RX_PKTS, + from_frame->n_vectors); + crypto_qp_data_t *qpd; + /* *INDENT-OFF* */ + vec_foreach_index (i, cwm->qp_data) + { + u32 enq; + + qpd = vec_elt_at_index(cwm->qp_data, i); + enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, + qpd->cops, n_cop_qp[i]); + qpd->inflights += enq; + + if (PREDICT_FALSE(enq < n_cop_qp[i])) + { + crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); + vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); + + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_ENQ_FAIL, + n_cop_qp[i] - enq); + } + } + /* *INDENT-ON* */ + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_decrypt_node) = { + .function = dpdk_esp_decrypt_node_fn, + .name = "dpdk-esp-decrypt", + .vector_size = sizeof (u32), + .format_trace = format_esp_decrypt_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(esp_decrypt_error_strings), + .error_strings = esp_decrypt_error_strings, + + .n_next_nodes = ESP_DECRYPT_N_NEXT, + .next_nodes = { +#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, + foreach_esp_decrypt_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_node, dpdk_esp_decrypt_node_fn) + +/* + * Decrypt Post Node + */ + +#define foreach_esp_decrypt_post_error \ + _(PKTS, "ESP post pkts") + +typedef enum { +#define _(sym,str) ESP_DECRYPT_POST_ERROR_##sym, + foreach_esp_decrypt_post_error +#undef _ + ESP_DECRYPT_POST_N_ERROR, +} esp_decrypt_post_error_t; + +static char * esp_decrypt_post_error_strings[] = { +#define _(sym,string) string, + foreach_esp_decrypt_post_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_decrypt_post_node; + +static u8 * format_esp_decrypt_post_trace (u8 * s, va_list * args) +{ + return s; +} + +static uword 
+dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next = 0, next_index; + ipsec_sa_t * sa0; + u32 sa_index0 = ~0; + ipsec_main_t *im = &ipsec_main; + dpdk_esp_main_t *em = &dpdk_esp_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + esp_footer_t * f0; + u32 bi0, next0, icv_size, iv_size; + vlib_buffer_t * b0 = 0; + ip4_header_t *ih4 = 0, *oh4 = 0; + ip6_header_t *ih6 = 0, *oh6 = 0; + u8 tunnel_mode = 1; + u8 transport_ip6 = 0; + + next0 = ESP_DECRYPT_NEXT_DROP; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sa_index0 = vnet_buffer(b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, sa_index0); + + to_next[0] = bi0; + to_next += 1; + + icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + + if (sa0->use_anti_replay) + { + esp_header_t * esp0 = vlib_buffer_get_current (b0); + u32 seq; + seq = clib_host_to_net_u32(esp0->seq); + if (PREDICT_TRUE(sa0->use_esn)) + esp_replay_advance_esn(sa0, seq); + else + esp_replay_advance(sa0, seq); + } + + ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t)); + vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size); + + b0->current_length -= (icv_size + 2); + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) + + b0->current_length); + b0->current_length -= f0->pad_length; + + /* transport mode */ + if (PREDICT_FALSE(!sa0->is_tunnel && !sa0->is_tunnel_ip6)) + { + tunnel_mode = 0; + + if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) != 0x40)) + { + if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) == 0x60)) + transport_ip6 = 1; + else + { + clib_warning("next header: 0x%x", f0->next_header); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_NOT_IP, 1); + goto trace; + } + } + } + + if (PREDICT_TRUE (tunnel_mode)) + { + if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP)) + next0 = ESP_DECRYPT_NEXT_IP4_INPUT; + else if (f0->next_header == IP_PROTOCOL_IPV6) + next0 = ESP_DECRYPT_NEXT_IP6_INPUT; + else + { + clib_warning("next header: 0x%x", f0->next_header); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_DECRYPTION_FAILED, + 1); + goto trace; + } + } + /* transport mode */ + else + { + if (PREDICT_FALSE(transport_ip6)) + { + ih6 = (ip6_header_t *) (b0->data + sizeof(ethernet_header_t)); + vlib_buffer_advance (b0, -sizeof(ip6_header_t)); + oh6 = vlib_buffer_get_current (b0); + memmove(oh6, ih6, sizeof(ip6_header_t)); + + next0 = ESP_DECRYPT_NEXT_IP6_INPUT; + oh6->protocol = f0->next_header; + oh6->payload_length = + clib_host_to_net_u16 ( + vlib_buffer_length_in_chain(vm, b0) - + sizeof (ip6_header_t)); + } + else + { + vlib_buffer_advance (b0, -sizeof(ip4_header_t)); + oh4 = vlib_buffer_get_current (b0); + memmove(oh4, ih4, sizeof(ip4_header_t)); + + next0 = ESP_DECRYPT_NEXT_IP4_INPUT; + oh4->ip_version_and_header_length = 0x45; + oh4->fragment_id = 0; + oh4->flags_and_fragment_offset = 0; + oh4->protocol = f0->next_header; + oh4->length = clib_host_to_net_u16 ( + vlib_buffer_length_in_chain (vm, b0)); + 
oh4->checksum = ip4_header_checksum (oh4); + } + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0; + +trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_decrypt_post_node.index, + ESP_DECRYPT_POST_ERROR_PKTS, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_decrypt_post_node) = { + .function = dpdk_esp_decrypt_post_node_fn, + .name = "dpdk-esp-decrypt-post", + .vector_size = sizeof (u32), + .format_trace = format_esp_decrypt_post_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(esp_decrypt_post_error_strings), + .error_strings = esp_decrypt_post_error_strings, + + .n_next_nodes = ESP_DECRYPT_N_NEXT, + .next_nodes = { +#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, + foreach_esp_decrypt_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_post_node, dpdk_esp_decrypt_post_node_fn) diff --git a/src/plugins/dpdk/ipsec/esp_encrypt.c b/src/plugins/dpdk/ipsec/esp_encrypt.c new file mode 100644 index 00000000..5b03de73 --- /dev/null +++ b/src/plugins/dpdk/ipsec/esp_encrypt.c @@ -0,0 +1,609 @@ +/* + * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define foreach_esp_encrypt_next \ +_(DROP, "error-drop") \ +_(IP4_LOOKUP, "ip4-lookup") \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(INTERFACE_OUTPUT, "interface-output") + +#define _(v, s) ESP_ENCRYPT_NEXT_##v, +typedef enum +{ + foreach_esp_encrypt_next +#undef _ + ESP_ENCRYPT_N_NEXT, +} esp_encrypt_next_t; + +#define foreach_esp_encrypt_error \ + _(RX_PKTS, "ESP pkts received") \ + _(SEQ_CYCLED, "sequence number cycled") \ + _(ENQ_FAIL, "Enqueue failed (buffer full)") \ + _(NO_CRYPTODEV, "Cryptodev not configured") \ + _(UNSUPPORTED, "Cipher/Auth not supported") + + +typedef enum +{ +#define _(sym,str) ESP_ENCRYPT_ERROR_##sym, + foreach_esp_encrypt_error +#undef _ + ESP_ENCRYPT_N_ERROR, +} esp_encrypt_error_t; + +static char *esp_encrypt_error_strings[] = { +#define _(sym,string) string, + foreach_esp_encrypt_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_encrypt_node; + +typedef struct +{ + u32 spi; + u32 seq; + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; +} esp_encrypt_trace_t; + +/* packet trace format function */ +static u8 * +format_esp_encrypt_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *); + + s = format (s, "esp: spi %u seq %u crypto %U integrity %U", + t->spi, t->seq, + format_ipsec_crypto_alg, t->crypto_alg, + format_ipsec_integ_alg, t->integ_alg); + return s; +} + +static uword +dpdk_esp_encrypt_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next, next_index; + ipsec_main_t *im = &ipsec_main; + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + dpdk_esp_main_t *em = &dpdk_esp_main; + u32 i; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (PREDICT_FALSE (!dcm->workers_main)) + { + /* Likely there are not enough cryptodevs, so drop frame */ + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_NO_CRYPTODEV, + n_left_from); + vlib_buffer_free (vm, from, n_left_from); + return n_left_from; + } + + crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index); + u32 n_qps = vec_len (cwm->qp_data); + struct rte_crypto_op **cops_to_enq[n_qps]; + u32 n_cop_qp[n_qps], *bi_to_enq[n_qps]; + + for (i = 0; i < n_qps; i++) + { + bi_to_enq[i] = cwm->qp_data[i].bi; + cops_to_enq[i] = cwm->qp_data[i].cops; + } + + memset (n_cop_qp, 0, n_qps * sizeof (u32)); + + crypto_alloc_cops (); + + next_index = ESP_ENCRYPT_NEXT_DROP; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + u32 sa_index0; + ipsec_sa_t *sa0; + ip4_and_esp_header_t *ih0, *oh0 = 0; + ip6_and_esp_header_t *ih6_0, *oh6_0 = 0; + struct rte_mbuf *mb0 = 0; + esp_footer_t *f0; + u8 is_ipv6; + u8 ip_hdr_size; + u8 next_hdr_type; + u8 transport_mode = 0; + const int BLOCK_SIZE = 16; + u32 iv_size; + u16 orig_sz; + crypto_sa_session_t *sa_sess; + void *sess; + struct rte_crypto_op *cop = 0; + u16 qp_index; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sa_index0 = vnet_buffer (b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, 
sa_index0); + + if (PREDICT_FALSE (esp_seq_advance (sa0))) + { + clib_warning ("sequence number counter has cycled SPI %u", + sa0->spi); + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1); + //TODO: rekey SA + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + sa0->total_data_size += b0->current_length; + + sa_sess = pool_elt_at_index (cwm->sa_sess_d[1], sa_index0); + if (PREDICT_FALSE (!sa_sess->sess)) + { + int ret = create_sym_sess (sa0, sa_sess, 1); + + if (PREDICT_FALSE (ret)) + { + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + } + + qp_index = sa_sess->qp_index; + sess = sa_sess->sess; + + ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); + cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); + ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); + + cops_to_enq[qp_index][0] = cop; + cops_to_enq[qp_index] += 1; + n_cop_qp[qp_index] += 1; + bi_to_enq[qp_index][0] = bi0; + bi_to_enq[qp_index] += 1; + + ssize_t adv; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + ih0 = vlib_buffer_get_current (b0); + orig_sz = b0->current_length; + is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60; + /* is ipv6 */ + if (PREDICT_TRUE (sa0->is_tunnel)) + { + if (PREDICT_TRUE (!is_ipv6)) + adv = -sizeof (ip4_and_esp_header_t); + else + adv = -sizeof (ip6_and_esp_header_t); + } + else + { + adv = -sizeof (esp_header_t); + if (PREDICT_TRUE (!is_ipv6)) + orig_sz -= sizeof (ip4_header_t); + else + orig_sz -= sizeof (ip6_header_t); + } + + /*transport mode save the eth header before it is overwritten */ + if (PREDICT_FALSE (!sa0->is_tunnel)) + { + ethernet_header_t *ieh0 = (ethernet_header_t *) + ((u8 *) vlib_buffer_get_current (b0) - + sizeof (ethernet_header_t)); + ethernet_header_t *oeh0 = + (ethernet_header_t *) ((u8 *) ieh0 + (adv - iv_size)); + clib_memcpy (oeh0, ieh0, sizeof (ethernet_header_t)); + } + + vlib_buffer_advance (b0, adv - iv_size); + + /* XXX IP6/ip4 and IP4/IP6 not supported, only IP4/IP4 and IP6/IP6 */ + + /* is ipv6 */ + if (PREDICT_FALSE (is_ipv6)) + { + ih6_0 = (ip6_and_esp_header_t *) ih0; + ip_hdr_size = sizeof (ip6_header_t); + oh6_0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (sa0->is_tunnel)) + { + next_hdr_type = IP_PROTOCOL_IPV6; + oh6_0->ip6.ip_version_traffic_class_and_flow_label = + ih6_0->ip6.ip_version_traffic_class_and_flow_label; + } + else + { + next_hdr_type = ih6_0->ip6.protocol; + memmove (oh6_0, ih6_0, sizeof (ip6_header_t)); + } + + oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP; + oh6_0->ip6.hop_limit = 254; + oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi); + oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq); + } + else + { + ip_hdr_size = sizeof (ip4_header_t); + oh0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (sa0->is_tunnel)) + { + next_hdr_type = IP_PROTOCOL_IP_IN_IP; + oh0->ip4.tos = ih0->ip4.tos; + } + else + { + next_hdr_type = ih0->ip4.protocol; + memmove (oh0, ih0, sizeof (ip4_header_t)); + } + + oh0->ip4.ip_version_and_header_length = 0x45; + oh0->ip4.fragment_id = 0; + oh0->ip4.flags_and_fragment_offset = 0; + oh0->ip4.ttl = 254; + oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP; + oh0->esp.spi = clib_net_to_host_u32 (sa0->spi); + oh0->esp.seq = clib_net_to_host_u32 (sa0->seq); + } + + if (PREDICT_TRUE (sa0->is_tunnel && !sa0->is_tunnel_ip6)) + { + oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32; + oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32; + + /* in 
tunnel mode send it back to FIB */ + next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + } + else if (sa0->is_tunnel && sa0->is_tunnel_ip6) + { + oh6_0->ip6.src_address.as_u64[0] = + sa0->tunnel_src_addr.ip6.as_u64[0]; + oh6_0->ip6.src_address.as_u64[1] = + sa0->tunnel_src_addr.ip6.as_u64[1]; + oh6_0->ip6.dst_address.as_u64[0] = + sa0->tunnel_dst_addr.ip6.as_u64[0]; + oh6_0->ip6.dst_address.as_u64[1] = + sa0->tunnel_dst_addr.ip6.as_u64[1]; + + /* in tunnel mode send it back to FIB */ + next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + } + else + { + next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT; + transport_mode = 1; + } + + ASSERT (sa0->crypto_alg < IPSEC_CRYPTO_N_ALG); + ASSERT (sa0->crypto_alg != IPSEC_CRYPTO_ALG_NONE); + + int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE; + + /* pad packet in input buffer */ + u8 pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz; + u8 i; + u8 *padding = vlib_buffer_get_current (b0) + b0->current_length; + + for (i = 0; i < pad_bytes; ++i) + padding[i] = i + 1; + + f0 = vlib_buffer_get_current (b0) + b0->current_length + pad_bytes; + f0->pad_length = pad_bytes; + f0->next_header = next_hdr_type; + b0->current_length += pad_bytes + 2 + + em->esp_integ_algs[sa0->integ_alg].trunc_size; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + vnet_buffer (b0)->sw_if_index[VLIB_RX]; + b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + struct rte_crypto_sym_op *sym_cop; + sym_cop = (struct rte_crypto_sym_op *) (cop + 1); + + dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *) (sym_cop + 1); + + vnet_buffer (b0)->unused[0] = next0; + + mb0 = rte_mbuf_from_vlib_buffer (b0); + mb0->data_len = b0->current_length; + mb0->pkt_len = b0->current_length; + mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; + + rte_crypto_op_attach_sym_session (cop, sess); + + sym_cop->m_src = mb0; + + dpdk_gcm_cnt_blk *icb = &priv->cb; + icb->salt = sa0->salt; + icb->iv[0] = sa0->seq; + icb->iv[1] = sa0->seq_hi; + + if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + icb->cnt = clib_host_to_net_u32 (1); + clib_memcpy (vlib_buffer_get_current (b0) + ip_hdr_size + + sizeof (esp_header_t), icb->iv, 8); + sym_cop->cipher.data.offset = + ip_hdr_size + sizeof (esp_header_t) + iv_size; + sym_cop->cipher.data.length = BLOCK_SIZE * blocks; + sym_cop->cipher.iv.length = 16; + } + else + { + sym_cop->cipher.data.offset = + ip_hdr_size + sizeof (esp_header_t); + sym_cop->cipher.data.length = BLOCK_SIZE * blocks + iv_size; + sym_cop->cipher.iv.length = iv_size; + } + + sym_cop->cipher.iv.data = (u8 *) icb; + sym_cop->cipher.iv.phys_addr = cop->phys_addr + (uintptr_t) icb + - (uintptr_t) cop; + + + ASSERT (sa0->integ_alg < IPSEC_INTEG_N_ALG); + ASSERT (sa0->integ_alg != IPSEC_INTEG_ALG_NONE); + + if (PREDICT_FALSE (sa0->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)) + { + u8 *aad = priv->aad; + clib_memcpy (aad, vlib_buffer_get_current (b0) + ip_hdr_size, + 8); + sym_cop->auth.aad.data = aad; + sym_cop->auth.aad.phys_addr = cop->phys_addr + + (uintptr_t) aad - (uintptr_t) cop; + + if (PREDICT_FALSE (sa0->use_esn)) + { + *((u32 *) & aad[8]) = sa0->seq_hi; + sym_cop->auth.aad.length = 12; + } + else + { + sym_cop->auth.aad.length = 8; + } + } + else + { + sym_cop->auth.data.offset = ip_hdr_size; + sym_cop->auth.data.length = b0->current_length - ip_hdr_size + - em->esp_integ_algs[sa0->integ_alg].trunc_size; + + if (PREDICT_FALSE (sa0->use_esn)) + { + u8 *payload_end = + vlib_buffer_get_current (b0) + b0->current_length; + *((u32 *) 
payload_end) = sa0->seq_hi; + sym_cop->auth.data.length += sizeof (sa0->seq_hi); + } + } + sym_cop->auth.digest.data = vlib_buffer_get_current (b0) + + b0->current_length - + em->esp_integ_algs[sa0->integ_alg].trunc_size; + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset (mb0, + b0->current_length + - + em->esp_integ_algs + [sa0->integ_alg].trunc_size); + sym_cop->auth.digest.length = + em->esp_integ_algs[sa0->integ_alg].trunc_size; + + + if (PREDICT_FALSE (is_ipv6)) + { + oh6_0->ip6.payload_length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - + sizeof (ip6_header_t)); + } + else + { + oh0->ip4.length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4); + } + + if (transport_mode) + vlib_buffer_advance (b0, -sizeof (ethernet_header_t)); + + trace: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_encrypt_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->spi = sa0->spi; + tr->seq = sa0->seq - 1; + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_RX_PKTS, + from_frame->n_vectors); + crypto_qp_data_t *qpd; + /* *INDENT-OFF* */ + vec_foreach_index (i, cwm->qp_data) + { + u32 enq; + + qpd = vec_elt_at_index(cwm->qp_data, i); + enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, + qpd->cops, n_cop_qp[i]); + qpd->inflights += enq; + + if (PREDICT_FALSE(enq < n_cop_qp[i])) + { + crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); + vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); + + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_ENQ_FAIL, + n_cop_qp[i] - enq); + } + } + /* *INDENT-ON* */ + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_encrypt_node) = +{ + .function = dpdk_esp_encrypt_node_fn,.name = "dpdk-esp-encrypt",.flags = + VLIB_NODE_FLAG_IS_OUTPUT,.vector_size = sizeof (u32),.format_trace = + format_esp_encrypt_trace,.n_errors = + ARRAY_LEN (esp_encrypt_error_strings),.error_strings = + esp_encrypt_error_strings,.n_next_nodes = 1,.next_nodes = + { + [ESP_ENCRYPT_NEXT_DROP] = "error-drop",} +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_node, dpdk_esp_encrypt_node_fn) +/* + * ESP Encrypt Post Node + */ +#define foreach_esp_encrypt_post_error \ + _(PKTS, "ESP post pkts") + typedef enum + { +#define _(sym,str) ESP_ENCRYPT_POST_ERROR_##sym, + foreach_esp_encrypt_post_error +#undef _ + ESP_ENCRYPT_POST_N_ERROR, + } esp_encrypt_post_error_t; + + static char *esp_encrypt_post_error_strings[] = { +#define _(sym,string) string, + foreach_esp_encrypt_post_error +#undef _ + }; + +vlib_node_registration_t dpdk_esp_encrypt_post_node; + +static u8 * +format_esp_encrypt_post_trace (u8 * s, va_list * args) +{ + return s; +} + +static uword +dpdk_esp_encrypt_post_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next = 0, next_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + n_left_to_next -= 1; 
+ + b0 = vlib_get_buffer (vm, bi0); + + to_next[0] = bi0; + to_next += 1; + + next0 = vnet_buffer (b0)->unused[0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, bi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, dpdk_esp_encrypt_post_node.index, + ESP_ENCRYPT_POST_ERROR_PKTS, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_encrypt_post_node) = +{ + .function = dpdk_esp_encrypt_post_node_fn,.name = + "dpdk-esp-encrypt-post",.vector_size = sizeof (u32),.format_trace = + format_esp_encrypt_post_trace,.type = VLIB_NODE_TYPE_INTERNAL,.n_errors = + ARRAY_LEN (esp_encrypt_post_error_strings),.error_strings = + esp_encrypt_post_error_strings,.n_next_nodes = + ESP_ENCRYPT_N_NEXT,.next_nodes = + { +#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n, + foreach_esp_encrypt_next +#undef _ + } +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_post_node, + dpdk_esp_encrypt_post_node_fn) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c new file mode 100644 index 00000000..16bec20a --- /dev/null +++ b/src/plugins/dpdk/ipsec/ipsec.c @@ -0,0 +1,430 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#define DPDK_CRYPTO_NB_SESS_OBJS 20000 +#define DPDK_CRYPTO_CACHE_SIZE 512 +#define DPDK_CRYPTO_PRIV_SIZE 128 +#define DPDK_CRYPTO_N_QUEUE_DESC 1024 +#define DPDK_CRYPTO_NB_COPS (1024 * 4) + +static int +add_del_sa_sess (u32 sa_index, u8 is_add) +{ + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm; + u8 skip_master = vlib_num_workers () > 0; + + /* *INDENT-OFF* */ + vec_foreach (cwm, dcm->workers_main) + { + crypto_sa_session_t *sa_sess; + u8 is_outbound; + + if (skip_master) + { + skip_master = 0; + continue; + } + + for (is_outbound = 0; is_outbound < 2; is_outbound++) + { + if (is_add) + { + pool_get (cwm->sa_sess_d[is_outbound], sa_sess); + } + else + { + u8 dev_id; + + sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index); + dev_id = cwm->qp_data[sa_sess->qp_index].dev_id; + + if (!sa_sess->sess) + continue; + + if (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess)) + { + clib_warning("failed to free session"); + return -1; + } + memset(sa_sess, 0, sizeof(sa_sess[0])); + } + } + } + /* *INDENT-OFF* */ + + return 0; +} + +static void +update_qp_data (crypto_worker_main_t * cwm, + u8 cdev_id, u16 qp_id, u8 is_outbound, u16 * idx) +{ + crypto_qp_data_t *qpd; + + /* *INDENT-OFF* */ + vec_foreach_index (*idx, cwm->qp_data) + { + qpd = vec_elt_at_index(cwm->qp_data, *idx); + + if (qpd->dev_id == cdev_id && qpd->qp_id == qp_id && + qpd->is_outbound == is_outbound) + return; + } + /* *INDENT-ON* */ + + vec_add2 (cwm->qp_data, qpd, 1); + + qpd->dev_id = cdev_id; + qpd->qp_id = qp_id; + qpd->is_outbound = is_outbound; +} + +/* + * return: + * 0: already exist + * 1: mapped + */ +static int +add_mapping (crypto_worker_main_t * cwm, + u8 cdev_id, u16 qp, u8 is_outbound, + const struct rte_cryptodev_capabilities *cipher_cap, + const struct rte_cryptodev_capabilities *auth_cap) +{ + u16 qp_index; + uword key = 0, data, *ret; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; + + p_key->cipher_algo = (u8) cipher_cap->sym.cipher.algo; + p_key->auth_algo = (u8) auth_cap->sym.auth.algo; + p_key->is_outbound = is_outbound; + + ret = hash_get (cwm->algo_qp_map, key); + if (ret) + return 0; + + update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index); + + data = (uword) qp_index; + hash_set (cwm->algo_qp_map, key, data); + + return 1; +} + +/* + * return: + * 0: already exist + * 1: mapped + */ +static int +add_cdev_mapping (crypto_worker_main_t * cwm, + struct rte_cryptodev_info *dev_info, u8 cdev_id, + u16 qp, u8 is_outbound) +{ + const struct rte_cryptodev_capabilities *i, *j; + u32 mapped = 0; + + for (i = dev_info->capabilities; i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++) + { + if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER) + continue; + + if (check_algo_is_supported (i, NULL) != 0) + continue; + + for (j = dev_info->capabilities; j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; + j++) + { + if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH) + continue; + + if (check_algo_is_supported (j, NULL) != 0) + continue; + + mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, j); + } + } + + return mapped; +} + +static int +check_cryptodev_queues () +{ + u32 n_qs = 0; + u8 cdev_id; + u32 n_req_qs = 2; + + if (vlib_num_workers () > 0) + n_req_qs = vlib_num_workers () * 2; + + for (cdev_id = 0; cdev_id < rte_cryptodev_count (); cdev_id++) + { + struct rte_cryptodev_info cdev_info; + + rte_cryptodev_info_get (cdev_id, &cdev_info); + + if (! 
+ (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) + continue; + + n_qs += cdev_info.max_nb_queue_pairs; + } + + if (n_qs >= n_req_qs) + return 0; + else + return -1; +} + +static clib_error_t * +dpdk_ipsec_check_support (ipsec_sa_t * sa) +{ + if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) + return clib_error_return (0, "unsupported integ-alg %U with " + "crypto-algo aes-gcm-128", + format_ipsec_integ_alg, sa->integ_alg); + sa->integ_alg = IPSEC_INTEG_ALG_AES_GCM_128; + } + else + { + if (sa->integ_alg == IPSEC_INTEG_ALG_NONE || + sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) + return clib_error_return (0, "unsupported integ-alg %U", + format_ipsec_integ_alg, sa->integ_alg); + } + + return 0; +} + +static uword +dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + dpdk_config_main_t *conf = &dpdk_config_main; + ipsec_main_t *im = &ipsec_main; + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + struct rte_cryptodev_config dev_conf; + struct rte_cryptodev_qp_conf qp_conf; + struct rte_cryptodev_info cdev_info; + struct rte_mempool *rmp; + i32 dev_id, ret; + u32 i, skip_master; + + if (!conf->cryptodev) + { + clib_warning ("DPDK Cryptodev support is disabled, " + "default to OpenSSL IPsec"); + return 0; + } + + if (check_cryptodev_queues () < 0) + { + conf->cryptodev = 0; + clib_warning ("not enough Cryptodevs, default to OpenSSL IPsec"); + return 0; + } + + vec_alloc (dcm->workers_main, tm->n_vlib_mains); + _vec_len (dcm->workers_main) = tm->n_vlib_mains; + + fprintf (stdout, "DPDK Cryptodevs info:\n"); + fprintf (stdout, "dev_id\tn_qp\tnb_obj\tcache_size\n"); + /* HW cryptodevs have higher dev_id, use HW first */ + for (dev_id = rte_cryptodev_count () - 1; dev_id >= 0; dev_id--) + { + u16 max_nb_qp, qp = 0; + skip_master = vlib_num_workers () > 0; + + rte_cryptodev_info_get (dev_id, &cdev_info); + + if (! 
+ (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) + continue; + + max_nb_qp = cdev_info.max_nb_queue_pairs; + + for (i = 0; i < tm->n_vlib_mains; i++) + { + u8 is_outbound; + crypto_worker_main_t *cwm; + uword *map; + + if (skip_master) + { + skip_master = 0; + continue; + } + + cwm = vec_elt_at_index (dcm->workers_main, i); + map = cwm->algo_qp_map; + + if (!map) + { + map = hash_create (0, sizeof (crypto_worker_qp_key_t)); + if (!map) + { + clib_warning ("unable to create hash table for worker %u", + vlib_mains[i]->cpu_index); + goto error; + } + cwm->algo_qp_map = map; + } + + for (is_outbound = 0; is_outbound < 2 && qp < max_nb_qp; + is_outbound++) + qp += add_cdev_mapping (cwm, &cdev_info, dev_id, qp, is_outbound); + } + + if (qp == 0) + continue; + + dev_conf.socket_id = rte_cryptodev_socket_id (dev_id); + dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs; + dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_SESS_OBJS; + dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE; + + ret = rte_cryptodev_configure (dev_id, &dev_conf); + if (ret < 0) + { + clib_warning ("cryptodev %u config error", dev_id); + goto error; + } + + qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC; + for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++) + { + ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf, + dev_conf.socket_id); + if (ret < 0) + { + clib_warning ("cryptodev %u qp %u setup error", dev_id, qp); + goto error; + } + } + vec_validate_aligned (dcm->cop_pools, dev_conf.socket_id, + CLIB_CACHE_LINE_BYTES); + + if (!vec_elt (dcm->cop_pools, dev_conf.socket_id)) + { + u8 *pool_name = format (0, "crypto_op_pool_socket%u%c", + dev_conf.socket_id, 0); + + rmp = rte_crypto_op_pool_create ((char *) pool_name, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + DPDK_CRYPTO_NB_COPS * + (1 + vlib_num_workers ()), + DPDK_CRYPTO_CACHE_SIZE, + DPDK_CRYPTO_PRIV_SIZE, + dev_conf.socket_id); + vec_free (pool_name); + + if (!rmp) + { + clib_warning ("failed to allocate mempool on socket %u", + dev_conf.socket_id); + goto error; + } + vec_elt (dcm->cop_pools, dev_conf.socket_id) = rmp; + } + + fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs, + DPDK_CRYPTO_NB_SESS_OBJS, DPDK_CRYPTO_CACHE_SIZE); + } + + dpdk_esp_init (); + + /* Add new next node and set as default */ + vlib_node_t *node, *next_node; + + next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt"); + ASSERT (next_node); + node = vlib_get_node_by_name (vm, (u8 *) "ipsec-output-ip4"); + ASSERT (node); + im->esp_encrypt_node_index = next_node->index; + im->esp_encrypt_next_index = + vlib_node_add_next (vm, node->index, next_node->index); + + next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-decrypt"); + ASSERT (next_node); + node = vlib_get_node_by_name (vm, (u8 *) "ipsec-input-ip4"); + ASSERT (node); + im->esp_decrypt_node_index = next_node->index; + im->esp_decrypt_next_index = + vlib_node_add_next (vm, node->index, next_node->index); + + im->cb.check_support_cb = dpdk_ipsec_check_support; + im->cb.add_del_sa_sess_cb = add_del_sa_sess; + + if (vec_len (vlib_mains) == 0) + vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index, + VLIB_NODE_STATE_POLLING); + else + for (i = 1; i < tm->n_vlib_mains; i++) + vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, + VLIB_NODE_STATE_POLLING); + + /* TODO cryptodev counters */ + + return 0; + +error: + ; + crypto_worker_main_t *cwm; + struct rte_mempool **mp; + /* *INDENT-OFF* */ + vec_foreach (cwm, dcm->workers_main) + hash_free 
(cwm->algo_qp_map); + + vec_foreach (mp, dcm->cop_pools) + { + if (mp) + rte_mempool_free (mp[0]); + } + /* *INDENT-ON* */ + vec_free (dcm->workers_main); + vec_free (dcm->cop_pools); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_ipsec_process_node,static) = { + .function = dpdk_ipsec_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "dpdk-ipsec-process", + .process_log2_n_stack_bytes = 17, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/ipsec.h b/src/plugins/dpdk/ipsec/ipsec.h new file mode 100644 index 00000000..3465b361 --- /dev/null +++ b/src/plugins/dpdk/ipsec/ipsec.h @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __DPDK_IPSEC_H__ +#define __DPDK_IPSEC_H__ + +#include + +#undef always_inline +#include +#include + +#if CLIB_DEBUG > 0 +#define always_inline static inline +#else +#define always_inline static inline __attribute__ ((__always_inline__)) +#endif + + +#define MAX_QP_PER_LCORE 16 + +typedef struct +{ + u32 salt; + u32 iv[2]; + u32 cnt; +} dpdk_gcm_cnt_blk; + +typedef struct +{ + dpdk_gcm_cnt_blk cb; + union + { + u8 aad[12]; + u8 icv[64]; + }; +} dpdk_cop_priv_t; + +typedef struct +{ + u8 cipher_algo; + u8 auth_algo; + u8 is_outbound; +} crypto_worker_qp_key_t; + +typedef struct +{ + u16 dev_id; + u16 qp_id; + u16 is_outbound; + i16 inflights; + u32 bi[VLIB_FRAME_SIZE]; + struct rte_crypto_op *cops[VLIB_FRAME_SIZE]; + struct rte_crypto_op **free_cops; +} crypto_qp_data_t; + +typedef struct +{ + u8 qp_index; + void *sess; +} crypto_sa_session_t; + +typedef struct +{ + crypto_sa_session_t *sa_sess_d[2]; + crypto_qp_data_t *qp_data; + uword *algo_qp_map; +} crypto_worker_main_t; + +typedef struct +{ + struct rte_mempool **cop_pools; + crypto_worker_main_t *workers_main; +} dpdk_crypto_main_t; + +dpdk_crypto_main_t dpdk_crypto_main; + +extern vlib_node_registration_t dpdk_crypto_input_node; + +#define CRYPTO_N_FREE_COPS (VLIB_FRAME_SIZE * 3) + +static_always_inline void +crypto_alloc_cops () +{ + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + u32 cpu_index = os_get_cpu_number (); + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + unsigned socket_id = rte_socket_id (); + crypto_qp_data_t *qpd; + + /* *INDENT-OFF* */ + vec_foreach (qpd, cwm->qp_data) + { + u32 l = vec_len (qpd->free_cops); + + if (PREDICT_FALSE (l < VLIB_FRAME_SIZE)) + { + u32 n_alloc; + + if (PREDICT_FALSE (!qpd->free_cops)) + vec_alloc (qpd->free_cops, CRYPTO_N_FREE_COPS); + + n_alloc = rte_crypto_op_bulk_alloc (dcm->cop_pools[socket_id], + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + &qpd->free_cops[l], + CRYPTO_N_FREE_COPS - l - 1); + + _vec_len (qpd->free_cops) = l + n_alloc; + } + } + /* *INDENT-ON* */ +} + +static_always_inline void +crypto_free_cop (crypto_qp_data_t * qpd, struct rte_crypto_op **cops, u32 n) +{ + u32 l = vec_len (qpd->free_cops); + + if (l + n >= 
CRYPTO_N_FREE_COPS) + { + l -= VLIB_FRAME_SIZE; + rte_mempool_put_bulk (cops[0]->mempool, + (void **) &qpd->free_cops[l], VLIB_FRAME_SIZE); + } + clib_memcpy (&qpd->free_cops[l], cops, sizeof (*cops) * n); + + _vec_len (qpd->free_cops) = l + n; +} + +static_always_inline int +check_algo_is_supported (const struct rte_cryptodev_capabilities *cap, + char *name) +{ + struct + { + uint8_t cipher_algo; + enum rte_crypto_sym_xform_type type; + union + { + enum rte_crypto_auth_algorithm auth; + enum rte_crypto_cipher_algorithm cipher; + }; + char *name; + } supported_algo[] = + { + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_NULL,.name = "NULL"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_AES_CBC,.name = "AES_CBC"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_AES_CTR,.name = "AES_CTR"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_3DES_CBC,.name = "3DES-CBC"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_AES_GCM,.name = "AES-GCM"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA1_HMAC,.name = "HMAC-SHA1"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA256_HMAC,.name = "HMAC-SHA256"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA384_HMAC,.name = "HMAC-SHA384"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA512_HMAC,.name = "HMAC-SHA512"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_AES_XCBC_MAC,.name = "AES-XCBC-MAC"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_AES_GCM,.name = "AES-GCM"}, + { + /* tail */ + .type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED},}; + uint32_t i = 0; + + if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC) + return -1; + + while (supported_algo[i].type != RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED) + { + if (cap->sym.xform_type == supported_algo[i].type) + { + if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER && + cap->sym.cipher.algo == supported_algo[i].cipher) || + (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH && + cap->sym.auth.algo == supported_algo[i].auth)) + { + if (name) + strcpy (name, supported_algo[i].name); + return 0; + } + } + + i++; + } + + return -1; +} + +#endif /* __DPDK_IPSEC_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c new file mode 100644 index 00000000..8073a50a --- /dev/null +++ b/src/plugins/dpdk/main.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +/* + * Called by the dpdk driver's rte_delay_us() function. + * Return 0 to have the dpdk do a regular delay loop. + * Return 1 if to skip the delay loop because we are suspending + * the calling vlib process instead. 
+ */ +int +rte_delay_us_override (unsigned us) +{ + vlib_main_t *vm; + + /* Don't bother intercepting for short delays */ + if (us < 10) + return 0; + + /* + * Only intercept if we are in a vlib process. + * If we are called from a vlib worker thread or the vlib main + * thread then do not intercept. (Must not be called from an + * independent pthread). + */ + if (os_get_cpu_number () == 0) + { + /* + * We're in the vlib main thread or a vlib process. Make sure + * the process is running and we're not still initializing. + */ + vm = vlib_get_main (); + if (vlib_in_process_context (vm)) + { + /* Only suspend for the admin_down_process */ + vlib_process_t *proc = vlib_get_current_process (vm); + if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) || + (proc->node_runtime.function != admin_up_down_process)) + return 0; + + f64 delay = 1e-6 * us; + vlib_process_suspend (vm, delay); + return 1; + } + } + return 0; // no override +} + +static void +rte_delay_us_override_cb (unsigned us) +{ + if (rte_delay_us_override (us) == 0) + rte_delay_us_block (us); +} + +static clib_error_t * dpdk_main_init (vlib_main_t * vm) +{ + dpdk_main_t * dm = &dpdk_main; + clib_error_t * error = 0; + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main (); + + if ((error = vlib_call_init_function (vm, dpdk_init))) + return error; + + /* register custom delay function */ + rte_delay_us_callback_register (rte_delay_us_override_cb); + + return error; +} + +VLIB_INIT_FUNCTION (dpdk_main_init); + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, +}; +/* *INDENT-ON* */ diff --git a/src/plugins/dpdk/thread.c b/src/plugins/dpdk/thread.c new file mode 100644 index 00000000..3a3fcc6c --- /dev/null +++ b/src/plugins/dpdk/thread.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static clib_error_t * +dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) +{ + int r; + r = rte_eal_remote_launch (fp, (void *) w, lcore_id); + if (r) + return clib_error_return (0, "Failed to launch thread %u", lcore_id); + return 0; +} + +static clib_error_t * +dpdk_thread_set_lcore (u32 thread, u16 lcore) +{ + return 0; +} + +static vlib_thread_callbacks_t callbacks = { + .vlib_launch_thread_cb = &dpdk_launch_thread, + .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore, +}; + +static clib_error_t * +dpdk_thread_init (vlib_main_t * vm) +{ + vlib_thread_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_thread_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 999f9869..14e78817 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -3970,13 +3970,6 @@ _(feature_enable_disable_reply) \ _(sw_interface_tag_add_del_reply) \ _(sw_interface_set_mtu_reply) -#if DPDK > 0 -#define foreach_standard_dpdk_reply_retval_handler \ -_(sw_interface_set_dpdk_hqos_pipe_reply) \ -_(sw_interface_set_dpdk_hqos_subport_reply) \ -_(sw_interface_set_dpdk_hqos_tctbl_reply) -#endif - #define _(n) \ static void vl_api_##n##_t_handler \ (vl_api_##n##_t * mp) \ @@ -4008,39 +4001,6 @@ foreach_standard_reply_retval_handler; foreach_standard_reply_retval_handler; #undef _ -#if DPDK > 0 -#define _(n) \ - static void vl_api_##n##_t_handler \ - (vl_api_##n##_t * mp) \ - { \ - vat_main_t * vam = &vat_main; \ - i32 retval = ntohl(mp->retval); \ - if (vam->async_mode) { \ - vam->async_errors += (retval < 0); \ - } else { \ - vam->retval = retval; \ - vam->result_ready = 1; \ - } \ - } -foreach_standard_dpdk_reply_retval_handler; -#undef _ - -#define _(n) \ - static void vl_api_##n##_t_handler_json \ - (vl_api_##n##_t * mp) \ - { \ - vat_main_t * vam = &vat_main; \ - vat_json_node_t node; \ - vat_json_init_object(&node); \ - vat_json_object_add_int(&node, "retval", ntohl(mp->retval)); \ - vat_json_print(vam->ofp, &node); \ - vam->retval = ntohl(mp->retval); \ - vam->result_ready = 1; \ - } -foreach_standard_dpdk_reply_retval_handler; -#undef _ -#endif - /* * Table of message reply handlers, must include boilerplate handlers * we just generated @@ -4272,16 +4232,6 @@ _(SW_INTERFACE_SET_MTU_REPLY, sw_interface_set_mtu_reply) \ _(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) -#if DPDK > 0 -#define foreach_vpe_dpdk_api_reply_msg \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY, \ - sw_interface_set_dpdk_hqos_pipe_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY, \ - sw_interface_set_dpdk_hqos_subport_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \ - sw_interface_set_dpdk_hqos_tctbl_reply) -#endif - typedef struct { u8 *name; @@ -5081,226 +5031,6 @@ api_sw_interface_clear_stats (vat_main_t * vam) return ret; } -#if DPDK >0 -static int -api_sw_interface_set_dpdk_hqos_pipe (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_sw_interface_set_dpdk_hqos_pipe_t *mp; - u32 sw_if_index; - u8 sw_if_index_set = 0; - u32 subport; - u8 subport_set = 0; - u32 
pipe; - u8 pipe_set = 0; - u32 profile; - u8 profile_set = 0; - int ret; - - /* Parse args required to build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "rx %U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "sw_if_index %u", &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "subport %u", &subport)) - subport_set = 1; - else - if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "pipe %u", &pipe)) - pipe_set = 1; - else if (unformat (i, "profile %u", &profile)) - profile_set = 1; - else - break; - } - - if (sw_if_index_set == 0) - { - errmsg ("missing interface name or sw_if_index"); - return -99; - } - - if (subport_set == 0) - { - errmsg ("missing subport "); - return -99; - } - - if (pipe_set == 0) - { - errmsg ("missing pipe"); - return -99; - } - - if (profile_set == 0) - { - errmsg ("missing profile"); - return -99; - } - - M (SW_INTERFACE_SET_DPDK_HQOS_PIPE, mp); - - mp->sw_if_index = ntohl (sw_if_index); - mp->subport = ntohl (subport); - mp->pipe = ntohl (pipe); - mp->profile = ntohl (profile); - - - S (mp); - W (ret); - return ret; -} - -static int -api_sw_interface_set_dpdk_hqos_subport (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_sw_interface_set_dpdk_hqos_subport_t *mp; - u32 sw_if_index; - u8 sw_if_index_set = 0; - u32 subport; - u8 subport_set = 0; - u32 tb_rate = 1250000000; /* 10GbE */ - u32 tb_size = 1000000; - u32 tc_rate[] = { 1250000000, 1250000000, 1250000000, 1250000000 }; - u32 tc_period = 10; - int ret; - - /* Parse args required to build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "rx %U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "sw_if_index %u", &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "subport %u", &subport)) - subport_set = 1; - else - if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "rate %u", &tb_rate)) - { - u32 tc_id; - - for (tc_id = 0; tc_id < (sizeof (tc_rate) / sizeof (tc_rate[0])); - tc_id++) - tc_rate[tc_id] = tb_rate; - } - else if (unformat (i, "bktsize %u", &tb_size)) - ; - else if (unformat (i, "tc0 %u", &tc_rate[0])) - ; - else if (unformat (i, "tc1 %u", &tc_rate[1])) - ; - else if (unformat (i, "tc2 %u", &tc_rate[2])) - ; - else if (unformat (i, "tc3 %u", &tc_rate[3])) - ; - else if (unformat (i, "period %u", &tc_period)) - ; - else - break; - } - - if (sw_if_index_set == 0) - { - errmsg ("missing interface name or sw_if_index"); - return -99; - } - - if (subport_set == 0) - { - errmsg ("missing subport "); - return -99; - } - - M (SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, mp); - - mp->sw_if_index = ntohl (sw_if_index); - mp->subport = ntohl (subport); - mp->tb_rate = ntohl (tb_rate); - mp->tb_size = ntohl (tb_size); - mp->tc_rate[0] = ntohl (tc_rate[0]); - mp->tc_rate[1] = ntohl (tc_rate[1]); - mp->tc_rate[2] = ntohl (tc_rate[2]); - mp->tc_rate[3] = ntohl (tc_rate[3]); - mp->tc_period = ntohl (tc_period); - - S (mp); - W (ret); - return ret; -} - -static int -api_sw_interface_set_dpdk_hqos_tctbl (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_sw_interface_set_dpdk_hqos_tctbl_t *mp; - u32 sw_if_index; - u8 sw_if_index_set = 0; - u8 entry_set = 0; - u8 tc_set = 0; - u8 queue_set = 0; - u32 entry, tc, queue; - int ret; - - /* Parse args required to 
build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "rx %U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "sw_if_index %u", &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "entry %d", &entry)) - entry_set = 1; - else if (unformat (i, "tc %d", &tc)) - tc_set = 1; - else if (unformat (i, "queue %d", &queue)) - queue_set = 1; - else - break; - } - - if (sw_if_index_set == 0) - { - errmsg ("missing interface name or sw_if_index"); - return -99; - } - - if (entry_set == 0) - { - errmsg ("missing entry "); - return -99; - } - - if (tc_set == 0) - { - errmsg ("missing traffic class "); - return -99; - } - - if (queue_set == 0) - { - errmsg ("missing queue "); - return -99; - } - - M (SW_INTERFACE_SET_DPDK_HQOS_TCTBL, mp); - - mp->sw_if_index = ntohl (sw_if_index); - mp->entry = ntohl (entry); - mp->tc = ntohl (tc); - mp->queue = ntohl (queue); - - S (mp); - W (ret); - return ret; -} -#endif - static int api_sw_interface_add_del_address (vat_main_t * vam) { @@ -18656,18 +18386,6 @@ _(sw_interface_set_mtu, " | sw_if_index mtu ") \ _(ip_neighbor_dump, "[ip6] | sw_if_index ") \ _(sw_interface_get_table, " | sw_if_index [ipv6]") -#if DPDK > 0 -#define foreach_vpe_dpdk_api_msg \ -_(sw_interface_set_dpdk_hqos_pipe, \ - "rx | sw_if_index subport pipe \n" \ - "profile \n") \ -_(sw_interface_set_dpdk_hqos_subport, \ - "rx | sw_if_index subport [rate ]\n" \ - "[bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ]\n") \ -_(sw_interface_set_dpdk_hqos_tctbl, \ - "rx | sw_if_index entry tc queue \n") -#endif - /* List of command functions, CLI names map directly to functions */ #define foreach_cli_function \ _(comment, "usage: comment ") \ @@ -18705,22 +18423,6 @@ _(unset, "usage: unset ") foreach_vpe_api_reply_msg; #undef _ -#if DPDK > 0 -#define _(N,n) \ - static void vl_api_##n##_t_handler_uni \ - (vl_api_##n##_t * mp) \ - { \ - vat_main_t * vam = &vat_main; \ - if (vam->json_output) { \ - vl_api_##n##_t_handler_json(mp); \ - } else { \ - vl_api_##n##_t_handler(mp); \ - } \ - } -foreach_vpe_dpdk_api_reply_msg; -#undef _ -#endif - void vat_api_hookup (vat_main_t * vam) { @@ -18734,18 +18436,6 @@ vat_api_hookup (vat_main_t * vam) foreach_vpe_api_reply_msg; #undef _ -#if DPDK > 0 -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler_uni, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vpe_dpdk_api_reply_msg; -#undef _ -#endif - #if (VPP_API_TEST_BUILTIN==0) vl_msg_api_set_first_available_msg_id (VL_MSG_FIRST_AVAILABLE); #endif @@ -18760,21 +18450,11 @@ vat_api_hookup (vat_main_t * vam) #define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); foreach_vpe_api_msg; #undef _ -#if DPDK >0 -#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); - foreach_vpe_dpdk_api_msg; -#undef _ -#endif /* Help strings */ #define _(n,h) hash_set_mem (vam->help_by_name, #n, h); foreach_vpe_api_msg; #undef _ -#if DPDK >0 -#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); - foreach_vpe_dpdk_api_msg; -#undef _ -#endif /* CLI functions */ #define _(n,h) hash_set_mem (vam->function_by_name, #n, n); diff --git a/src/vnet.am b/src/vnet.am index 923f61d8..84930f05 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -23,8 +23,7 @@ libvnet_la_DEPENDENCIES = \ libvlibmemory.la \ libvlibmemoryclient.la -libvnet_la_LIBADD = $(libvnet_la_DEPENDENCIES) -lm -lpthread -ldl -lrt $(DPDK_LD_ADD) -libvnet_la_LDFLAGS = 
$(DPDK_LD_FLAGS) +libvnet_la_LIBADD = $(libvnet_la_DEPENDENCIES) -lm -lpthread -ldl -lrt if WITH_LIBSSL libvnet_la_LIBADD += -lcrypto @@ -396,15 +395,6 @@ libvnet_la_SOURCES += \ vnet/ipsec/ipsec_api.c API_FILES += vnet/ipsec/ipsec.api - -if WITH_DPDK -libvnet_la_SOURCES += \ - vnet/devices/dpdk/ipsec/esp_encrypt.c \ - vnet/devices/dpdk/ipsec/esp_decrypt.c \ - vnet/devices/dpdk/ipsec/crypto_node.c \ - vnet/devices/dpdk/ipsec/cli.c \ - vnet/devices/dpdk/ipsec/ipsec.c -endif endif libvnet_la_SOURCES += \ @@ -416,11 +406,6 @@ nobase_include_HEADERS += \ vnet/ipsec/ikev2.h \ vnet/ipsec/ikev2_priv.h \ vnet/ipsec/ipsec.api.h -if WITH_DPDK -nobase_include_HEADERS += \ - vnet/devices/dpdk/ipsec/ipsec.h \ - vnet/devices/dpdk/ipsec/esp.h -endif ######################################## # Layer 3 protocol: osi @@ -803,29 +788,7 @@ nobase_include_HEADERS += \ vnet/pg/pg.h \ vnet/pg/edit.h -######################################## -# DPDK -######################################## -if WITH_DPDK -libvnet_la_SOURCES += \ - vnet/devices/dpdk/buffer.c \ - vnet/devices/dpdk/dpdk_priv.h \ - vnet/devices/dpdk/device.c \ - vnet/devices/dpdk/format.c \ - vnet/devices/dpdk/init.c \ - vnet/devices/dpdk/main.c \ - vnet/devices/dpdk/node.c \ - vnet/devices/dpdk/thread.c \ - vnet/devices/dpdk/hqos.c \ - vnet/devices/dpdk/cli.c \ - vnet/devices/dpdk/dpdk_api.c - -nobase_include_HEADERS += \ - vnet/devices/dpdk/dpdk.h \ - vnet/devices/dpdk/dpdk.api.h - -API_FILES += vnet/devices/dpdk/dpdk.api -else +if !WITH_DPDK libvnet_la_SOURCES += \ vnet/devices/nic/ixge.c \ vnet/devices/nic/ixge.h \ diff --git a/src/vnet/devices/dpdk/buffer.c b/src/vnet/devices/dpdk/buffer.c deleted file mode 100644 index f95d4cb5..00000000 --- a/src/vnet/devices/dpdk/buffer.c +++ /dev/null @@ -1,588 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * buffer.c: allocate/free network buffers. - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @file - * - * Allocate/free network buffers. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - -STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, - "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); - -static_always_inline void -dpdk_rte_pktmbuf_free (vlib_main_t * vm, vlib_buffer_t * b) -{ - vlib_buffer_t *hb = b; - struct rte_mbuf *mb; - u32 next, flags; - mb = rte_mbuf_from_vlib_buffer (hb); - -next: - flags = b->flags; - next = b->next_buffer; - mb = rte_mbuf_from_vlib_buffer (b); - - if (PREDICT_FALSE (b->n_add_refs)) - { - rte_mbuf_refcnt_update (mb, b->n_add_refs); - b->n_add_refs = 0; - } - - rte_pktmbuf_free_seg (mb); - - if (flags & VLIB_BUFFER_NEXT_PRESENT) - { - b = vlib_get_buffer (vm, next); - goto next; - } -} - -static void -del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) -{ - u32 i; - vlib_buffer_t *b; - - for (i = 0; i < vec_len (f->buffers); i++) - { - b = vlib_get_buffer (vm, f->buffers[i]); - dpdk_rte_pktmbuf_free (vm, b); - } - - vec_free (f->name); - vec_free (f->buffers); -} - -/* Add buffer free list. */ -static void -dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *f; - u32 merge_index; - int i; - - ASSERT (os_get_cpu_number () == 0); - - f = vlib_buffer_get_free_list (vm, free_list_index); - - merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); - if (merge_index != ~0 && merge_index != free_list_index) - { - vlib_buffer_merge_free_lists (pool_elt_at_index - (bm->buffer_free_list_pool, merge_index), - f); - } - - del_free_list (vm, f); - - /* Poison it. */ - memset (f, 0xab, sizeof (f[0])); - - pool_put (bm->buffer_free_list_pool, f); - - for (i = 1; i < vec_len (vlib_mains); i++) - { - bm = vlib_mains[i]->buffer_main; - f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);; - memset (f, 0xab, sizeof (f[0])); - pool_put (bm->buffer_free_list_pool, f); - } -} - -/* Make sure free list has at least given number of free buffers. */ -static uword -fill_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * fl, uword min_free_buffers) -{ - dpdk_main_t *dm = &dpdk_main; - vlib_buffer_t *b0, *b1, *b2, *b3; - int n, i; - u32 bi0, bi1, bi2, bi3; - unsigned socket_id = rte_socket_id (); - struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id]; - struct rte_mbuf *mb0, *mb1, *mb2, *mb3; - - /* Too early? */ - if (PREDICT_FALSE (rmp == 0)) - return 0; - - /* Already have enough free buffers on free list? */ - n = min_free_buffers - vec_len (fl->buffers); - if (n <= 0) - return min_free_buffers; - - /* Always allocate round number of buffers. */ - n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32)); - - /* Always allocate new buffers in reasonably large sized chunks. 
*/ - n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); - - vec_validate (vm->mbuf_alloc_list, n - 1); - - if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) - return 0; - - _vec_len (vm->mbuf_alloc_list) = n; - - i = 0; - - while (i < (n - 7)) - { - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 4]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 5]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 6]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 7]), STORE); - - mb0 = vm->mbuf_alloc_list[i]; - mb1 = vm->mbuf_alloc_list[i + 1]; - mb2 = vm->mbuf_alloc_list[i + 2]; - mb3 = vm->mbuf_alloc_list[i + 3]; - - ASSERT (rte_mbuf_refcnt_read (mb0) == 0); - ASSERT (rte_mbuf_refcnt_read (mb1) == 0); - ASSERT (rte_mbuf_refcnt_read (mb2) == 0); - ASSERT (rte_mbuf_refcnt_read (mb3) == 0); - - rte_mbuf_refcnt_set (mb0, 1); - rte_mbuf_refcnt_set (mb1, 1); - rte_mbuf_refcnt_set (mb2, 1); - rte_mbuf_refcnt_set (mb3, 1); - - b0 = vlib_buffer_from_rte_mbuf (mb0); - b1 = vlib_buffer_from_rte_mbuf (mb1); - b2 = vlib_buffer_from_rte_mbuf (mb2); - b3 = vlib_buffer_from_rte_mbuf (mb3); - - bi0 = vlib_get_buffer_index (vm, b0); - bi1 = vlib_get_buffer_index (vm, b1); - bi2 = vlib_get_buffer_index (vm, b2); - bi3 = vlib_get_buffer_index (vm, b3); - - vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); - vec_add1_aligned (fl->buffers, bi1, CLIB_CACHE_LINE_BYTES); - vec_add1_aligned (fl->buffers, bi2, CLIB_CACHE_LINE_BYTES); - vec_add1_aligned (fl->buffers, bi3, CLIB_CACHE_LINE_BYTES); - - vlib_buffer_init_for_free_list (b0, fl); - vlib_buffer_init_for_free_list (b1, fl); - vlib_buffer_init_for_free_list (b2, fl); - vlib_buffer_init_for_free_list (b3, fl); - - if (fl->buffer_init_function) - { - fl->buffer_init_function (vm, fl, &bi0, 1); - fl->buffer_init_function (vm, fl, &bi1, 1); - fl->buffer_init_function (vm, fl, &bi2, 1); - fl->buffer_init_function (vm, fl, &bi3, 1); - } - i += 4; - } - - while (i < n) - { - mb0 = vm->mbuf_alloc_list[i]; - - ASSERT (rte_mbuf_refcnt_read (mb0) == 0); - rte_mbuf_refcnt_set (mb0, 1); - - b0 = vlib_buffer_from_rte_mbuf (mb0); - bi0 = vlib_get_buffer_index (vm, b0); - - vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); - - vlib_buffer_init_for_free_list (b0, fl); - - if (fl->buffer_init_function) - fl->buffer_init_function (vm, fl, &bi0, 1); - i++; - } - - fl->n_alloc += n; - - return n; -} - -static u32 -alloc_from_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * free_list, - u32 * alloc_buffers, u32 n_alloc_buffers) -{ - u32 *dst, *src; - uword len, n_filled; - - dst = alloc_buffers; - - n_filled = fill_free_list (vm, free_list, n_alloc_buffers); - if (n_filled == 0) - return 0; - - len = vec_len (free_list->buffers); - ASSERT (len >= n_alloc_buffers); - - src = free_list->buffers + len - n_alloc_buffers; - clib_memcpy (dst, src, n_alloc_buffers * sizeof (u32)); - - _vec_len (free_list->buffers) -= n_alloc_buffers; - - return n_alloc_buffers; -} - -/* Allocate a given number of buffers into given array. - Returns number actually allocated which will be either zero or - number requested. 
*/ -u32 -dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - - return alloc_from_free_list - (vm, - pool_elt_at_index (bm->buffer_free_list_pool, - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX), - buffers, n_buffers); -} - - -u32 -dpdk_buffer_alloc_from_free_list (vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, u32 free_list_index) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *f; - f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); - return alloc_from_free_list (vm, f, buffers, n_buffers); -} - -static_always_inline void -vlib_buffer_free_inline (vlib_main_t * vm, - u32 * buffers, u32 n_buffers, u32 follow_buffer_next) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *fl; - u32 fi; - int i; - u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, - u32 follow_buffer_next); - - cb = bm->buffer_free_callback; - - if (PREDICT_FALSE (cb != 0)) - n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); - - if (!n_buffers) - return; - - for (i = 0; i < n_buffers; i++) - { - vlib_buffer_t *b; - - b = vlib_get_buffer (vm, buffers[i]); - - fl = vlib_buffer_get_buffer_free_list (vm, b, &fi); - - /* The only current use of this callback: multicast recycle */ - if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) - { - int j; - - vlib_buffer_add_to_free_list - (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); - - for (j = 0; j < vec_len (bm->announce_list); j++) - { - if (fl == bm->announce_list[j]) - goto already_announced; - } - vec_add1 (bm->announce_list, fl); - already_announced: - ; - } - else - { - if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) - dpdk_rte_pktmbuf_free (vm, b); - } - } - if (vec_len (bm->announce_list)) - { - vlib_buffer_free_list_t *fl; - for (i = 0; i < vec_len (bm->announce_list); i++) - { - fl = bm->announce_list[i]; - fl->buffers_added_to_freelist_function (vm, fl); - } - _vec_len (bm->announce_list) = 0; - } -} - -static void -dpdk_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ - 1); -} - -static void -dpdk_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ - 0); -} - -static void -dpdk_packet_template_init (vlib_main_t * vm, - void *vt, - void *packet_data, - uword n_packet_data_bytes, - uword min_n_buffers_each_physmem_alloc, u8 * name) -{ - vlib_packet_template_t *t = (vlib_packet_template_t *) vt; - - vlib_worker_thread_barrier_sync (vm); - memset (t, 0, sizeof (t[0])); - - vec_add (t->packet_data, packet_data, n_packet_data_bytes); - - vlib_worker_thread_barrier_release (vm); -} - -clib_error_t * -vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, - unsigned socket_id) -{ - dpdk_main_t *dm = &dpdk_main; - vlib_physmem_main_t *vpm = &vm->physmem_main; - struct rte_mempool *rmp; - int i; - - vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); - - /* pool already exists, nothing to do */ - if (dm->pktmbuf_pools[socket_id]) - return 0; - - u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); - - rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ - num_mbufs, /* number of mbufs */ - 512, /* cache size */ - VLIB_BUFFER_HDR_SIZE, /* priv size */ - VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ - socket_id); /* cpu socket */ - - if 
(rmp) - { - { - uword this_pool_end; - uword this_pool_start; - uword this_pool_size; - uword save_vpm_start, save_vpm_end, save_vpm_size; - struct rte_mempool_memhdr *memhdr; - - this_pool_start = ~0ULL; - this_pool_end = 0LL; - - STAILQ_FOREACH (memhdr, &rmp->mem_list, next) - { - if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) - this_pool_end = (uword) (memhdr->addr + memhdr->len); - if (((uword) memhdr->addr) < this_pool_start) - this_pool_start = (uword) (memhdr->addr); - } - ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); - this_pool_size = this_pool_end - this_pool_start; - - if (CLIB_DEBUG > 1) - { - clib_warning ("%s: pool start %llx pool end %llx pool size %lld", - pool_name, this_pool_start, this_pool_end, - this_pool_size); - clib_warning - ("before: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - save_vpm_start = vpm->virtual.start; - save_vpm_end = vpm->virtual.end; - save_vpm_size = vpm->virtual.size; - - if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) - vpm->virtual.start = this_pool_start; - if (this_pool_end > vpm->virtual.end) - vpm->virtual.end = this_pool_end; - - vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; - - if (CLIB_DEBUG > 1) - { - clib_warning - ("after: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - /* check if fits into buffer index range */ - if ((u64) vpm->virtual.size > - ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) - { - clib_warning ("physmem: virtual size out of range!"); - vpm->virtual.start = save_vpm_start; - vpm->virtual.end = save_vpm_end; - vpm->virtual.size = save_vpm_size; - rmp = 0; - } - } - if (rmp) - { - dm->pktmbuf_pools[socket_id] = rmp; - vec_free (pool_name); - return 0; - } - } - - vec_free (pool_name); - - /* no usable pool for this socket, try to use pool from another one */ - for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) - { - if (dm->pktmbuf_pools[i]) - { - clib_warning - ("WARNING: Failed to allocate mempool for CPU socket %u. 
" - "Threads running on socket %u will use socket %u mempool.", - socket_id, socket_id, i); - dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i]; - return 0; - } - } - - return clib_error_return (0, "failed to allocate mempool on socket %u", - socket_id); -} - -#if CLIB_DEBUG > 0 - -u32 *vlib_buffer_state_validation_lock; -uword *vlib_buffer_state_validation_hash; -void *vlib_buffer_state_heap; - -static clib_error_t * -buffer_state_validation_init (vlib_main_t * vm) -{ - void *oldheap; - - vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); - - oldheap = clib_mem_set_heap (vlib_buffer_state_heap); - - vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); - vec_validate_aligned (vlib_buffer_state_validation_lock, 0, - CLIB_CACHE_LINE_BYTES); - clib_mem_set_heap (oldheap); - return 0; -} - -VLIB_INIT_FUNCTION (buffer_state_validation_init); -#endif - -static vlib_buffer_callbacks_t callbacks = { - .vlib_buffer_alloc_cb = &dpdk_buffer_alloc, - .vlib_buffer_alloc_from_free_list_cb = &dpdk_buffer_alloc_from_free_list, - .vlib_buffer_free_cb = &dpdk_buffer_free, - .vlib_buffer_free_no_next_cb = &dpdk_buffer_free_no_next, - .vlib_packet_template_init_cb = &dpdk_packet_template_init, - .vlib_buffer_delete_free_list_cb = &dpdk_buffer_delete_free_list, -}; - -static clib_error_t * -dpdk_buffer_init (vlib_main_t * vm) -{ - vlib_buffer_cb_register (vm, &callbacks); - return 0; -} - -VLIB_INIT_FUNCTION (dpdk_buffer_init); - -/** @endcond */ -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c deleted file mode 100644 index 99998862..00000000 --- a/src/vnet/devices/dpdk/cli.c +++ /dev/null @@ -1,2079 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "dpdk_priv.h" - -/** - * @file - * @brief CLI for DPDK Abstraction Layer and pcap Tx Trace. - * - * This file contains the source code for CLI for DPDK - * Abstraction Layer and pcap Tx Trace. 
- */ - - -static clib_error_t * -get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd, - dpdk_device_config_t ** devconf) -{ - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - struct rte_eth_dev_info dev_info; - uword *p = 0; - clib_error_t *error = NULL; - - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - if (subport_id != 0) - { - error = clib_error_return (0, "Invalid subport"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - *xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get ((*xd)->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - (*devconf) = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - (*devconf) = &dm->conf->default_devconf; - -done: - return error; -} - -static clib_error_t * -pcap_trace_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ -#define PCAP_DEF_PKT_TO_CAPTURE (100) - - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - u8 *filename; - u8 *chroot_filename = 0; - u32 max = 0; - int enabled = 0; - int errorFlag = 0; - clib_error_t *error = 0; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "on")) - { - if (dm->tx_pcap_enable == 0) - { - enabled = 1; - } - else - { - vlib_cli_output (vm, "pcap tx capture already on..."); - errorFlag = 1; - break; - } - } - else if (unformat (line_input, "off")) - { - if (dm->tx_pcap_enable) - { - vlib_cli_output (vm, "captured %d pkts...", - dm->pcap_main.n_packets_captured + 1); - if (dm->pcap_main.n_packets_captured) - { - dm->pcap_main.n_packets_to_capture = - dm->pcap_main.n_packets_captured; - error = pcap_write (&dm->pcap_main); - if (error) - clib_error_report (error); - else - vlib_cli_output (vm, "saved to %s...", dm->pcap_filename); - } - - dm->tx_pcap_enable = 0; - } - else - { - vlib_cli_output (vm, "pcap tx capture already off..."); - errorFlag = 1; - break; - } - } - else if (unformat (line_input, "max %d", &max)) - { - if (dm->tx_pcap_enable) - { - vlib_cli_output (vm, - "can't change max value while pcap tx capture active..."); - errorFlag = 1; - break; - } - } - else if (unformat (line_input, "intfc %U", - unformat_vnet_sw_interface, dm->vnet_main, - &dm->pcap_sw_if_index)) - ; - - else if (unformat (line_input, "intfc any")) - { - dm->pcap_sw_if_index = 0; - } - else if (unformat (line_input, "file %s", &filename)) - { - if (dm->tx_pcap_enable) - { - vlib_cli_output (vm, - "can't change file while pcap tx capture active..."); - errorFlag = 1; - break; - } - - /* Brain-police user path input */ - if (strstr ((char *) filename, "..") - || index ((char *) filename, '/')) - { - vlib_cli_output (vm, "illegal characters in filename '%s'", - filename); - vlib_cli_output (vm, - "Hint: Only filename, do not enter directory structure."); - vec_free (filename); - errorFlag = 1; - break; - } - - chroot_filename = format (0, "/tmp/%s%c", filename, 0); - vec_free 
(filename); - } - else if (unformat (line_input, "status")) - { - if (dm->pcap_sw_if_index == 0) - { - vlib_cli_output (vm, "max is %d for any interface to file %s", - dm-> - pcap_pkts_to_capture ? dm->pcap_pkts_to_capture - : PCAP_DEF_PKT_TO_CAPTURE, - dm-> - pcap_filename ? dm->pcap_filename : (u8 *) - "/tmp/vpe.pcap"); - } - else - { - vlib_cli_output (vm, "max is %d for interface %U to file %s", - dm-> - pcap_pkts_to_capture ? dm->pcap_pkts_to_capture - : PCAP_DEF_PKT_TO_CAPTURE, - format_vnet_sw_if_index_name, dm->vnet_main, - dm->pcap_sw_if_index, - dm-> - pcap_filename ? dm->pcap_filename : (u8 *) - "/tmp/vpe.pcap"); - } - - if (dm->tx_pcap_enable == 0) - { - vlib_cli_output (vm, "pcap tx capture is off..."); - } - else - { - vlib_cli_output (vm, "pcap tx capture is on: %d of %d pkts...", - dm->pcap_main.n_packets_captured, - dm->pcap_main.n_packets_to_capture); - } - break; - } - - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - errorFlag = 1; - break; - } - } - unformat_free (line_input); - - - if (errorFlag == 0) - { - /* Since no error, save configured values. */ - if (chroot_filename) - { - if (dm->pcap_filename) - vec_free (dm->pcap_filename); - vec_add1 (chroot_filename, 0); - dm->pcap_filename = chroot_filename; - } - - if (max) - dm->pcap_pkts_to_capture = max; - - - if (enabled) - { - if (dm->pcap_filename == 0) - dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0); - - memset (&dm->pcap_main, 0, sizeof (dm->pcap_main)); - dm->pcap_main.file_name = (char *) dm->pcap_filename; - dm->pcap_main.n_packets_to_capture = PCAP_DEF_PKT_TO_CAPTURE; - if (dm->pcap_pkts_to_capture) - dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture; - - dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; - dm->tx_pcap_enable = 1; - vlib_cli_output (vm, "pcap tx capture on..."); - } - } - else if (chroot_filename) - vec_free (chroot_filename); - - - return error; -} - -/*? - * This command is used to start or stop a packet capture, or show - * the status of packet capture. - * - * This command has the following optional parameters: - * - * - on|off - Used to start or stop a packet capture. - * - * - max - Depth of local buffer. Once 'nn' number - * of packets have been received, buffer is flushed to file. Once another - * 'nn' number of packets have been received, buffer is flushed - * to file, overwriting previous write. If not entered, value defaults - * to 100. Can only be updated if packet capture is off. - * - * - intfc |any - Used to specify a given interface, - * or use 'any' to run packet capture on all interfaces. - * 'any' is the default if not provided. Settings from a previous - * packet capture are preserved, so 'any' can be used to reset - * the interface setting. - * - * - file - Used to specify the output filename. The file will - * be placed in the '/tmp' directory, so only the filename is - * supported. Directory should not be entered. If file already exists, file - * will be overwritten. If no filename is provided, '/tmp/vpe.pcap' - * will be used. Can only be updated if packet capture is off. - * - * - status - Displays the current status and configured attributes - * associated with a packet capture. If packet capture is in progress, - * 'status' also will return the number of packets currently in - * the local buffer. All additional attributes entered on command line - * with 'status' will be ingnored and not applied. 
- * - * @cliexpar - * Example of how to display the status of a tx packet capture when off: - * @cliexstart{pcap tx trace status} - * max is 100, for any interface to file /tmp/vpe.pcap - * pcap tx capture is off... - * @cliexend - * Example of how to start a tx packet capture: - * @cliexstart{pcap tx trace on max 35 intfc GigabitEthernet0/8/0 file vppTest.pcap} - * pcap tx capture on... - * @cliexend - * Example of how to display the status of a tx packet capture in progress: - * @cliexstart{pcap tx trace status} - * max is 35, for interface GigabitEthernet0/8/0 to file /tmp/vppTest.pcap - * pcap tx capture is on: 20 of 35 pkts... - * @cliexend - * Example of how to stop a tx packet capture: - * @cliexstart{vppctl pcap tx trace off} - * captured 21 pkts... - * saved to /tmp/vppTest.pcap... - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (pcap_trace_command, static) = { - .path = "pcap tx trace", - .short_help = - "pcap tx trace [on|off] [max ] [intfc |any] [file ] [status]", - .function = pcap_trace_command_fn, -}; -/* *INDENT-ON* */ - - -static clib_error_t * -show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - struct rte_mempool *rmp; - int i; - - for (i = 0; i < vec_len (dpdk_main.pktmbuf_pools); i++) - { - rmp = dpdk_main.pktmbuf_pools[i]; - if (rmp) - { - unsigned count = rte_mempool_avail_count (rmp); - unsigned free_count = rte_mempool_in_use_count (rmp); - - vlib_cli_output (vm, - "name=\"%s\" available = %7d allocated = %7d total = %7d\n", - rmp->name, (u32) count, (u32) free_count, - (u32) (count + free_count)); - } - else - { - vlib_cli_output (vm, "rte_mempool is NULL (!)\n"); - } - } - return 0; -} - -/*? - * This command displays statistics of each DPDK mempool. - * - * @cliexpar - * Example of how to display DPDK buffer data: - * @cliexstart{show dpdk buffer} - * name="mbuf_pool_socket0" available = 15104 allocated = 1280 total = 16384 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_bufferr,static) = { - .path = "show dpdk buffer", - .short_help = "show dpdk buffer", - .function = show_dpdk_buffer, - .is_mp_safe = 1, -}; -/* *INDENT-ON* */ - -static clib_error_t * -test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - static u32 *allocated_buffers; - u32 n_alloc = 0; - u32 n_free = 0; - u32 first, actual_alloc; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "allocate %d", &n_alloc)) - ; - else if (unformat (input, "free %d", &n_free)) - ; - else - break; - } - - if (n_free) - { - if (vec_len (allocated_buffers) < n_free) - return clib_error_return (0, "Can't free %d, only %d allocated", - n_free, vec_len (allocated_buffers)); - - first = vec_len (allocated_buffers) - n_free; - vlib_buffer_free (vm, allocated_buffers + first, n_free); - _vec_len (allocated_buffers) = first; - } - if (n_alloc) - { - first = vec_len (allocated_buffers); - vec_validate (allocated_buffers, - vec_len (allocated_buffers) + n_alloc - 1); - - actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first, - n_alloc); - _vec_len (allocated_buffers) = first + actual_alloc; - - if (actual_alloc < n_alloc) - vlib_cli_output (vm, "WARNING: only allocated %d buffers", - actual_alloc); - } - - vlib_cli_output (vm, "Currently %d buffers allocated", - vec_len (allocated_buffers)); - - if (allocated_buffers && vec_len (allocated_buffers) == 0) - vec_free (allocated_buffers); - - return 0; -} - -/*? 
- * This command tests the allocation and freeing of DPDK buffers. - * If both 'allocate' and 'free' are entered on the - * same command, the 'free' is executed first. If no - * parameters are provided, this command display how many DPDK buffers - * the test command has allocated. - * - * @cliexpar - * @parblock - * - * Example of how to display how many DPDK buffer test command has allcoated: - * @cliexstart{test dpdk buffer} - * Currently 0 buffers allocated - * @cliexend - * - * Example of how to allocate DPDK buffers using the test command: - * @cliexstart{test dpdk buffer allocate 10} - * Currently 10 buffers allocated - * @cliexend - * - * Example of how to free DPDK buffers allocated by the test command: - * @cliexstart{test dpdk buffer free 10} - * Currently 0 buffers allocated - * @cliexend - * @endparblock -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = { - .path = "test dpdk buffer", - .short_help = "test dpdk buffer [allocate ] [free ]", - .function = test_dpdk_buffer, - .is_mp_safe = 1, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 nb_rx_desc = (u32) ~ 0; - u32 nb_tx_desc = (u32) ~ 0; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "tx %d", &nb_tx_desc)) - ; - else if (unformat (line_input, "rx %d", &nb_rx_desc)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - { - error = - clib_error_return (0, - "number of descriptors can be set only for " - "physical devices"); - goto done; - } - - if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) && - (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc)) - { - error = clib_error_return (0, "nothing changed"); - goto done; - } - - if (nb_rx_desc != (u32) ~ 0) - xd->nb_rx_desc = nb_rx_desc; - - if (nb_tx_desc != (u32) ~ 0) - xd->nb_tx_desc = nb_tx_desc; - - error = dpdk_port_setup (dm, xd); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command sets the number of DPDK 'rx' and - * 'tx' descriptors for the given physical interface. Use - * the command 'show hardware-interface' to display the - * current descriptor allocation. 
- * - * @cliexpar - * Example of how to set the DPDK interface descriptors: - * @cliexcmd{set dpdk interface descriptors GigabitEthernet0/8/0 rx 512 tx 512} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = { - .path = "set dpdk interface descriptors", - .short_help = "set dpdk interface descriptors [rx ] [tx ]", - .function = set_dpdk_if_desc, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_and_queue_t *dq; - int cpu; - - if (tm->n_vlib_mains == 1) - vlib_cli_output (vm, "All interfaces are handled by main thread"); - - for (cpu = 0; cpu < vec_len (dm->devices_by_cpu); cpu++) - { - if (cpu >= dm->input_cpu_first_index && - cpu < (dm->input_cpu_first_index + dm->input_cpu_count)) - vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, - vlib_worker_threads[cpu].name, - vlib_worker_threads[cpu].lcore_id); - - /* *INDENT-OFF* */ - vec_foreach(dq, dm->devices_by_cpu[cpu]) - { - u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; - vnet_hw_interface_t * hi = vnet_get_hw_interface(dm->vnet_main, hw_if_index); - vlib_cli_output(vm, " %v queue %u", hi->name, dq->queue_id); - } - /* *INDENT-ON* */ - } - return 0; -} - -/*? - * This command is used to display the thread and core each - * DPDK interface and queue is assigned too. - * - * @cliexpar - * Example of how to display the DPDK interface placement: - * @cliexstart{show dpdk interface placement} - * Thread 1 (vpp_wk_0 at lcore 1): - * GigabitEthernet0/8/0 queue 0 - * GigabitEthernet0/9/0 queue 0 - * Thread 2 (vpp_wk_1 at lcore 2): - * GigabitEthernet0/8/0 queue 1 - * GigabitEthernet0/9/0 queue 1 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_if_placement,static) = { - .path = "show dpdk interface placement", - .short_help = "show dpdk interface placement", - .function = show_dpdk_if_placement, -}; -/* *INDENT-ON* */ - -static int -dpdk_device_queue_sort (void *a1, void *a2) -{ - dpdk_device_and_queue_t *dq1 = a1; - dpdk_device_and_queue_t *dq2 = a2; - - if (dq1->device > dq2->device) - return 1; - else if (dq1->device < dq2->device) - return -1; - else if (dq1->queue_id > dq2->queue_id) - return 1; - else if (dq1->queue_id < dq2->queue_id) - return -1; - else - return 0; -} - -static clib_error_t * -set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_and_queue_t *dq; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 queue = (u32) 0; - u32 cpu = (u32) ~ 0; - int i; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "queue %d", &queue)) - ; - else if (unformat (line_input, "thread %d", &cpu)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - if (cpu < dm->input_cpu_first_index || - cpu >= (dm->input_cpu_first_index + dm->input_cpu_count)) - { - 
error = clib_error_return (0, "please specify valid thread id"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - for (i = 0; i < vec_len (dm->devices_by_cpu); i++) - { - /* *INDENT-OFF* */ - vec_foreach(dq, dm->devices_by_cpu[i]) - { - if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index && - queue == dq->queue_id) - { - if (cpu == i) /* nothing to do */ - goto done; - - vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]); - vec_add2(dm->devices_by_cpu[cpu], dq, 1); - dq->queue_id = queue; - dq->device = xd->device_index; - xd->cpu_socket_id_by_queue[queue] = - rte_lcore_to_socket_id(vlib_worker_threads[cpu].lcore_id); - - vec_sort_with_function(dm->devices_by_cpu[i], - dpdk_device_queue_sort); - - vec_sort_with_function(dm->devices_by_cpu[cpu], - dpdk_device_queue_sort); - - if (vec_len(dm->devices_by_cpu[i]) == 0) - vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, - VLIB_NODE_STATE_DISABLED); - - if (vec_len(dm->devices_by_cpu[cpu]) == 1) - vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - - goto done; - } - } - /* *INDENT-ON* */ - } - - error = clib_error_return (0, "not found"); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to assign a given interface, and optionally a - * given queue, to a different thread. This will not create a thread, - * so the thread must already exist. Use '/etc/vpp/startup.conf' - * for the initial thread creation. If the 'queue' is not provided, - * it defaults to 0. - * - * @cliexpar - * Example of how to display the DPDK interface placement: - * @cliexstart{show dpdk interface placement} - * Thread 1 (vpp_wk_0 at lcore 1): - * GigabitEthernet0/8/0 queue 0 - * GigabitEthernet0/9/0 queue 0 - * Thread 2 (vpp_wk_1 at lcore 2): - * GigabitEthernet0/8/0 queue 1 - * GigabitEthernet0/9/0 queue 1 - * @cliexend - * Example of how to assign a DPDK interface and queue to a thread: - * @cliexcmd{set dpdk interface placement GigabitEthernet0/8/0 queue 1 thread 1} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { - .path = "set dpdk interface placement", - .short_help = "set dpdk interface placement [queue ] thread ", - .function = set_dpdk_if_placement, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_and_queue_t *dq; - int cpu; - - if (tm->n_vlib_mains == 1) - vlib_cli_output (vm, "All interfaces are handled by main thread"); - - for (cpu = 0; cpu < vec_len (dm->devices_by_hqos_cpu); cpu++) - { - if (cpu >= dm->hqos_cpu_first_index && - cpu < (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) - vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, - vlib_worker_threads[cpu].name, - vlib_worker_threads[cpu].lcore_id); - - vec_foreach (dq, dm->devices_by_hqos_cpu[cpu]) - { - u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; - vnet_hw_interface_t *hi = - vnet_get_hw_interface (dm->vnet_main, hw_if_index); - vlib_cli_output (vm, " %v queue %u", hi->name, dq->queue_id); - } - } - return 0; -} - -/*? - * This command is used to display the thread and core each - * DPDK output interface and HQoS queue is assigned too. 
- * - * @cliexpar - * Example of how to display the DPDK output interface and HQoS queue placement: - * @cliexstart{show dpdk interface hqos placement} - * Thread 1 (vpp_hqos-threads_0 at lcore 3): - * GigabitEthernet0/8/0 queue 0 - * Thread 2 (vpp_hqos-threads_1 at lcore 4): - * GigabitEthernet0/9/0 queue 0 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos_placement, static) = { - .path = "show dpdk interface hqos placement", - .short_help = "show dpdk interface hqos placement", - .function = show_dpdk_if_hqos_placement, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_and_queue_t *dq; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 cpu = (u32) ~ 0; - int i; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "thread %d", &cpu)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); - - if (cpu < dm->hqos_cpu_first_index || - cpu >= (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) - { - error = clib_error_return (0, "please specify valid thread id"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - for (i = 0; i < vec_len (dm->devices_by_hqos_cpu); i++) - { - vec_foreach (dq, dm->devices_by_hqos_cpu[i]) - { - if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index) - { - if (cpu == i) /* nothing to do */ - goto done; - - vec_del1 (dm->devices_by_hqos_cpu[i], - dq - dm->devices_by_hqos_cpu[i]); - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->queue_id = 0; - dq->device = xd->device_index; - - vec_sort_with_function (dm->devices_by_hqos_cpu[i], - dpdk_device_queue_sort); - - vec_sort_with_function (dm->devices_by_hqos_cpu[cpu], - dpdk_device_queue_sort); - - goto done; - } - } - } - - error = clib_error_return (0, "not found"); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to assign a given DPDK output interface and - * HQoS queue to a different thread. This will not create a thread, - * so the thread must already exist. Use '/etc/vpp/startup.conf' - * for the initial thread creation. See @ref qos_doc for more details. 
- * - * @cliexpar - * Example of how to display the DPDK output interface and HQoS queue placement: - * @cliexstart{show dpdk interface hqos placement} - * Thread 1 (vpp_hqos-threads_0 at lcore 3): - * GigabitEthernet0/8/0 queue 0 - * Thread 2 (vpp_hqos-threads_1 at lcore 4): - * GigabitEthernet0/9/0 queue 0 - * @cliexend - * Example of how to assign a DPDK output interface and HQoS queue to a thread: - * @cliexcmd{set dpdk interface hqos placement GigabitEthernet0/8/0 thread 2} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_placement, static) = { - .path = "set dpdk interface hqos placement", - .short_help = "set dpdk interface hqos placement thread ", - .function = set_dpdk_if_hqos_placement, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 subport_id = (u32) ~ 0; - u32 pipe_id = (u32) ~ 0; - u32 profile_id = (u32) ~ 0; - int rv; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "subport %d", &subport_id)) - ; - else if (unformat (line_input, "pipe %d", &pipe_id)) - ; - else if (unformat (line_input, "profile %d", &profile_id)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = - rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, - profile_id); - if (rv) - { - error = clib_error_return (0, "pipe configuration failed"); - goto done; - } - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to change the profile associate with a HQoS pipe. The - * '' is zero based. Use the command - * 'show dpdk interface hqos' to display the content of each profile. - * See @ref qos_doc for more details. - * - * @note - * Currently there is not an API to create a new HQoS pipe profile. One is - * created by default in the code (search for 'hqos_pipe_params_default''). - * Additional profiles can be created in code and code recompiled. Then use this - * command to assign it. 
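A minimal sketch of what an additional profile can look like at the DPDK level: the struct is DPDK's rte_sched_pipe_params, the values below are placeholders, and wiring a new entry into the interface configuration still requires the code change described in the note above.

static struct rte_sched_pipe_params hqos_pipe_params_example = {
  .tb_rate = 305175,            /* token bucket rate, bytes/sec */
  .tb_size = 1000000,           /* token bucket size, bytes */
  .tc_rate = {305175, 305175, 305175, 305175},  /* per traffic class, bytes/sec */
  .tc_period = 40,              /* traffic class period, ms */
  .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
};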
- * - * @cliexpar - * Example of how to assign a new profile to a HQoS pipe: - * @cliexcmd{set dpdk interface hqos pipe GigabitEthernet0/8/0 subport 0 pipe 2 profile 1} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pipe, static) = -{ - .path = "set dpdk interface hqos pipe", - .short_help = "set dpdk interface hqos pipe subport pipe " - "profile ", - .function = set_dpdk_if_hqos_pipe, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = NULL; - u32 hw_if_index = (u32) ~ 0; - u32 subport_id = (u32) ~ 0; - struct rte_sched_subport_params p; - int rv; - clib_error_t *error = NULL; - u32 tb_rate = (u32) ~ 0; - u32 tb_size = (u32) ~ 0; - u32 tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = - { (u32) ~ 0, (u32) ~ 0, (u32) ~ 0, (u32) ~ 0 }; - u32 tc_period = (u32) ~ 0; - dpdk_device_config_t *devconf = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "subport %d", &subport_id)) - ; - else if (unformat (line_input, "rate %d", &tb_rate)) - ; - else if (unformat (line_input, "bktsize %d", &tb_size)) - ; - else if (unformat (line_input, "tc0 %d", &tc_rate[0])) - ; - else if (unformat (line_input, "tc1 %d", &tc_rate[1])) - ; - else if (unformat (line_input, "tc2 %d", &tc_rate[2])) - ; - else if (unformat (line_input, "tc3 %d", &tc_rate[3])) - ; - else if (unformat (line_input, "period %d", &tc_period)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - error = get_hqos (hw_if_index, subport_id, &xd, &devconf); - - if (error == NULL) - { - /* Copy the current values over to local structure. */ - memcpy (&p, &devconf->hqos.subport[subport_id], sizeof (p)); - - /* Update local structure with input values. */ - if (tb_rate != (u32) ~ 0) - { - p.tb_rate = tb_rate; - p.tc_rate[0] = tb_rate; - p.tc_rate[1] = tb_rate; - p.tc_rate[2] = tb_rate; - p.tc_rate[3] = tb_rate; - } - if (tb_size != (u32) ~ 0) - { - p.tb_size = tb_size; - } - if (tc_rate[0] != (u32) ~ 0) - { - p.tc_rate[0] = tc_rate[0]; - } - if (tc_rate[1] != (u32) ~ 0) - { - p.tc_rate[1] = tc_rate[1]; - } - if (tc_rate[2] != (u32) ~ 0) - { - p.tc_rate[2] = tc_rate[2]; - } - if (tc_rate[3] != (u32) ~ 0) - { - p.tc_rate[3] = tc_rate[3]; - } - if (tc_period != (u32) ~ 0) - { - p.tc_period = tc_period; - } - - /* Apply changes. */ - rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p); - if (rv) - { - error = clib_error_return (0, "subport configuration failed"); - goto done; - } - else - { - /* Successfully applied, so save of the input values. */ - memcpy (&devconf->hqos.subport[subport_id], &p, sizeof (p)); - } - } - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to set the subport level parameters such as token - * bucket rate (bytes per seconds), token bucket size (bytes), traffic class - * rates (bytes per seconds) and token update period (Milliseconds). - * - * By default, the 'rate' is set to 1250000000 bytes/second (10GbE - * rate) and each of the four traffic classes is set to 100% of the port rate. 
- * If the 'rate' is updated by this command, all four traffic classes - * are assigned the same value. Each of the four traffic classes can be updated - * individually. - * - * @cliexpar - * Example of how modify the subport attributes for a 1GbE link: - * @cliexcmd{set dpdk interface hqos subport GigabitEthernet0/8/0 subport 0 rate 125000000} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_subport, static) = { - .path = "set dpdk interface hqos subport", - .short_help = "set dpdk interface hqos subport subport " - "[rate ] [bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] " - "[period ]", - .function = set_dpdk_if_hqos_subport, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 tc = (u32) ~ 0; - u32 queue = (u32) ~ 0; - u32 entry = (u32) ~ 0; - u32 val, i; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "entry %d", &entry)) - ; - else if (unformat (line_input, "tc %d", &tc)) - ; - else if (unformat (line_input, "queue %d", &queue)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - if (entry >= 64) - { - error = clib_error_return (0, "invalid entry"); - goto done; - } - if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) - { - error = clib_error_return (0, "invalid traffic class"); - goto done; - } - if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) - { - error = clib_error_return (0, "invalid traffic class queue"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - /* Should never happen, shut up Coverity warning */ - if (p == 0) - { - error = clib_error_return (0, "no worker registrations?"); - goto done; - } - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; - for (i = 0; i < worker_thread_count; i++) - xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to set the traffic class translation table. The - * traffic class translation table is used to map 64 values (0-63) to one of - * four traffic class and one of four HQoS input queue. Use the 'show - * dpdk interface hqos' command to display the traffic class translation - * table. See @ref qos_doc for more details. - * - * This command has the following parameters: - * - * - - Used to specify the output interface. - * - * - entry - Mapped value (0-63) to assign traffic class and queue to. - * - * - tc - Traffic class (0-3) to be used by the provided mapped value. 
- * - * - queue - HQoS input queue (0-3) to be used by the provided mapped value. - * - * @cliexpar - * Example of how modify the traffic class translation table: - * @cliexcmd{set dpdk interface hqos tctbl GigabitEthernet0/8/0 entry 16 tc 2 queue 2} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_tctbl, static) = { - .path = "set dpdk interface hqos tctbl", - .short_help = "set dpdk interface hqos tctbl entry tc queue ", - .function = set_dpdk_if_hqos_tctbl, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - clib_error_t *error = NULL; - - /* Device specific data */ - struct rte_eth_dev_info dev_info; - dpdk_device_config_t *devconf = 0; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - /* Should never happen, shut up Coverity warning */ - if (p == 0) - return clib_error_return (0, "no worker registrations?"); - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - /* Packet field configuration */ - u64 mask = (u64) ~ 0; - u32 id = (u32) ~ 0; - u32 offset = (u32) ~ 0; - - /* HQoS params */ - u32 n_subports_per_port, n_pipes_per_subport, tctbl_size; - - u32 i; - - /* Parse input arguments */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "id subport")) - id = 0; - else if (unformat (line_input, "id pipe")) - id = 1; - else if (unformat (line_input, "id tc")) - id = 2; - else if (unformat (line_input, "id %d", &id)) - ; - else if (unformat (line_input, "offset %d", &offset)) - ; - else if (unformat (line_input, "mask %llx", &mask)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - /* Get interface */ - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get (xd->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - if (devconf->hqos_enabled == 0) - { - vlib_cli_output (vm, "HQoS disabled for this interface"); - goto done; - } - - n_subports_per_port = devconf->hqos.port.n_subports_per_port; - n_pipes_per_subport = devconf->hqos.port.n_pipes_per_subport; - tctbl_size = RTE_DIM (devconf->hqos.tc_table); - - /* Validate packet field configuration: id, offset and mask */ - if (id >= 3) - { - error = 
clib_error_return (0, "invalid packet field id"); - goto done; - } - - switch (id) - { - case 0: - if (dpdk_hqos_validate_mask (mask, n_subports_per_port) != 0) - { - error = clib_error_return (0, "invalid subport ID mask " - "(n_subports_per_port = %u)", - n_subports_per_port); - goto done; - } - break; - case 1: - if (dpdk_hqos_validate_mask (mask, n_pipes_per_subport) != 0) - { - error = clib_error_return (0, "invalid pipe ID mask " - "(n_pipes_per_subport = %u)", - n_pipes_per_subport); - goto done; - } - break; - case 2: - default: - if (dpdk_hqos_validate_mask (mask, tctbl_size) != 0) - { - error = clib_error_return (0, "invalid TC table index mask " - "(TC table size = %u)", tctbl_size); - goto done; - } - } - - /* Propagate packet field configuration to all workers */ - for (i = 0; i < worker_thread_count; i++) - switch (id) - { - case 0: - xd->hqos_wt[worker_thread_first + i].hqos_field0_slabpos = offset; - xd->hqos_wt[worker_thread_first + i].hqos_field0_slabmask = mask; - xd->hqos_wt[worker_thread_first + i].hqos_field0_slabshr = - __builtin_ctzll (mask); - break; - case 1: - xd->hqos_wt[worker_thread_first + i].hqos_field1_slabpos = offset; - xd->hqos_wt[worker_thread_first + i].hqos_field1_slabmask = mask; - xd->hqos_wt[worker_thread_first + i].hqos_field1_slabshr = - __builtin_ctzll (mask); - break; - case 2: - default: - xd->hqos_wt[worker_thread_first + i].hqos_field2_slabpos = offset; - xd->hqos_wt[worker_thread_first + i].hqos_field2_slabmask = mask; - xd->hqos_wt[worker_thread_first + i].hqos_field2_slabshr = - __builtin_ctzll (mask); - } - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to set the packet fields required for classifiying the - * incoming packet. As a result of classification process, packet field - * information will be mapped to 5 tuples (subport, pipe, traffic class, pipe, - * color) and stored in packet mbuf. - * - * This command has the following parameters: - * - * - - Used to specify the output interface. - * - * - id subport|pipe|tc - Classification occurs across three fields. - * This parameter indicates which of the three masks are being configured. Legacy - * code used 0-2 to represent these three fields, so 0-2 is still accepted. - * - subport|0 - Currently only one subport is supported, so only - * an empty mask is supported for the subport classification. - * - pipe|1 - Currently, 4096 pipes per subport are supported, so a - * 12-bit mask should be configure to map to the 0-4095 pipes. - * - tc|2 - The translation table (see 'set dpdk interface hqos - * tctbl' command) maps each value (0-63) into one of the 4 traffic classes - * per pipe. A 6-bit mask should be configure to map this field to a traffic class. - * - * - offset - Offset in the packet to apply the 64-bit mask for classification. - * The offset should be on an 8-byte boundary (0,8,16,24..). - * - * - mask - 64-bit mask to apply to packet at the given 'offset'. - * Bits must be contiguous and should not include '0x'. - * - * The default values for the 'pktfield' assumes Ethernet/IPv4/UDP packets with - * no VLAN. Adjust based on expected packet format and desired classification field. - * - 'subport' is always empty (offset 0 mask 0000000000000000) - * - By default, 'pipe' maps to the UDP payload bits 12 .. 
23 (offset 40 - * mask 0000000fff000000) - * - By default, 'tc' maps to the DSCP field in IP header (offset 48 mask - * 00000000000000fc) - * - * @cliexpar - * Example of how modify the 'pipe' classification filter to match VLAN: - * @cliexcmd{set dpdk interface hqos pktfield GigabitEthernet0/8/0 id pipe offset 8 mask 0000000000000FFF} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pktfield, static) = { - .path = "set dpdk interface hqos pktfield", - .short_help = "set dpdk interface hqos pktfield id subport|pipe|tc offset " - "mask ", - .function = set_dpdk_if_hqos_pktfield, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - dpdk_device_config_hqos_t *cfg; - dpdk_device_hqos_per_hqos_thread_t *ht; - dpdk_device_hqos_per_worker_thread_t *wk; - u32 *tctbl; - u32 hw_if_index = (u32) ~ 0; - u32 profile_id, subport_id, i; - struct rte_eth_dev_info dev_info; - dpdk_device_config_t *devconf = 0; - vlib_thread_registration_t *tr; - uword *p = 0; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify interface name!!"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get (xd->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - if (devconf->hqos_enabled == 0) - { - vlib_cli_output (vm, "HQoS disabled for this interface"); - goto done; - } - - /* Detect the set of worker threads */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - - /* Should never happen, shut up Coverity warning */ - if (p == 0) - { - error = clib_error_return (0, "no worker registrations?"); - goto done; - } - - tr = (vlib_thread_registration_t *) p[0]; - - cfg = &devconf->hqos; - ht = xd->hqos_ht; - wk = &xd->hqos_wt[tr->first_index]; - tctbl = wk->hqos_tc_table; - - vlib_cli_output (vm, " Thread:"); - vlib_cli_output (vm, " Input SWQ size = %u packets", cfg->swq_size); - vlib_cli_output (vm, " Enqueue burst size = %u packets", - ht->hqos_burst_enq); - vlib_cli_output (vm, " Dequeue burst size = %u packets", - ht->hqos_burst_deq); - - vlib_cli_output (vm, - " Packet field 0: slab position = %4u, slab bitmask = 0x%016llx (subport)", - wk->hqos_field0_slabpos, wk->hqos_field0_slabmask); - vlib_cli_output (vm, - " Packet field 1: slab position = %4u, slab bitmask = 0x%016llx (pipe)", - wk->hqos_field1_slabpos, 
wk->hqos_field1_slabmask); - vlib_cli_output (vm, - " Packet field 2: slab position = %4u, slab bitmask = 0x%016llx (tc)", - wk->hqos_field2_slabpos, wk->hqos_field2_slabmask); - vlib_cli_output (vm, - " Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)"); - vlib_cli_output (vm, - " [ 0 .. 15]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[0] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[0] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[1] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[1] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[2] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[2] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[3] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[3] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[4] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[4] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[5] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[5] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[6] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[6] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[7] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[7] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[8] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[8] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[9] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[9] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[10] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[10] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[11] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[11] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[12] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[12] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[13] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[13] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[14] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[14] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[15] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[15] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, - " [16 .. 
31]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[16] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[16] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[17] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[17] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[18] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[18] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[19] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[19] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[20] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[20] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[21] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[21] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[22] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[22] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[23] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[23] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[24] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[24] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[25] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[25] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[26] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[26] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[27] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[27] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[28] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[28] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[29] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[29] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[30] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[30] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[31] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[31] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, - " [32 .. 47]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[32] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[32] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[33] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[33] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[34] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[34] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[35] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[35] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[36] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[36] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[37] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[37] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[38] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[38] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[39] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[39] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[40] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[40] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[41] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[41] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[42] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[42] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[43] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[43] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[44] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[44] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[45] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[45] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[46] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[46] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[47] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[47] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, - " [48 .. 
63]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[48] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[48] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[49] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[49] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[50] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[50] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[51] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[51] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[52] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[52] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[53] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[53] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[54] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[54] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[55] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[55] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[56] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[56] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[57] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[57] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[58] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[58] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[59] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[59] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[60] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[60] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[61] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[61] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[62] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[62] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[63] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[63] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, " Port:"); - vlib_cli_output (vm, " Rate = %u bytes/second", cfg->port.rate); - vlib_cli_output (vm, " MTU = %u bytes", cfg->port.mtu); - vlib_cli_output (vm, " Frame overhead = %u bytes", - cfg->port.frame_overhead); - vlib_cli_output (vm, " Number of subports = %u", - cfg->port.n_subports_per_port); - vlib_cli_output (vm, " Number of pipes per subport = %u", - cfg->port.n_pipes_per_subport); - vlib_cli_output (vm, - " Packet queue size: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u packets", - cfg->port.qsize[0], cfg->port.qsize[1], cfg->port.qsize[2], - cfg->port.qsize[3]); - vlib_cli_output (vm, " Number of pipe profiles = %u", - cfg->port.n_pipe_profiles); - - for (subport_id = 0; subport_id < vec_len (cfg->subport); subport_id++) - { - vlib_cli_output (vm, " Subport %u:", subport_id); - vlib_cli_output (vm, " Rate = %u bytes/second", - cfg->subport[subport_id].tb_rate); - vlib_cli_output (vm, " Token bucket size = %u bytes", - cfg->subport[subport_id].tb_size); - vlib_cli_output (vm, - " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", - cfg->subport[subport_id].tc_rate[0], - cfg->subport[subport_id].tc_rate[1], - cfg->subport[subport_id].tc_rate[2], - cfg->subport[subport_id].tc_rate[3]); - vlib_cli_output (vm, " TC period = %u milliseconds", - cfg->subport[subport_id].tc_period); - } - - for (profile_id = 0; profile_id < vec_len (cfg->pipe); profile_id++) - { - vlib_cli_output (vm, " Pipe profile %u:", profile_id); - vlib_cli_output (vm, " Rate = %u bytes/second", - cfg->pipe[profile_id].tb_rate); - vlib_cli_output (vm, " Token bucket size = %u bytes", - cfg->pipe[profile_id].tb_size); - vlib_cli_output (vm, - " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", - cfg->pipe[profile_id].tc_rate[0], - cfg->pipe[profile_id].tc_rate[1], - 
cfg->pipe[profile_id].tc_rate[2], - cfg->pipe[profile_id].tc_rate[3]); - vlib_cli_output (vm, " TC period = %u milliseconds", - cfg->pipe[profile_id].tc_period); -#ifdef RTE_SCHED_SUBPORT_TC_OV - vlib_cli_output (vm, " TC3 oversubscription_weight = %u", - cfg->pipe[profile_id].tc_ov_weight); -#endif - - for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) - { - vlib_cli_output (vm, - " TC%u WRR weights: Q0 = %u, Q1 = %u, Q2 = %u, Q3 = %u", - i, cfg->pipe[profile_id].wrr_weights[i * 4], - cfg->pipe[profile_id].wrr_weights[i * 4 + 1], - cfg->pipe[profile_id].wrr_weights[i * 4 + 2], - cfg->pipe[profile_id].wrr_weights[i * 4 + 3]); - } - } - -#ifdef RTE_SCHED_RED - vlib_cli_output (vm, " Weighted Random Early Detection (WRED):"); - for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) - { - vlib_cli_output (vm, " TC%u min: G = %u, Y = %u, R = %u", i, - cfg->port.red_params[i][e_RTE_METER_GREEN].min_th, - cfg->port.red_params[i][e_RTE_METER_YELLOW].min_th, - cfg->port.red_params[i][e_RTE_METER_RED].min_th); - - vlib_cli_output (vm, " TC%u max: G = %u, Y = %u, R = %u", i, - cfg->port.red_params[i][e_RTE_METER_GREEN].max_th, - cfg->port.red_params[i][e_RTE_METER_YELLOW].max_th, - cfg->port.red_params[i][e_RTE_METER_RED].max_th); - - vlib_cli_output (vm, - " TC%u inverted probability: G = %u, Y = %u, R = %u", - i, cfg->port.red_params[i][e_RTE_METER_GREEN].maxp_inv, - cfg->port.red_params[i][e_RTE_METER_YELLOW].maxp_inv, - cfg->port.red_params[i][e_RTE_METER_RED].maxp_inv); - - vlib_cli_output (vm, " TC%u weight: R = %u, Y = %u, R = %u", i, - cfg->port.red_params[i][e_RTE_METER_GREEN].wq_log2, - cfg->port.red_params[i][e_RTE_METER_YELLOW].wq_log2, - cfg->port.red_params[i][e_RTE_METER_RED].wq_log2); - } -#endif - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to display details of an output interface's HQoS - * settings. - * - * @cliexpar - * Example of how to display HQoS settings for an interfaces: - * @cliexstart{show dpdk interface hqos GigabitEthernet0/8/0} - * Thread: - * Input SWQ size = 4096 packets - * Enqueue burst size = 256 packets - * Dequeue burst size = 220 packets - * Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport) - * Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe) - * Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc) - * Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...) - * [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * [48 .. 
63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * Port: - * Rate = 1250000000 bytes/second - * MTU = 1514 bytes - * Frame overhead = 24 bytes - * Number of subports = 1 - * Number of pipes per subport = 4096 - * Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets - * Number of pipe profiles = 2 - * Subport 0: - * Rate = 1250000000 bytes/second - * Token bucket size = 1000000 bytes - * Traffic class rate: TC0 = 1250000000, TC1 = 1250000000, TC2 = 1250000000, TC3 = 1250000000 bytes/second - * TC period = 10 milliseconds - * Pipe profile 0: - * Rate = 305175 bytes/second - * Token bucket size = 1000000 bytes - * Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second - * TC period = 40 milliseconds - * TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos, static) = { - .path = "show dpdk interface hqos", - .short_help = "show dpdk interface hqos ", - .function = show_dpdk_if_hqos, -}; - -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - clib_error_t *error = NULL; -#ifdef RTE_SCHED_COLLECT_STATS - dpdk_main_t *dm = &dpdk_main; - u32 hw_if_index = (u32) ~ 0; - u32 subport = (u32) ~ 0; - u32 pipe = (u32) ~ 0; - u32 tc = (u32) ~ 0; - u32 tc_q = (u32) ~ 0; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - uword *p = 0; - struct rte_eth_dev_info dev_info; - dpdk_device_config_t *devconf = 0; - u32 qindex; - struct rte_sched_queue_stats stats; - u16 qlen; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - - else if (unformat (line_input, "subport %d", &subport)) - ; - - else if (unformat (line_input, "pipe %d", &pipe)) - ; - - else if (unformat (line_input, "tc %d", &tc)) - ; - - else if (unformat (line_input, "tc_q %d", &tc_q)) - ; - - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify interface name!!"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get (xd->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - if (devconf->hqos_enabled == 0) - { - vlib_cli_output (vm, "HQoS disabled for this interface"); - goto done; - } - - /* - * Figure out which queue to query. cf rte_sched_port_qindex. (Not sure why - * that method isn't made public by DPDK - how _should_ we get the queue ID?) 
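Before the flattening arithmetic that follows, a short worked example may help (illustrative, not part of the patch). With the default hierarchy shown above -- 1 subport, 4096 pipes per subport, 4 traffic classes, 4 queues per class -- the queue queried in the later 'show dpdk hqos queue ... subport 0 pipe 3181 tc 0 tc_q 0' example flattens to:

    u32 qindex;
    qindex = 0 * 4096 + 3181;   /* subport * n_pipes_per_subport + pipe        ->  3181 */
    qindex = qindex * 4 + 0;    /* * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc   -> 12724 */
    qindex = qindex * 4 + 0;    /* * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q -> 50896 */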
- */ - qindex = subport * devconf->hqos.port.n_pipes_per_subport + pipe; - qindex = qindex * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc; - qindex = qindex * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q; - - if (rte_sched_queue_read_stats (xd->hqos_ht->hqos, qindex, &stats, &qlen) != - 0) - { - error = clib_error_return (0, "failed to read stats"); - goto done; - } - - vlib_cli_output (vm, "%=24s%=16s", "Stats Parameter", "Value"); - vlib_cli_output (vm, "%=24s%=16d", "Packets", stats.n_pkts); - vlib_cli_output (vm, "%=24s%=16d", "Packets dropped", stats.n_pkts_dropped); -#ifdef RTE_SCHED_RED - vlib_cli_output (vm, "%=24s%=16d", "Packets dropped (RED)", - stats.n_pkts_red_dropped); -#endif - vlib_cli_output (vm, "%=24s%=16d", "Bytes", stats.n_bytes); - vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped); - -#else - - /* Get a line of input */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - vlib_cli_output (vm, "RTE_SCHED_COLLECT_STATS disabled in DPDK"); - goto done; - -#endif - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to display statistics associated with a HQoS traffic class - * queue. - * - * @note - * Statistic collection by the scheduler is disabled by default in DPDK. In order to - * turn it on, add the following line to '../vpp/dpdk/Makefile': - * - $(call set,RTE_SCHED_COLLECT_STATS,y) - * - * @cliexpar - * Example of how to display statistics of HQoS a HQoS traffic class queue: - * @cliexstart{show dpdk hqos queue GigabitEthernet0/9/0 subport 0 pipe 3181 tc 0 tc_q 0} - * Stats Parameter Value - * Packets 140 - * Packets dropped 0 - * Bytes 8400 - * Bytes dropped 0 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = { - .path = "show dpdk hqos queue", - .short_help = "show dpdk hqos queue subport pipe tc tc_q ", - .function = show_dpdk_hqos_queue_stats, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_version_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ -#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c); - _("DPDK Version", "%s", rte_version ()); - _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str); -#undef _ - return 0; -} - -/*? - * This command is used to display the current DPDK version and - * the list of arguments passed to DPDK when started. - * - * @cliexpar - * Example of how to display how many DPDK buffer test command has allcoated: - * @cliexstart{show dpdk version} - * DPDK Version: DPDK 16.11.0 - * DPDK EAL init args: -c 1 -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -w 0000:00:08.0 -w 0000:00:09.0 --master-lcore 0 --socket-mem 256 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_vpe_version_command, static) = { - .path = "show dpdk version", - .short_help = "show dpdk version", - .function = show_dpdk_version_command_fn, -}; -/* *INDENT-ON* */ - -clib_error_t * -dpdk_cli_init (vlib_main_t * vm) -{ - return 0; -} - -VLIB_INIT_FUNCTION (dpdk_cli_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/device.c b/src/vnet/devices/dpdk/device.c deleted file mode 100644 index 17397900..00000000 --- a/src/vnet/devices/dpdk/device.c +++ /dev/null @@ -1,852 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. 
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include - -#include "dpdk_priv.h" -#include - -#define foreach_dpdk_tx_func_error \ - _(BAD_RETVAL, "DPDK tx function returned an error") \ - _(RING_FULL, "Tx packet drops (ring full)") \ - _(PKT_DROP, "Tx packet drops (dpdk tx failure)") \ - _(REPL_FAIL, "Tx packet drops (replication failure)") - -typedef enum -{ -#define _(f,s) DPDK_TX_FUNC_ERROR_##f, - foreach_dpdk_tx_func_error -#undef _ - DPDK_TX_FUNC_N_ERROR, -} dpdk_tx_func_error_t; - -static char *dpdk_tx_func_error_strings[] = { -#define _(n,s) s, - foreach_dpdk_tx_func_error -#undef _ -}; - -clib_error_t * -dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address) -{ - int error; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - - error = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) address); - - if (error) - { - return clib_error_return (0, "mac address set failed: %d", error); - } - else - { - vec_reset_length (xd->default_mac_address); - vec_add (xd->default_mac_address, address, sizeof (address)); - return NULL; - } -} - -clib_error_t * -dpdk_set_mc_filter (vnet_hw_interface_t * hi, - struct ether_addr mc_addr_vec[], int naddr) -{ - int error; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - - error = rte_eth_dev_set_mc_addr_list (xd->device_index, mc_addr_vec, naddr); - - if (error) - { - return clib_error_return (0, "mc addr list failed: %d", error); - } - else - { - return NULL; - } -} - -struct rte_mbuf * -dpdk_replicate_packet_mb (vlib_buffer_t * b) -{ - dpdk_main_t *dm = &dpdk_main; - struct rte_mbuf **mbufs = 0, *s, *d; - u8 nb_segs; - unsigned socket_id = rte_socket_id (); - int i; - - ASSERT (dm->pktmbuf_pools[socket_id]); - s = rte_mbuf_from_vlib_buffer (b); - nb_segs = s->nb_segs; - vec_validate (mbufs, nb_segs - 1); - - if (rte_pktmbuf_alloc_bulk (dm->pktmbuf_pools[socket_id], mbufs, nb_segs)) - { - vec_free (mbufs); - return 0; - } - - d = mbufs[0]; - d->nb_segs = s->nb_segs; - d->data_len = s->data_len; - d->pkt_len = s->pkt_len; - d->data_off = s->data_off; - clib_memcpy (d->buf_addr, s->buf_addr, RTE_PKTMBUF_HEADROOM + s->data_len); - - for (i = 1; i < nb_segs; i++) - { - d->next = mbufs[i]; - d = mbufs[i]; - s = s->next; - d->data_len = s->data_len; - clib_memcpy (d->buf_addr, s->buf_addr, - RTE_PKTMBUF_HEADROOM + s->data_len); - } - - d = mbufs[0]; - vec_free (mbufs); - return d; -} - -static void -dpdk_tx_trace_buffer (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, u32 buffer_index, vlib_buffer_t * buffer) -{ - vlib_main_t *vm = vlib_get_main (); - dpdk_tx_dma_trace_t *t0; - struct rte_mbuf *mb; - - mb = rte_mbuf_from_vlib_buffer (buffer); - - t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0])); - t0->queue_index = queue_id; - t0->device_index = xd->device_index; - t0->buffer_index = buffer_index; - 
clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); - clib_memcpy (&t0->buffer, buffer, - sizeof (buffer[0]) - sizeof (buffer->pre_data)); - clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data, - sizeof (t0->buffer.pre_data)); -} - -static_always_inline void -dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b, - int maybe_multiseg) -{ - struct rte_mbuf *mb, *first_mb, *last_mb; - - /* buffer is coming from non-dpdk source so we need to init - rte_mbuf header */ - if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_EXT_HDR_VALID) == 0)) - { - vlib_buffer_t *b2 = b; - last_mb = mb = rte_mbuf_from_vlib_buffer (b2); - rte_pktmbuf_reset (mb); - while (maybe_multiseg && (b2->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - b2 = vlib_get_buffer (vm, b2->next_buffer); - mb = rte_mbuf_from_vlib_buffer (b2); - rte_pktmbuf_reset (mb); - } - } - - last_mb = first_mb = mb = rte_mbuf_from_vlib_buffer (b); - first_mb->nb_segs = 1; - mb->data_len = b->current_length; - mb->pkt_len = maybe_multiseg ? vlib_buffer_length_in_chain (vm, b) : - b->current_length; - mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; - - while (maybe_multiseg && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - b = vlib_get_buffer (vm, b->next_buffer); - mb = rte_mbuf_from_vlib_buffer (b); - last_mb->next = mb; - last_mb = mb; - mb->data_len = b->current_length; - mb->pkt_len = b->current_length; - mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; - first_mb->nb_segs++; - if (PREDICT_FALSE (b->n_add_refs)) - { - rte_mbuf_refcnt_update (mb, b->n_add_refs); - b->n_add_refs = 0; - } - } -} - -/* - * This function calls the dpdk's tx_burst function to transmit the packets - * on the tx_vector. It manages a lock per-device if the device does not - * support multiple queues. It returns the number of packets untransmitted - * on the tx_vector. If all packets are transmitted (the normal case), the - * function returns 0. - * - * The function assumes there is at least one packet on the tx_vector. - */ -static_always_inline - u32 tx_burst_vector_internal (vlib_main_t * vm, - dpdk_device_t * xd, - struct rte_mbuf **tx_vector) -{ - dpdk_main_t *dm = &dpdk_main; - u32 n_packets; - u32 tx_head; - u32 tx_tail; - u32 n_retry; - int rv; - int queue_id; - tx_ring_hdr_t *ring; - - ring = vec_header (tx_vector, sizeof (*ring)); - - n_packets = ring->tx_head - ring->tx_tail; - - tx_head = ring->tx_head % xd->nb_tx_desc; - - /* - * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to - * unpredictable results. - */ - ASSERT (n_packets > 0); - - /* - * Check for tx_vector overflow. If this fails it is a system configuration - * error. The ring should be sized big enough to handle the largest un-flowed - * off burst from a traffic manager. A larger size also helps performance - * a bit because it decreases the probability of having to issue two tx_burst - * calls due to a ring wrap. - */ - ASSERT (n_packets < xd->nb_tx_desc); - ASSERT (ring->tx_tail == 0); - - n_retry = 16; - queue_id = vm->cpu_index; - - do - { - /* start the burst at the tail */ - tx_tail = ring->tx_tail % xd->nb_tx_desc; - - /* - * This device only supports one TX queue, - * and we're running multi-threaded... 
- */ - if (PREDICT_FALSE (xd->lockp != 0)) - { - queue_id = queue_id % xd->tx_q_used; - while (__sync_lock_test_and_set (xd->lockp[queue_id], 1)) - /* zzzz */ - queue_id = (queue_id + 1) % xd->tx_q_used; - } - - if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */ - { - /* no wrap, transmit in one burst */ - dpdk_device_hqos_per_worker_thread_t *hqos = - &xd->hqos_wt[vm->cpu_index]; - - ASSERT (hqos->swq != NULL); - - dpdk_hqos_metadata_set (hqos, - &tx_vector[tx_tail], tx_head - tx_tail); - rv = rte_ring_sp_enqueue_burst (hqos->swq, - (void **) &tx_vector[tx_tail], - (uint16_t) (tx_head - tx_tail)); - } - else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) - { - /* no wrap, transmit in one burst */ - rv = rte_eth_tx_burst (xd->device_index, - (uint16_t) queue_id, - &tx_vector[tx_tail], - (uint16_t) (tx_head - tx_tail)); - } - else - { - ASSERT (0); - rv = 0; - } - - if (PREDICT_FALSE (xd->lockp != 0)) - *xd->lockp[queue_id] = 0; - - if (PREDICT_FALSE (rv < 0)) - { - // emit non-fatal message, bump counter - vnet_main_t *vnm = dm->vnet_main; - vnet_interface_main_t *im = &vnm->interface_main; - u32 node_index; - - node_index = vec_elt_at_index (im->hw_interfaces, - xd->vlib_hw_if_index)->tx_node_index; - - vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1); - clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index, - rv); - return n_packets; // untransmitted packets - } - ring->tx_tail += (u16) rv; - n_packets -= (uint16_t) rv; - } - while (rv && n_packets && (n_retry > 0)); - - return n_packets; -} - -static_always_inline void -dpdk_prefetch_buffer_by_index (vlib_main_t * vm, u32 bi) -{ - vlib_buffer_t *b; - struct rte_mbuf *mb; - b = vlib_get_buffer (vm, bi); - mb = rte_mbuf_from_vlib_buffer (b); - CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); -} - -static_always_inline void -dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp) -{ - dpdk_main_t *dm = &dpdk_main; - u32 my_cpu = vm->cpu_index; - struct rte_mbuf *mb_new; - - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0) - return; - - mb_new = dpdk_replicate_packet_mb (b); - if (PREDICT_FALSE (mb_new == 0)) - { - vlib_error_count (vm, node->node_index, - DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); - b->flags |= VLIB_BUFFER_REPL_FAIL; - } - else - *mbp = mb_new; - - vec_add1 (dm->recycle[my_cpu], bi); -} - -/* - * Transmits the packets on the frame to the interface associated with the - * node. It first copies packets on the frame to a tx_vector containing the - * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal - * which calls the dpdk tx_burst function. 
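For readers unfamiliar with the idiom, the heart of tx_burst_vector_internal above is a bounded burst-and-retry loop around rte_eth_tx_burst(), which may queue fewer mbufs than offered when the TX ring is short of descriptors. A reduced sketch (illustrative; the per-device locking, the HQoS software-queue branch and the error accounting are omitted, and port_id, queue_id, pkts and n_pkts are assumed to be in scope):

    u16 sent, n_left = n_pkts;
    u32 n_retry = 16;
    do
      {
        /* returns how many of the offered packets were actually queued */
        sent = rte_eth_tx_burst (port_id, queue_id, pkts, n_left);
        pkts += sent;
        n_left -= sent;
      }
    while (sent && n_left && --n_retry);
    /* any n_left remaining are untransmitted and must be dropped or retried */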
- */ -static uword -dpdk_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * f) -{ - dpdk_main_t *dm = &dpdk_main; - vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance); - u32 n_packets = f->n_vectors; - u32 n_left; - u32 *from; - struct rte_mbuf **tx_vector; - u16 i; - u16 nb_tx_desc = xd->nb_tx_desc; - int queue_id; - u32 my_cpu; - u32 tx_pkts = 0; - tx_ring_hdr_t *ring; - u32 n_on_ring; - - my_cpu = vm->cpu_index; - - queue_id = my_cpu; - - tx_vector = xd->tx_vectors[queue_id]; - ring = vec_header (tx_vector, sizeof (*ring)); - - n_on_ring = ring->tx_head - ring->tx_tail; - from = vlib_frame_vector_args (f); - - ASSERT (n_packets <= VLIB_FRAME_SIZE); - - if (PREDICT_FALSE (n_on_ring + n_packets > nb_tx_desc)) - { - /* - * Overflowing the ring should never happen. - * If it does then drop the whole frame. - */ - vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL, - n_packets); - - while (n_packets--) - { - u32 bi0 = from[n_packets]; - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer (b0); - rte_pktmbuf_free (mb0); - } - return n_on_ring; - } - - if (PREDICT_FALSE (dm->tx_pcap_enable)) - { - n_left = n_packets; - while (n_left > 0) - { - u32 bi0 = from[0]; - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - if (dm->pcap_sw_if_index == 0 || - dm->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_TX]) - pcap_add_buffer (&dm->pcap_main, vm, bi0, 512); - from++; - n_left--; - } - } - - from = vlib_frame_vector_args (f); - n_left = n_packets; - i = ring->tx_head % nb_tx_desc; - - while (n_left >= 8) - { - u32 bi0, bi1, bi2, bi3; - struct rte_mbuf *mb0, *mb1, *mb2, *mb3; - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 or_flags; - - dpdk_prefetch_buffer_by_index (vm, from[4]); - dpdk_prefetch_buffer_by_index (vm, from[5]); - dpdk_prefetch_buffer_by_index (vm, from[6]); - dpdk_prefetch_buffer_by_index (vm, from[7]); - - bi0 = from[0]; - bi1 = from[1]; - bi2 = from[2]; - bi3 = from[3]; - from += 4; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - - or_flags = b0->flags | b1->flags | b2->flags | b3->flags; - - if (or_flags & VLIB_BUFFER_NEXT_PRESENT) - { - dpdk_validate_rte_mbuf (vm, b0, 1); - dpdk_validate_rte_mbuf (vm, b1, 1); - dpdk_validate_rte_mbuf (vm, b2, 1); - dpdk_validate_rte_mbuf (vm, b3, 1); - } - else - { - dpdk_validate_rte_mbuf (vm, b0, 0); - dpdk_validate_rte_mbuf (vm, b1, 0); - dpdk_validate_rte_mbuf (vm, b2, 0); - dpdk_validate_rte_mbuf (vm, b3, 0); - } - - mb0 = rte_mbuf_from_vlib_buffer (b0); - mb1 = rte_mbuf_from_vlib_buffer (b1); - mb2 = rte_mbuf_from_vlib_buffer (b2); - mb3 = rte_mbuf_from_vlib_buffer (b3); - - if (PREDICT_FALSE (or_flags & VLIB_BUFFER_RECYCLE)) - { - dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); - dpdk_buffer_recycle (vm, node, b1, bi1, &mb1); - dpdk_buffer_recycle (vm, node, b2, bi2, &mb2); - dpdk_buffer_recycle (vm, node, b3, bi3, &mb3); - - /* dont enqueue packets if replication failed as they must - be sent back to recycle */ - if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - tx_vector[i++ % nb_tx_desc] = mb0; - if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - tx_vector[i++ % nb_tx_desc] = mb1; - if (PREDICT_TRUE ((b2->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - tx_vector[i++ % nb_tx_desc] = mb2; - if (PREDICT_TRUE ((b3->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - 
tx_vector[i++ % nb_tx_desc] = mb3; - } - else - { - if (PREDICT_FALSE (i + 3 >= nb_tx_desc)) - { - tx_vector[i++ % nb_tx_desc] = mb0; - tx_vector[i++ % nb_tx_desc] = mb1; - tx_vector[i++ % nb_tx_desc] = mb2; - tx_vector[i++ % nb_tx_desc] = mb3; - i %= nb_tx_desc; - } - else - { - tx_vector[i++] = mb0; - tx_vector[i++] = mb1; - tx_vector[i++] = mb2; - tx_vector[i++] = mb3; - } - } - - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) - { - if (b0->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); - if (b1->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1); - if (b2->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi2, b2); - if (b3->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi3, b3); - } - - n_left -= 4; - } - while (n_left > 0) - { - u32 bi0; - struct rte_mbuf *mb0; - vlib_buffer_t *b0; - - bi0 = from[0]; - from++; - - b0 = vlib_get_buffer (vm, bi0); - - dpdk_validate_rte_mbuf (vm, b0, 1); - - mb0 = rte_mbuf_from_vlib_buffer (b0); - dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) - if (b0->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); - - if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - { - tx_vector[i % nb_tx_desc] = mb0; - i++; - } - n_left--; - } - - /* account for additional packets in the ring */ - ring->tx_head += n_packets; - n_on_ring = ring->tx_head - ring->tx_tail; - - /* transmit as many packets as possible */ - n_packets = tx_burst_vector_internal (vm, xd, tx_vector); - - /* - * tx_pkts is the number of packets successfully transmitted - * This is the number originally on ring minus the number remaining on ring - */ - tx_pkts = n_on_ring - n_packets; - - { - /* If there is no callback then drop any non-transmitted packets */ - if (PREDICT_FALSE (n_packets)) - { - vlib_simple_counter_main_t *cm; - vnet_main_t *vnm = vnet_get_main (); - - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_TX_ERROR); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - n_packets); - - vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, - n_packets); - - while (n_packets--) - rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); - } - - /* Reset head/tail to avoid unnecessary wrap */ - ring->tx_head = 0; - ring->tx_tail = 0; - } - - /* Recycle replicated buffers */ - if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu]))) - { - vlib_buffer_free (vm, dm->recycle[my_cpu], - vec_len (dm->recycle[my_cpu])); - _vec_len (dm->recycle[my_cpu]) = 0; - } - - ASSERT (ring->tx_head >= ring->tx_tail); - - return tx_pkts; -} - -static void -dpdk_clear_hw_interface_counters (u32 instance) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, instance); - - /* - * Set the "last_cleared_stats" to the current stats, so that - * things appear to clear from a display perspective. 
- */ - dpdk_update_counters (xd, vlib_time_now (dm->vlib_main)); - - clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof (xd->stats)); - clib_memcpy (xd->last_cleared_xstats, xd->xstats, - vec_len (xd->last_cleared_xstats) * - sizeof (xd->last_cleared_xstats[0])); - -} - -static clib_error_t * -dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) -{ - vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); - uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hif->dev_instance); - int rv = 0; - - if (is_up) - { - f64 now = vlib_time_now (dm->vlib_main); - - if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) - { - rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - } - - if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) - rte_eth_promiscuous_enable (xd->device_index); - else - rte_eth_promiscuous_disable (xd->device_index); - - rte_eth_allmulticast_enable (xd->device_index); - xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP; - dpdk_update_counters (xd, now); - dpdk_update_link_state (xd, now); - } - else - { - xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP; - - rte_eth_allmulticast_disable (xd->device_index); - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); - rte_eth_dev_stop (xd->device_index); - - /* For bonded interface, stop slave links */ - if (xd->pmd == VNET_DPDK_PMD_BOND) - { - u8 slink[16]; - int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16); - while (nlink >= 1) - { - u8 dpdk_port = slink[--nlink]; - rte_eth_dev_stop (dpdk_port); - } - } - } - - if (rv < 0) - clib_warning ("rte_eth_dev_%s error: %d", is_up ? 
"start" : "stop", rv); - - return /* no error */ 0; -} - -/* - * Dynamically redirect all pkts from a specific interface - * to the specified node - */ -static void -dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, - u32 node_index) -{ - dpdk_main_t *xm = &dpdk_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); - - /* Shut off redirection */ - if (node_index == ~0) - { - xd->per_interface_next_index = node_index; - return; - } - - xd->per_interface_next_index = - vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index); -} - - -static clib_error_t * -dpdk_subif_add_del_function (vnet_main_t * vnm, - u32 hw_if_index, - struct vnet_sw_interface_t *st, int is_add) -{ - dpdk_main_t *xm = &dpdk_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); - vnet_sw_interface_t *t = (vnet_sw_interface_t *) st; - int r, vlan_offload; - u32 prev_subifs = xd->num_subifs; - clib_error_t *err = 0; - - if (is_add) - xd->num_subifs++; - else if (xd->num_subifs) - xd->num_subifs--; - - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - goto done; - - /* currently we program VLANS only for IXGBE VF and I40E VF */ - if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF)) - goto done; - - if (t->sub.eth.flags.no_tags == 1) - goto done; - - if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1)) - { - xd->num_subifs = prev_subifs; - err = clib_error_return (0, "unsupported VLAN setup"); - goto done; - } - - vlan_offload = rte_eth_dev_get_vlan_offload (xd->device_index); - vlan_offload |= ETH_VLAN_FILTER_OFFLOAD; - - if ((r = rte_eth_dev_set_vlan_offload (xd->device_index, vlan_offload))) - { - xd->num_subifs = prev_subifs; - err = clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d", - xd->device_index, r); - goto done; - } - - - if ((r = - rte_eth_dev_vlan_filter (xd->device_index, t->sub.eth.outer_vlan_id, - is_add))) - { - xd->num_subifs = prev_subifs; - err = clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d", - xd->device_index, r); - goto done; - } - -done: - if (xd->num_subifs) - xd->flags |= DPDK_DEVICE_FLAG_HAVE_SUBIF; - else - xd->flags &= ~DPDK_DEVICE_FLAG_HAVE_SUBIF; - - return err; -} - -/* *INDENT-OFF* */ -VNET_DEVICE_CLASS (dpdk_device_class) = { - .name = "dpdk", - .tx_function = dpdk_interface_tx, - .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR, - .tx_function_error_strings = dpdk_tx_func_error_strings, - .format_device_name = format_dpdk_device_name, - .format_device = format_dpdk_device, - .format_tx_trace = format_dpdk_tx_dma_trace, - .clear_counters = dpdk_clear_hw_interface_counters, - .admin_up_down_function = dpdk_interface_admin_up_down, - .subif_add_del_function = dpdk_subif_add_del_function, - .rx_redirect_to_node = dpdk_set_interface_next_node, - .mac_addr_change_function = dpdk_set_mac_address, -}; - -VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx) -/* *INDENT-ON* */ - -#define UP_DOWN_FLAG_EVENT 1 - -uword -admin_up_down_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - clib_error_t *error = 0; - uword event_type; - uword *event_data = 0; - u32 sw_if_index; - u32 flags; - - while (1) - { - vlib_process_wait_for_event (vm); - - event_type = vlib_process_get_events (vm, &event_data); - - dpdk_main.admin_up_down_in_progress = 1; - - switch (event_type) - { - case 
UP_DOWN_FLAG_EVENT: - { - if (vec_len (event_data) == 2) - { - sw_if_index = event_data[0]; - flags = event_data[1]; - error = - vnet_sw_interface_set_flags (vnet_get_main (), sw_if_index, - flags); - clib_error_report (error); - } - } - break; - } - - vec_reset_length (event_data); - - dpdk_main.admin_up_down_in_progress = 0; - - } - return 0; /* or not */ -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (admin_up_down_process_node,static) = { - .function = admin_up_down_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "admin-up-down-process", - .process_log2_n_stack_bytes = 17, // 256KB -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/dir.dox b/src/vnet/devices/dpdk/dir.dox deleted file mode 100644 index 43e36753..00000000 --- a/src/vnet/devices/dpdk/dir.dox +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Doxygen directory documentation */ - -/** -@dir -@brief DPDK Abstraction Layer. - -This directory contains the source code for the DPDK abstraction layer. - -*/ -/*? %%clicmd:group_label DPDK and pcap tx %% ?*/ -/*? %%syscfg:group_label DPDK and pcap tx %% ?*/ diff --git a/src/vnet/devices/dpdk/dpdk.api b/src/vnet/devices/dpdk/dpdk.api deleted file mode 100644 index 21215d45..00000000 --- a/src/vnet/devices/dpdk/dpdk.api +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
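The dpdk.api messages defined just below mirror the HQoS CLI commands removed above (pipe profile, subport parameters, tctbl entry). For orientation, a hypothetical client-side fill of the tctbl request, reusing the values from the earlier CLI example; the vl_api_..._t type name follows VPP's generated-API naming convention and the byte-order helper is the usual clib one -- treat both, and the sw_if_index value, as assumptions rather than part of this patch:

    vl_api_sw_interface_set_dpdk_hqos_tctbl_t mp = { 0 };
    mp.sw_if_index = clib_host_to_net_u32 (1);  /* hypothetical interface index */
    mp.entry = clib_host_to_net_u32 (16);       /* mapped value 0..63 */
    mp.tc = clib_host_to_net_u32 (2);           /* traffic class 0..3 */
    mp.queue = clib_host_to_net_u32 (2);        /* queue within the class 0..3 */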
- */ - -/** \brief DPDK interface HQoS pipe profile set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param pipe - pipe ID within its subport - @param profile - pipe profile ID -*/ -define sw_interface_set_dpdk_hqos_pipe { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 pipe; - u32 profile; -}; - -/** \brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; -}; - -/** \brief DPDK interface HQoS subport parameters set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param tb_rate - subport token bucket rate (measured in bytes/second) - @param tb_size - subport token bucket size (measured in credits) - @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) - @param tc_period - enforcement period for rates (measured in milliseconds) -*/ -define sw_interface_set_dpdk_hqos_subport { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 tb_rate; - u32 tb_size; - u32 tc_rate[4]; - u32 tc_period; -}; - -/** \brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; -}; - -/** \brief DPDK interface HQoS tctbl entry set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param entry - entry index ID - @param tc - traffic class (0 .. 3) - @param queue - traffic class queue (0 .. 3) -*/ -define sw_interface_set_dpdk_hqos_tctbl { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 entry; - u32 tc; - u32 queue; -}; - -/** \brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; -}; - -/* - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ - \ No newline at end of file diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h deleted file mode 100644 index bf9f2768..00000000 --- a/src/vnet/devices/dpdk/dpdk.h +++ /dev/null @@ -1,487 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __included_dpdk_h__ -#define __included_dpdk_h__ - -/* $$$$ We should rename always_inline -> clib_always_inline */ -#undef always_inline - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#if CLIB_DEBUG > 0 -#define always_inline static inline -#else -#define always_inline static inline __attribute__ ((__always_inline__)) -#endif - -#include - -#define NB_MBUF (16<<10) - -extern vnet_device_class_t dpdk_device_class; -extern vlib_node_registration_t dpdk_input_node; -extern vlib_node_registration_t handoff_dispatch_node; - -#define foreach_dpdk_pmd \ - _ ("net_thunderx", THUNDERX) \ - _ ("net_e1000_em", E1000EM) \ - _ ("net_e1000_igb", IGB) \ - _ ("net_e1000_igb_vf", IGBVF) \ - _ ("net_ixgbe", IXGBE) \ - _ ("net_ixgbe_vf", IXGBEVF) \ - _ ("net_i40e", I40E) \ - _ ("net_i40e_vf", I40EVF) \ - _ ("net_virtio", VIRTIO) \ - _ ("net_enic", ENIC) \ - _ ("net_vmxnet3", VMXNET3) \ - _ ("AF_PACKET PMD", AF_PACKET) \ - _ ("rte_bond_pmd", BOND) \ - _ ("net_fm10k", FM10K) \ - _ ("net_cxgbe", CXGBE) \ - _ ("net_mlx5", MLX5) \ - _ ("net_dpaa2", DPAA2) - -typedef enum -{ - VNET_DPDK_PMD_NONE, -#define _(s,f) VNET_DPDK_PMD_##f, - foreach_dpdk_pmd -#undef _ - VNET_DPDK_PMD_UNKNOWN, /* must be last */ -} dpdk_pmd_t; - -typedef enum -{ - VNET_DPDK_PORT_TYPE_ETH_1G, - VNET_DPDK_PORT_TYPE_ETH_10G, - VNET_DPDK_PORT_TYPE_ETH_40G, - VNET_DPDK_PORT_TYPE_ETH_100G, - VNET_DPDK_PORT_TYPE_ETH_BOND, - VNET_DPDK_PORT_TYPE_ETH_SWITCH, - VNET_DPDK_PORT_TYPE_AF_PACKET, - VNET_DPDK_PORT_TYPE_UNKNOWN, -} dpdk_port_type_t; - -/* - * The header for the tx_vector in dpdk_device_t. - * Head and tail are indexes into the tx_vector and are of type - * u64 so they never overflow. 
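A consequence of the never-wrapping counters, as relied on by the TX path in device.c earlier in this patch (illustrative sketch, with ring and xd as used there): occupancy is a plain difference and only the physical slot is taken modulo the descriptor count.

    u64 n_on_ring = ring->tx_head - ring->tx_tail;   /* packets waiting on the ring */
    u32 slot = ring->tx_head % xd->nb_tx_desc;       /* next physical descriptor slot */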
- */ -typedef struct -{ - u64 tx_head; - u64 tx_tail; -} tx_ring_hdr_t; - -typedef struct -{ - struct rte_ring *swq; - - u64 hqos_field0_slabmask; - u32 hqos_field0_slabpos; - u32 hqos_field0_slabshr; - u64 hqos_field1_slabmask; - u32 hqos_field1_slabpos; - u32 hqos_field1_slabshr; - u64 hqos_field2_slabmask; - u32 hqos_field2_slabpos; - u32 hqos_field2_slabshr; - u32 hqos_tc_table[64]; -} dpdk_device_hqos_per_worker_thread_t; - -typedef struct -{ - struct rte_ring **swq; - struct rte_mbuf **pkts_enq; - struct rte_mbuf **pkts_deq; - struct rte_sched_port *hqos; - u32 hqos_burst_enq; - u32 hqos_burst_deq; - u32 pkts_enq_len; - u32 swq_pos; - u32 flush_count; -} dpdk_device_hqos_per_hqos_thread_t; - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - volatile u32 **lockp; - - /* Instance ID */ - u32 device_index; - - u32 vlib_hw_if_index; - u32 vlib_sw_if_index; - - /* next node index if we decide to steal the rx graph arc */ - u32 per_interface_next_index; - - /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */ - struct rte_mbuf ***tx_vectors; /* one per worker thread */ - struct rte_mbuf ***rx_vectors; - - /* vector of traced contexts, per device */ - u32 **d_trace_buffers; - - dpdk_pmd_t pmd:8; - i8 cpu_socket; - - u16 flags; -#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0) -#define DPDK_DEVICE_FLAG_PROMISC (1 << 1) -#define DPDK_DEVICE_FLAG_PMD (1 << 2) -#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3) -#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4) -#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5) -#define DPDK_DEVICE_FLAG_HQOS (1 << 6) - - u16 nb_tx_desc; - CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); - - u8 *interface_name_suffix; - - /* number of sub-interfaces */ - u16 num_subifs; - - /* PMD related */ - u16 tx_q_used; - u16 rx_q_used; - u16 nb_rx_desc; - u16 *cpu_socket_id_by_queue; - struct rte_eth_conf port_conf; - struct rte_eth_txconf tx_conf; - - /* HQoS related */ - dpdk_device_hqos_per_worker_thread_t *hqos_wt; - dpdk_device_hqos_per_hqos_thread_t *hqos_ht; - - /* af_packet */ - u8 af_packet_port_id; - - struct rte_eth_link link; - f64 time_last_link_update; - - struct rte_eth_stats stats; - struct rte_eth_stats last_stats; - struct rte_eth_stats last_cleared_stats; - struct rte_eth_xstat *xstats; - struct rte_eth_xstat *last_cleared_xstats; - f64 time_last_stats_update; - dpdk_port_type_t port_type; - - /* mac address */ - u8 *default_mac_address; -} dpdk_device_t; - -#define DPDK_STATS_POLL_INTERVAL (10.0) -#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */ - -#define DPDK_LINK_POLL_INTERVAL (3.0) -#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */ - -typedef struct -{ - u32 device; - u16 queue_id; -} dpdk_device_and_queue_t; - -#ifndef DPDK_HQOS_DBG_BYPASS -#define DPDK_HQOS_DBG_BYPASS 0 -#endif - -#ifndef HQOS_FLUSH_COUNT_THRESHOLD -#define HQOS_FLUSH_COUNT_THRESHOLD 100000 -#endif - -typedef struct dpdk_device_config_hqos_t -{ - u32 hqos_thread; - u32 hqos_thread_valid; - - u32 swq_size; - u32 burst_enq; - u32 burst_deq; - - u32 pktfield0_slabpos; - u32 pktfield1_slabpos; - u32 pktfield2_slabpos; - u64 pktfield0_slabmask; - u64 pktfield1_slabmask; - u64 pktfield2_slabmask; - u32 tc_table[64]; - - struct rte_sched_port_params port; - struct rte_sched_subport_params *subport; - struct rte_sched_pipe_params *pipe; - uint32_t *pipe_map; -} dpdk_device_config_hqos_t; - -int dpdk_hqos_validate_mask (u64 mask, u32 n); -void dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * - hqos, u32 pipe_profile_id); -void 
dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos); -clib_error_t *dpdk_port_setup_hqos (dpdk_device_t * xd, - dpdk_device_config_hqos_t * hqos); -void dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, - struct rte_mbuf **pkts, u32 n_pkts); - -#define foreach_dpdk_device_config_item \ - _ (num_rx_queues) \ - _ (num_tx_queues) \ - _ (num_rx_desc) \ - _ (num_tx_desc) \ - _ (rss_fn) - -typedef struct -{ - vlib_pci_addr_t pci_addr; - u8 is_blacklisted; - u8 vlan_strip_offload; -#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0 -#define DPDK_DEVICE_VLAN_STRIP_OFF 1 -#define DPDK_DEVICE_VLAN_STRIP_ON 2 - -#define _(x) uword x; - foreach_dpdk_device_config_item -#undef _ - clib_bitmap_t * workers; - u32 hqos_enabled; - dpdk_device_config_hqos_t hqos; -} dpdk_device_config_t; - -typedef struct -{ - - /* Config stuff */ - u8 **eal_init_args; - u8 *eal_init_args_str; - u8 *uio_driver_name; - u8 no_multi_seg; - u8 enable_tcp_udp_checksum; - u8 cryptodev; - - /* Required config parameters */ - u8 coremask_set_manually; - u8 nchannels_set_manually; - u32 coremask; - u32 nchannels; - u32 num_mbufs; - u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */ - - /* - * format interface names ala xxxEthernet%d/%d/%d instead of - * xxxEthernet%x/%x/%x. - */ - u8 interface_name_format_decimal; - - /* per-device config */ - dpdk_device_config_t default_devconf; - dpdk_device_config_t *dev_confs; - uword *device_config_index_by_pci_addr; - -} dpdk_config_main_t; - -dpdk_config_main_t dpdk_config_main; - -typedef struct -{ - - /* Devices */ - dpdk_device_t *devices; - dpdk_device_and_queue_t **devices_by_cpu; - dpdk_device_and_queue_t **devices_by_hqos_cpu; - - /* per-thread recycle lists */ - u32 **recycle; - - /* buffer flags template, configurable to enable/disable tcp / udp cksum */ - u32 buffer_flags_template; - - /* vlib buffer free list, must be same size as an rte_mbuf */ - u32 vlib_buffer_free_list_index; - - /* Ethernet input node index */ - u32 ethernet_input_node_index; - - /* pcap tracing [only works if (CLIB_DEBUG > 0)] */ - int tx_pcap_enable; - pcap_main_t pcap_main; - u8 *pcap_filename; - u32 pcap_sw_if_index; - u32 pcap_pkts_to_capture; - - /* hashes */ - uword *dpdk_device_by_kni_port_id; - uword *vu_sw_if_index_by_listener_fd; - uword *vu_sw_if_index_by_sock_fd; - u32 *vu_inactive_interfaces_device_index; - - /* - * flag indicating that a posted admin up/down - * (via post_sw_interface_set_flags) is in progress - */ - u8 admin_up_down_in_progress; - - u8 use_rss; - - /* which cpus are running dpdk-input */ - int input_cpu_first_index; - int input_cpu_count; - - /* which cpus are running I/O TX */ - int hqos_cpu_first_index; - int hqos_cpu_count; - - /* control interval of dpdk link state and stat polling */ - f64 link_state_poll_interval; - f64 stat_poll_interval; - - /* Sleep for this many MS after each device poll */ - u32 poll_sleep; - - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; - dpdk_config_main_t *conf; - - /* mempool */ - struct rte_mempool **pktmbuf_pools; -} dpdk_main_t; - -dpdk_main_t dpdk_main; - -typedef struct -{ - u32 buffer_index; - u16 device_index; - u8 queue_index; - struct rte_mbuf mb; - /* Copy of VLIB buffer; packet data stored in pre_data. */ - vlib_buffer_t buffer; -} dpdk_tx_dma_trace_t; - -typedef struct -{ - u32 buffer_index; - u16 device_index; - u16 queue_index; - struct rte_mbuf mb; - vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. 
*/ - u8 data[256]; /* First 256 data bytes, used for hexdump */ -} dpdk_rx_dma_trace_t; - -void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b); - -clib_error_t *dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address); - -clib_error_t *dpdk_set_mc_filter (vnet_hw_interface_t * hi, - struct ether_addr mc_addr_vec[], int naddr); - -void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd); - -clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); - -u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance); - -struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b); -struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); - -#define foreach_dpdk_error \ - _(NONE, "no error") \ - _(RX_PACKET_ERROR, "Rx packet errors") \ - _(RX_BAD_FCS, "Rx bad fcs") \ - _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \ - _(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \ - _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \ - _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error") - -typedef enum -{ -#define _(f,s) DPDK_ERROR_##f, - foreach_dpdk_error -#undef _ - DPDK_N_ERROR, -} dpdk_error_t; - -int dpdk_set_stat_poll_interval (f64 interval); -int dpdk_set_link_state_poll_interval (f64 interval); -void dpdk_update_link_state (dpdk_device_t * xd, f64 now); -void dpdk_device_lock_init (dpdk_device_t * xd); -void dpdk_device_lock_free (dpdk_device_t * xd); - -void dpdk_rx_trace (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, u32 * buffers, uword n_buffers); - -#define EFD_OPERATION_LESS_THAN 0 -#define EFD_OPERATION_GREATER_OR_EQUAL 1 - -format_function_t format_dpdk_device_name; -format_function_t format_dpdk_device; -format_function_t format_dpdk_tx_dma_trace; -format_function_t format_dpdk_rx_dma_trace; -format_function_t format_dpdk_rte_mbuf; -format_function_t format_dpdk_rx_rte_mbuf; -unformat_function_t unformat_socket_mem; -clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn); -clib_error_t *unformat_hqos (unformat_input_t * input, - dpdk_device_config_hqos_t * hqos); - -uword -admin_up_down_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f); - -#endif /* __included_dpdk_h__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/dpdk_api.c b/src/vnet/devices/dpdk/dpdk_api.c deleted file mode 100644 index 8faf5c2c..00000000 --- a/src/vnet/devices/dpdk/dpdk_api.c +++ /dev/null @@ -1,246 +0,0 @@ -/* - *------------------------------------------------------------------ - * dpdk_api.c - dpdk interface api - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
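foreach_dpdk_error follows the same X-macro pattern: the dpdk_error_t enum above provides the counter indices, and the matching string table handed to the graph-node registration is generated from the identical list. A sketch of that second expansion (illustrative; the real table lives in the node code, not in this header):

/* Illustrative counterpart to the dpdk_error_t enum above. */
static char *dpdk_error_strings[] = {
#define _(n, s) s,
  foreach_dpdk_error
#undef _
};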
- *------------------------------------------------------------------ - */ - -#include -#include - -#if DPDK > 0 -#include -#endif - -#include - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -#define vl_endianfun /* define message structures */ -#include -#undef vl_endianfun - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - -#include - -#define foreach_vpe_api_msg \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) - -static void - vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - u32 pipe = ntohl (mp->pipe); - u32 profile = ntohl (mp->profile); - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); - - BAD_SW_IF_INDEX_LABEL; -#else - clib_warning ("setting HQoS pipe parameters without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); -} - -static void - vl_api_sw_interface_set_dpdk_hqos_subport_t_handler - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - struct rte_sched_subport_params p; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - p.tb_rate = ntohl (mp->tb_rate); - p.tb_size = ntohl (mp->tb_size); - p.tc_rate[0] = ntohl (mp->tc_rate[0]); - p.tc_rate[1] = ntohl (mp->tc_rate[1]); - p.tc_rate[2] = ntohl (mp->tc_rate[2]); - p.tc_rate[3] = ntohl (mp->tc_rate[3]); - p.tc_period = ntohl (mp->tc_period); - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); - - BAD_SW_IF_INDEX_LABEL; -#else - clib_warning - ("setting HQoS subport parameters without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); -} - -static void - vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 entry = ntohl (mp->entry); - u32 tc = ntohl (mp->tc); - u32 queue = ntohl (mp->queue); - u32 val, i; - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - 
if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) - { - clib_warning ("invalid traffic class !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) - { - clib_warning ("invalid queue !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - - if (p == 0) - { - clib_warning ("worker thread registration AWOL !!"); - rv = VNET_API_ERROR_INVALID_VALUE_2; - goto done; - } - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; - for (i = 0; i < worker_thread_count; i++) - xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; - - BAD_SW_IF_INDEX_LABEL; -done: -#else - clib_warning ("setting HQoS DSCP table entry without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); -} - -/* - * dpdk_api_hookup - * Add vpe's API message handlers to the table. - * vlib has alread mapped shared memory and - * added the client registration handlers. - * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() - */ -#define vl_msg_name_crc_list -#include -#undef vl_msg_name_crc_list - -static void -setup_message_id_table (api_main_t * am) -{ -#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); - foreach_vl_msg_name_crc_dpdk; -#undef _ -} - -static clib_error_t * -dpdk_api_hookup (vlib_main_t * vm) -{ - api_main_t *am = &api_main; - -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vpe_api_msg; -#undef _ - - /* - * Set up the (msg_name, crc, message-id) table - */ - setup_message_id_table (am); - - return 0; -} - -VLIB_API_INIT_FUNCTION (dpdk_api_hookup); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/dpdk_priv.h b/src/vnet/devices/dpdk/dpdk_priv.h deleted file mode 100644 index dd40ff48..00000000 --- a/src/vnet/devices/dpdk/dpdk_priv.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
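The tctbl handler above packs traffic class and queue into a single table entry; the worker-side classifier later unpacks it with >> 2 and & 0x3. A small sketch of the round trip, assuming DPDK's default of 4 queues per traffic class (helper names are illustrative):

/* Illustrative encode/decode of one hqos_tc_table entry. */
static inline u32
hqos_tctbl_encode (u32 tc, u32 queue)
{
  /* tc ends up in bits 2..3, queue in bits 0..1 */
  return tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
}

static inline void
hqos_tctbl_decode (u32 entry, u32 * tc, u32 * queue)
{
  *tc = entry >> 2;             /* mirrors hqos_tc_table[dscp] >> 2 */
  *queue = entry & 0x3;         /* mirrors hqos_tc_table[dscp] & 0x3 */
}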
- */ - -#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) -#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) - -#define DPDK_NB_RX_DESC_DEFAULT 1024 -#define DPDK_NB_TX_DESC_DEFAULT 1024 -#define DPDK_NB_RX_DESC_VIRTIO 256 -#define DPDK_NB_TX_DESC_VIRTIO 256 - -#define I40E_DEV_ID_SFP_XL710 0x1572 -#define I40E_DEV_ID_QSFP_A 0x1583 -#define I40E_DEV_ID_QSFP_B 0x1584 -#define I40E_DEV_ID_QSFP_C 0x1585 -#define I40E_DEV_ID_10G_BASE_T 0x1586 -#define I40E_DEV_ID_VF 0x154C - -/* These args appear by themselves */ -#define foreach_eal_double_hyphen_predicate_arg \ -_(no-shconf) \ -_(no-hpet) \ -_(no-huge) \ -_(vmware-tsc-map) - -#define foreach_eal_single_hyphen_mandatory_arg \ -_(coremask, c) \ -_(nchannels, n) \ - -#define foreach_eal_single_hyphen_arg \ -_(blacklist, b) \ -_(mem-alloc-request, m) \ -_(force-ranks, r) - -/* These args are preceeded by "--" and followed by a single string */ -#define foreach_eal_double_hyphen_arg \ -_(huge-dir) \ -_(proc-type) \ -_(file-prefix) \ -_(vdev) - -static inline void -dpdk_get_xstats (dpdk_device_t * xd) -{ - int len; - if ((len = rte_eth_xstats_get (xd->device_index, NULL, 0)) > 0) - { - vec_validate (xd->xstats, len - 1); - vec_validate (xd->last_cleared_xstats, len - 1); - - len = - rte_eth_xstats_get (xd->device_index, xd->xstats, - vec_len (xd->xstats)); - - ASSERT (vec_len (xd->xstats) == len); - ASSERT (vec_len (xd->last_cleared_xstats) == len); - - _vec_len (xd->xstats) = len; - _vec_len (xd->last_cleared_xstats) = len; - - } -} - - -static inline void -dpdk_update_counters (dpdk_device_t * xd, f64 now) -{ - vlib_simple_counter_main_t *cm; - vnet_main_t *vnm = vnet_get_main (); - u32 my_cpu = os_get_cpu_number (); - u64 rxerrors, last_rxerrors; - - /* only update counters for PMD interfaces */ - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - return; - - xd->time_last_stats_update = now ? now : xd->time_last_stats_update; - clib_memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats)); - rte_eth_stats_get (xd->device_index, &xd->stats); - - /* maybe bump interface rx no buffer counter */ - if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_NO_BUF); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - xd->stats.rx_nombuf - - xd->last_stats.rx_nombuf); - } - - /* missed pkt counter */ - if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_MISS); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - xd->stats.imissed - - xd->last_stats.imissed); - } - rxerrors = xd->stats.ierrors; - last_rxerrors = xd->last_stats.ierrors; - - if (PREDICT_FALSE (rxerrors != last_rxerrors)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_ERROR); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - rxerrors - last_rxerrors); - } - - dpdk_get_xstats (xd); -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/format.c b/src/vnet/devices/dpdk/format.c deleted file mode 100644 index 1558630c..00000000 --- a/src/vnet/devices/dpdk/format.c +++ /dev/null @@ -1,754 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. 
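The two pointer-arithmetic macros at the top of dpdk_priv.h encode the buffer layout this driver relies on: every vlib_buffer_t sits directly after its rte_mbuf header, so conversion in either direction is a single pointer add or subtract. A sketch of the resulting round-trip invariant (illustrative helper, assuming that layout):

/* Illustrative: the adjacency assumed by rte_mbuf_from_vlib_buffer()
 * and vlib_buffer_from_rte_mbuf() makes the conversion a zero-cost
 * pointer round trip. */
static inline int
dpdk_buffer_conversion_roundtrips (vlib_buffer_t * b)
{
  struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer (b);
  return vlib_buffer_from_rte_mbuf (mb) == b;
}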
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include - -#include "dpdk_priv.h" -#include - -#define foreach_dpdk_counter \ - _ (tx_frames_ok, opackets) \ - _ (tx_bytes_ok, obytes) \ - _ (tx_errors, oerrors) \ - _ (rx_frames_ok, ipackets) \ - _ (rx_bytes_ok, ibytes) \ - _ (rx_errors, ierrors) \ - _ (rx_missed, imissed) \ - _ (rx_no_bufs, rx_nombuf) - -#define foreach_dpdk_q_counter \ - _ (rx_frames_ok, q_ipackets) \ - _ (tx_frames_ok, q_opackets) \ - _ (rx_bytes_ok, q_ibytes) \ - _ (tx_bytes_ok, q_obytes) \ - _ (rx_errors, q_errors) - -#define foreach_dpdk_rss_hf \ - _(ETH_RSS_FRAG_IPV4, "ipv4-frag") \ - _(ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \ - _(ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \ - _(ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \ - _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \ - _(ETH_RSS_IPV4, "ipv4") \ - _(ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \ - _(ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \ - _(ETH_RSS_FRAG_IPV6, "ipv6-frag") \ - _(ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \ - _(ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \ - _(ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \ - _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \ - _(ETH_RSS_L2_PAYLOAD, "l2-payload") \ - _(ETH_RSS_IPV6_EX, "ipv6-ex") \ - _(ETH_RSS_IPV6, "ipv6") - - -#define foreach_dpdk_rx_offload_caps \ - _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip") \ - _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ - _(DEV_RX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ - _(DEV_RX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ - _(DEV_RX_OFFLOAD_TCP_LRO , "rcp-lro") \ - _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip") - -#define foreach_dpdk_tx_offload_caps \ - _(DEV_TX_OFFLOAD_VLAN_INSERT, "vlan-insert") \ - _(DEV_TX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ - _(DEV_TX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ - _(DEV_TX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ - _(DEV_TX_OFFLOAD_SCTP_CKSUM , "sctp-cksum") \ - _(DEV_TX_OFFLOAD_TCP_TSO , "tcp-tso") \ - _(DEV_TX_OFFLOAD_UDP_TSO , "udp-tso") \ - _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \ - _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert") - -#define foreach_dpdk_pkt_rx_offload_flag \ - _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \ - _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \ - _ (PKT_RX_FDIR, "RX packet with FDIR infos") \ - _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \ - _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \ - _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \ - _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \ - _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. 
is valid") \ - _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \ - _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \ - _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped") - -#define foreach_dpdk_pkt_type \ - _ (L2, ETHER, "Ethernet packet") \ - _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \ - _ (L2, ETHER_ARP, "ARP packet") \ - _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet") \ - _ (L2, ETHER_NSH, "NSH (Network Service Header) packet") \ - _ (L2, ETHER_VLAN, "VLAN packet") \ - _ (L2, ETHER_QINQ, "QinQ packet") \ - _ (L3, IPV4, "IPv4 packet without extension headers") \ - _ (L3, IPV4_EXT, "IPv4 packet with extension headers") \ - _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \ - _ (L3, IPV6, "IPv6 packet without extension headers") \ - _ (L3, IPV6_EXT, "IPv6 packet with extension headers") \ - _ (L3, IPV6_EXT_UNKNOWN, "IPv6 packet with or without extension headers") \ - _ (L4, TCP, "TCP packet") \ - _ (L4, UDP, "UDP packet") \ - _ (L4, FRAG, "Fragmented IP packet") \ - _ (L4, SCTP, "SCTP (Stream Control Transmission Protocol) packet") \ - _ (L4, ICMP, "ICMP packet") \ - _ (L4, NONFRAG, "Non-fragmented IP packet") \ - _ (TUNNEL, GRE, "GRE tunneling packet") \ - _ (TUNNEL, VXLAN, "VXLAN tunneling packet") \ - _ (TUNNEL, NVGRE, "NVGRE Tunneling packet") \ - _ (TUNNEL, GENEVE, "GENEVE Tunneling packet") \ - _ (TUNNEL, GRENAT, "Teredo, VXLAN or GRE Tunneling packet") \ - _ (INNER_L2, ETHER, "Inner Ethernet packet") \ - _ (INNER_L2, ETHER_VLAN, "Inner Ethernet packet with VLAN") \ - _ (INNER_L3, IPV4, "Inner IPv4 packet without extension headers") \ - _ (INNER_L3, IPV4_EXT, "Inner IPv4 packet with extension headers") \ - _ (INNER_L3, IPV4_EXT_UNKNOWN, "Inner IPv4 packet with or without extension headers") \ - _ (INNER_L3, IPV6, "Inner IPv6 packet without extension headers") \ - _ (INNER_L3, IPV6_EXT, "Inner IPv6 packet with extension headers") \ - _ (INNER_L3, IPV6_EXT_UNKNOWN, "Inner IPv6 packet with or without extension headers") \ - _ (INNER_L4, TCP, "Inner TCP packet") \ - _ (INNER_L4, UDP, "Inner UDP packet") \ - _ (INNER_L4, FRAG, "Inner fagmented IP packet") \ - _ (INNER_L4, SCTP, "Inner SCTP (Stream Control Transmission Protocol) packet") \ - _ (INNER_L4, ICMP, "Inner ICMP packet") \ - _ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet") - -#define foreach_dpdk_pkt_tx_offload_flag \ - _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \ - _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \ - _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \ - _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. 
computed by NIC") \ - _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp") - -#define foreach_dpdk_pkt_offload_flag \ - foreach_dpdk_pkt_rx_offload_flag \ - foreach_dpdk_pkt_tx_offload_flag - -u8 * -format_dpdk_device_name (u8 * s, va_list * args) -{ - dpdk_main_t *dm = &dpdk_main; - char *devname_format; - char *device_name; - u32 i = va_arg (*args, u32); - struct rte_eth_dev_info dev_info; - u8 *ret; - - if (dm->conf->interface_name_format_decimal) - devname_format = "%s%d/%d/%d"; - else - devname_format = "%s%x/%x/%x"; - - switch (dm->devices[i].port_type) - { - case VNET_DPDK_PORT_TYPE_ETH_1G: - device_name = "GigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_10G: - device_name = "TenGigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_40G: - device_name = "FortyGigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_100G: - device_name = "HundredGigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_BOND: - return format (s, "BondEthernet%d", dm->devices[i].device_index); - - case VNET_DPDK_PORT_TYPE_ETH_SWITCH: - device_name = "EthernetSwitch"; - break; - - case VNET_DPDK_PORT_TYPE_AF_PACKET: - rte_eth_dev_info_get (i, &dev_info); - return format (s, "af_packet%d", dm->devices[i].af_packet_port_id); - - default: - case VNET_DPDK_PORT_TYPE_UNKNOWN: - device_name = "UnknownEthernet"; - break; - } - - rte_eth_dev_info_get (i, &dev_info); - - if (dev_info.pci_dev) - ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus, - dev_info.pci_dev->addr.devid, - dev_info.pci_dev->addr.function); - else - ret = format (s, "%s%d", device_name, dm->devices[i].device_index); - - if (dm->devices[i].interface_name_suffix) - return format (ret, "/%s", dm->devices[i].interface_name_suffix); - return ret; -} - -static u8 * -format_dpdk_device_type (u8 * s, va_list * args) -{ - dpdk_main_t *dm = &dpdk_main; - char *dev_type; - u32 i = va_arg (*args, u32); - - switch (dm->devices[i].pmd) - { - case VNET_DPDK_PMD_E1000EM: - dev_type = "Intel 82540EM (e1000)"; - break; - - case VNET_DPDK_PMD_IGB: - dev_type = "Intel e1000"; - break; - - case VNET_DPDK_PMD_I40E: - dev_type = "Intel X710/XL710 Family"; - break; - - case VNET_DPDK_PMD_I40EVF: - dev_type = "Intel X710/XL710 Family VF"; - break; - - case VNET_DPDK_PMD_FM10K: - dev_type = "Intel FM10000 Family Ethernet Switch"; - break; - - case VNET_DPDK_PMD_IGBVF: - dev_type = "Intel e1000 VF"; - break; - - case VNET_DPDK_PMD_VIRTIO: - dev_type = "Red Hat Virtio"; - break; - - case VNET_DPDK_PMD_IXGBEVF: - dev_type = "Intel 82599 VF"; - break; - - case VNET_DPDK_PMD_IXGBE: - dev_type = "Intel 82599"; - break; - - case VNET_DPDK_PMD_ENIC: - dev_type = "Cisco VIC"; - break; - - case VNET_DPDK_PMD_CXGBE: - dev_type = "Chelsio T4/T5"; - break; - - case VNET_DPDK_PMD_MLX5: - dev_type = "Mellanox ConnectX-4 Family"; - break; - - case VNET_DPDK_PMD_VMXNET3: - dev_type = "VMware VMXNET3"; - break; - - case VNET_DPDK_PMD_AF_PACKET: - dev_type = "af_packet"; - break; - - case VNET_DPDK_PMD_BOND: - dev_type = "Ethernet Bonding"; - break; - - case VNET_DPDK_PMD_DPAA2: - dev_type = "NXP DPAA2 Mac"; - break; - - default: - case VNET_DPDK_PMD_UNKNOWN: - dev_type = "### UNKNOWN ###"; - break; - } - - return format (s, dev_type); -} - -static u8 * -format_dpdk_link_status (u8 * s, va_list * args) -{ - dpdk_device_t *xd = va_arg (*args, dpdk_device_t *); - struct rte_eth_link *l = &xd->link; - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); - - s = format (s, "%s 
", l->link_status ? "up" : "down"); - if (l->link_status) - { - u32 promisc = rte_eth_promiscuous_get (xd->device_index); - - s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ? - "full" : "half"); - s = format (s, "speed %u mtu %d %s\n", l->link_speed, - hi->max_packet_bytes, promisc ? " promisc" : ""); - } - else - s = format (s, "\n"); - - return s; -} - -#define _line_len 72 -#define _(v, str) \ -if (bitmap & v) { \ - if (format_get_indent (s) > next_split ) { \ - next_split += _line_len; \ - s = format(s,"\n%U", format_white_space, indent); \ - } \ - s = format(s, "%s ", str); \ -} - -static u8 * -format_dpdk_rss_hf_name (u8 * s, va_list * args) -{ - u64 bitmap = va_arg (*args, u64); - int next_split = _line_len; - int indent = format_get_indent (s); - - if (!bitmap) - return format (s, "none"); - - foreach_dpdk_rss_hf return s; -} - -static u8 * -format_dpdk_rx_offload_caps (u8 * s, va_list * args) -{ - u32 bitmap = va_arg (*args, u32); - int next_split = _line_len; - int indent = format_get_indent (s); - - if (!bitmap) - return format (s, "none"); - - foreach_dpdk_rx_offload_caps return s; -} - -static u8 * -format_dpdk_tx_offload_caps (u8 * s, va_list * args) -{ - u32 bitmap = va_arg (*args, u32); - int next_split = _line_len; - int indent = format_get_indent (s); - if (!bitmap) - return format (s, "none"); - - foreach_dpdk_tx_offload_caps return s; -} - -#undef _line_len -#undef _ - -u8 * -format_dpdk_device (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - int verbose = va_arg (*args, int); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance); - uword indent = format_get_indent (s); - f64 now = vlib_time_now (dm->vlib_main); - struct rte_eth_dev_info di; - - dpdk_update_counters (xd, now); - dpdk_update_link_state (xd, now); - - s = format (s, "%U\n%Ucarrier %U", - format_dpdk_device_type, xd->device_index, - format_white_space, indent + 2, format_dpdk_link_status, xd); - - rte_eth_dev_info_get (xd->device_index, &di); - - if (verbose > 1 && xd->flags & DPDK_DEVICE_FLAG_PMD) - { - struct rte_pci_device *pci; - struct rte_eth_rss_conf rss_conf; - int vlan_off; - int retval; - - rss_conf.rss_key = 0; - retval = rte_eth_dev_rss_hash_conf_get (xd->device_index, &rss_conf); - if (retval < 0) - clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval); - pci = di.pci_dev; - - if (pci) - s = - format (s, - "%Upci id: device %04x:%04x subsystem %04x:%04x\n" - "%Upci address: %04x:%02x:%02x.%02x\n", - format_white_space, indent + 2, pci->id.vendor_id, - pci->id.device_id, pci->id.subsystem_vendor_id, - pci->id.subsystem_device_id, format_white_space, indent + 2, - pci->addr.domain, pci->addr.bus, pci->addr.devid, - pci->addr.function); - s = - format (s, "%Umax rx packet len: %d\n", format_white_space, - indent + 2, di.max_rx_pktlen); - s = - format (s, "%Umax num of queues: rx %d tx %d\n", format_white_space, - indent + 2, di.max_rx_queues, di.max_tx_queues); - s = - format (s, "%Upromiscuous: unicast %s all-multicast %s\n", - format_white_space, indent + 2, - rte_eth_promiscuous_get (xd->device_index) ? "on" : "off", - rte_eth_promiscuous_get (xd->device_index) ? "on" : "off"); - vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); - s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n", - format_white_space, indent + 2, - vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off", - vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off", - vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? 
"on" : "off"); - s = format (s, "%Urx offload caps: %U\n", - format_white_space, indent + 2, - format_dpdk_rx_offload_caps, di.rx_offload_capa); - s = format (s, "%Utx offload caps: %U\n", - format_white_space, indent + 2, - format_dpdk_tx_offload_caps, di.tx_offload_capa); - s = format (s, "%Urss active: %U\n" - "%Urss supported: %U\n", - format_white_space, indent + 2, - format_dpdk_rss_hf_name, rss_conf.rss_hf, - format_white_space, indent + 2, - format_dpdk_rss_hf_name, di.flow_type_rss_offloads); - } - - s = format (s, "%Urx queues %d, rx desc %d, tx queues %d, tx desc %d\n", - format_white_space, indent + 2, - xd->rx_q_used, xd->nb_rx_desc, xd->tx_q_used, xd->nb_tx_desc); - - if (xd->cpu_socket > -1) - s = format (s, "%Ucpu socket %d\n", - format_white_space, indent + 2, xd->cpu_socket); - - /* $$$ MIB counters */ - { -#define _(N, V) \ - if ((xd->stats.V - xd->last_cleared_stats.V) != 0) { \ - s = format (s, "\n%U%-40U%16Ld", \ - format_white_space, indent + 2, \ - format_c_identifier, #N, \ - xd->stats.V - xd->last_cleared_stats.V); \ - } \ - - foreach_dpdk_counter -#undef _ - } - - u8 *xs = 0; - u32 i = 0; - struct rte_eth_xstat *xstat, *last_xstat; - struct rte_eth_xstat_name *xstat_names = 0; - int len = rte_eth_xstats_get_names (xd->device_index, NULL, 0); - vec_validate (xstat_names, len - 1); - rte_eth_xstats_get_names (xd->device_index, xstat_names, len); - - ASSERT (vec_len (xd->xstats) == vec_len (xd->last_cleared_xstats)); - - /* *INDENT-OFF* */ - vec_foreach_index(i, xd->xstats) - { - u64 delta = 0; - xstat = vec_elt_at_index(xd->xstats, i); - last_xstat = vec_elt_at_index(xd->last_cleared_xstats, i); - - delta = xstat->value - last_xstat->value; - if (verbose == 2 || (verbose && delta)) - { - /* format_c_identifier doesn't like c strings inside vector */ - u8 * name = format(0,"%s", xstat_names[i].name); - xs = format(xs, "\n%U%-38U%16Ld", - format_white_space, indent + 4, - format_c_identifier, name, delta); - vec_free(name); - } - } - /* *INDENT-ON* */ - - vec_free (xstat_names); - - if (xs) - { - s = format (s, "\n%Uextended stats:%v", - format_white_space, indent + 2, xs); - vec_free (xs); - } - - return s; -} - -u8 * -format_dpdk_tx_dma_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); - dpdk_tx_dma_trace_t *t = va_arg (*va, dpdk_tx_dma_trace_t *); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); - uword indent = format_get_indent (s); - vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); - - s = format (s, "%U tx queue %d", - format_vnet_sw_interface_name, vnm, sw, t->queue_index); - - s = format (s, "\n%Ubuffer 0x%x: %U", - format_white_space, indent, - t->buffer_index, format_vlib_buffer, &t->buffer); - - s = format (s, "\n%U%U", format_white_space, indent, - format_ethernet_header_with_length, t->buffer.pre_data, - sizeof (t->buffer.pre_data)); - - return s; -} - -u8 * -format_dpdk_rx_dma_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); - dpdk_rx_dma_trace_t *t = va_arg (*va, dpdk_rx_dma_trace_t *); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); - format_function_t *f; - uword indent = 
format_get_indent (s); - vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); - - s = format (s, "%U rx queue %d", - format_vnet_sw_interface_name, vnm, sw, t->queue_index); - - s = format (s, "\n%Ubuffer 0x%x: %U", - format_white_space, indent, - t->buffer_index, format_vlib_buffer, &t->buffer); - - s = format (s, "\n%U%U", - format_white_space, indent, - format_dpdk_rte_mbuf, &t->mb, &t->data); - - if (vm->trace_main.verbose) - { - s = format (s, "\n%UPacket Dump%s", format_white_space, indent + 2, - t->mb.data_len > sizeof (t->data) ? " (truncated)" : ""); - s = format (s, "\n%U%U", format_white_space, indent + 4, - format_hexdump, &t->data, - t->mb.data_len > - sizeof (t->data) ? sizeof (t->data) : t->mb.data_len); - } - f = node->format_buffer; - if (!f) - f = format_hex_bytes; - s = format (s, "\n%U%U", format_white_space, indent, - f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); - - return s; -} - - -static inline u8 * -format_dpdk_pkt_types (u8 * s, va_list * va) -{ - u32 *pkt_types = va_arg (*va, u32 *); - uword indent __attribute__ ((unused)) = format_get_indent (s) + 2; - - if (!*pkt_types) - return s; - - s = format (s, "Packet Types"); - -#define _(L, F, S) \ - if ((*pkt_types & RTE_PTYPE_##L##_MASK) == RTE_PTYPE_##L##_##F) \ - { \ - s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \ - "RTE_PTYPE_" #L "_" #F, RTE_PTYPE_##L##_##F, S); \ - } - - foreach_dpdk_pkt_type -#undef _ - return s; -} - -static inline u8 * -format_dpdk_pkt_offload_flags (u8 * s, va_list * va) -{ - u64 *ol_flags = va_arg (*va, u64 *); - uword indent = format_get_indent (s) + 2; - - if (!*ol_flags) - return s; - - s = format (s, "Packet Offload Flags"); - -#define _(F, S) \ - if (*ol_flags & F) \ - { \ - s = format (s, "\n%U%s (0x%04x) %s", \ - format_white_space, indent, #F, F, S); \ - } - - foreach_dpdk_pkt_offload_flag -#undef _ - return s; -} - -u8 * -format_dpdk_rte_mbuf_vlan (u8 * s, va_list * va) -{ - ethernet_vlan_header_tv_t *vlan_hdr = - va_arg (*va, ethernet_vlan_header_tv_t *); - - if (clib_net_to_host_u16 (vlan_hdr->type) == ETHERNET_TYPE_DOT1AD) - { - s = format (s, "%U 802.1q vlan ", - format_ethernet_vlan_tci, - clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); - vlan_hdr++; - } - - s = format (s, "%U", - format_ethernet_vlan_tci, - clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); - - return s; -} - -u8 * -format_dpdk_rte_mbuf (u8 * s, va_list * va) -{ - struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *); - ethernet_header_t *eth_hdr = va_arg (*va, ethernet_header_t *); - uword indent = format_get_indent (s) + 2; - - s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d" - "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x, data_off %d, phys_addr 0x%x" - "\n%Upacket_type 0x%x", - mb->port, mb->nb_segs, mb->pkt_len, - format_white_space, indent, - mb->buf_len, mb->data_len, mb->ol_flags, mb->data_off, - mb->buf_physaddr, format_white_space, indent, mb->packet_type); - - if (mb->ol_flags) - s = format (s, "\n%U%U", format_white_space, indent, - format_dpdk_pkt_offload_flags, &mb->ol_flags); - - if ((mb->ol_flags & PKT_RX_VLAN_PKT) && - ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0)) - { - ethernet_vlan_header_tv_t *vlan_hdr = - ((ethernet_vlan_header_tv_t *) & (eth_hdr->type)); - s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr); - } - - if (mb->packet_type) - s = format (s, "\n%U%U", format_white_space, indent, - format_dpdk_pkt_types, &mb->packet_type); - - return s; -} - -/* FIXME is this function used? 
*/ -#if 0 -uword -unformat_socket_mem (unformat_input_t * input, va_list * va) -{ - uword **r = va_arg (*va, uword **); - int i = 0; - u32 mem; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, ",")) - hash_set (*r, i, 1024); - else if (unformat (input, "%u,", &mem)) - hash_set (*r, i, mem); - else if (unformat (input, "%u", &mem)) - hash_set (*r, i, mem); - else - { - unformat_put_input (input); - goto done; - } - i++; - } - -done: - return 1; -} -#endif - -clib_error_t * -unformat_rss_fn (unformat_input_t * input, uword * rss_fn) -{ - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (0) - ; -#undef _ -#define _(f, s) \ - else if (unformat (input, s)) \ - *rss_fn |= f; - - foreach_dpdk_rss_hf -#undef _ - else - { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - } - return 0; -} - -clib_error_t * -unformat_hqos (unformat_input_t * input, dpdk_device_config_hqos_t * hqos) -{ - clib_error_t *error = 0; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "hqos-thread %u", &hqos->hqos_thread)) - hqos->hqos_thread_valid = 1; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - break; - } - } - - return error; -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/hqos.c b/src/vnet/devices/dpdk/hqos.c deleted file mode 100644 index d68bc48f..00000000 --- a/src/vnet/devices/dpdk/hqos.c +++ /dev/null @@ -1,775 +0,0 @@ -/* - * Copyright(c) 2016 Intel Corporation. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include /* enumerate all vlib messages */ - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - -#include "dpdk_priv.h" - -dpdk_main_t dpdk_main; - -/*** - * - * HQoS default configuration values - * - ***/ - -static dpdk_device_config_hqos_t hqos_params_default = { - .hqos_thread_valid = 0, - - .swq_size = 4096, - .burst_enq = 256, - .burst_deq = 220, - - /* - * Packet field to identify the subport. - * - * Default value: Since only one subport is defined by default (see below: - * n_subports_per_port = 1), the subport ID is hardcoded to 0. - */ - .pktfield0_slabpos = 0, - .pktfield0_slabmask = 0, - - /* - * Packet field to identify the pipe. - * - * Default value: Assuming Ethernet/IPv4/UDP packets, UDP payload bits 12 .. 
23 - */ - .pktfield1_slabpos = 40, - .pktfield1_slabmask = 0x0000000FFF000000LLU, - - /* Packet field used as index into TC translation table to identify the traffic - * class and queue. - * - * Default value: Assuming Ethernet/IPv4 packets, IPv4 DSCP field - */ - .pktfield2_slabpos = 8, - .pktfield2_slabmask = 0x00000000000000FCLLU, - .tc_table = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - }, - - /* port */ - .port = { - .name = NULL, /* Set at init */ - .socket = 0, /* Set at init */ - .rate = 1250000000, /* Assuming 10GbE port */ - .mtu = 14 + 1500, /* Assuming Ethernet/IPv4 pkt (Ethernet FCS not included) */ - .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT, - .n_subports_per_port = 1, - .n_pipes_per_subport = 4096, - .qsize = {64, 64, 64, 64}, - .pipe_profiles = NULL, /* Set at config */ - .n_pipe_profiles = 1, - -#ifdef RTE_SCHED_RED - .red_params = { - /* Traffic Class 0 Colors Green / Yellow / Red */ - [0][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [0][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [0][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - - /* Traffic Class 1 - Colors Green / Yellow / Red */ - [1][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [1][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [1][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - - /* Traffic Class 2 - Colors Green / Yellow / Red */ - [2][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [2][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [2][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - - /* Traffic Class 3 - Colors Green / Yellow / Red */ - [3][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [3][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [3][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9} - }, -#endif /* RTE_SCHED_RED */ - }, -}; - -static struct rte_sched_subport_params hqos_subport_params_default = { - .tb_rate = 1250000000, /* 10GbE line rate (measured in bytes/second) */ - .tb_size = 1000000, - .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000}, - .tc_period = 10, -}; - -static struct rte_sched_pipe_params hqos_pipe_params_default = { - .tb_rate = 305175, /* 10GbE line rate divided by 4K pipes */ - .tb_size = 1000000, - .tc_rate = {305175, 305175, 305175, 305175}, - .tc_period = 40, -#ifdef RTE_SCHED_SUBPORT_TC_OV - .tc_ov_weight = 1, -#endif - .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, -}; - -/*** - * - * HQoS configuration - * - ***/ - -int -dpdk_hqos_validate_mask (u64 mask, u32 n) -{ - int count = __builtin_popcountll (mask); - int pos_lead = sizeof (u64) * 8 - __builtin_clzll (mask); - int pos_trail = __builtin_ctzll (mask); - int count_expected = __builtin_popcount (n - 1); - - /* Handle the exceptions */ - if (n == 0) - return -1; /* Error */ - - if ((mask == 0) && (n == 1)) - return 0; /* OK */ - - if (((mask == 0) && (n != 1)) || ((mask != 0) && (n == 1))) - return -2; /* Error */ - - /* Check that mask is contiguous */ - if ((pos_lead - pos_trail) != count) - return -3; /* Error */ - - /* Check that mask contains the expected number of bits set */ - if (count != count_expected) - return -4; /* Error */ - - return 0; /* OK */ -} - -void 
-dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * - hqos, u32 pipe_profile_id) -{ - memcpy (&hqos->pipe[pipe_profile_id], &hqos_pipe_params_default, - sizeof (hqos_pipe_params_default)); -} - -void -dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos) -{ - struct rte_sched_subport_params *subport_params; - struct rte_sched_pipe_params *pipe_params; - u32 *pipe_map; - u32 i; - - memcpy (hqos, &hqos_params_default, sizeof (hqos_params_default)); - - /* pipe */ - vec_add2 (hqos->pipe, pipe_params, hqos->port.n_pipe_profiles); - - for (i = 0; i < vec_len (hqos->pipe); i++) - memcpy (&pipe_params[i], - &hqos_pipe_params_default, sizeof (hqos_pipe_params_default)); - - hqos->port.pipe_profiles = hqos->pipe; - - /* subport */ - vec_add2 (hqos->subport, subport_params, hqos->port.n_subports_per_port); - - for (i = 0; i < vec_len (hqos->subport); i++) - memcpy (&subport_params[i], - &hqos_subport_params_default, - sizeof (hqos_subport_params_default)); - - /* pipe profile */ - vec_add2 (hqos->pipe_map, - pipe_map, - hqos->port.n_subports_per_port * hqos->port.n_pipes_per_subport); - - for (i = 0; i < vec_len (hqos->pipe_map); i++) - pipe_map[i] = 0; -} - -/*** - * - * HQoS init - * - ***/ - -clib_error_t * -dpdk_port_setup_hqos (dpdk_device_t * xd, dpdk_device_config_hqos_t * hqos) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - char name[32]; - u32 subport_id, i; - int rv; - - /* Detect the set of worker threads */ - int worker_thread_first = 0; - int worker_thread_count = 0; - - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - vlib_thread_registration_t *tr = - p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - worker_thread_first = tr->first_index; - worker_thread_count = tr->count; - } - - /* Allocate the per-thread device data array */ - vec_validate_aligned (xd->hqos_wt, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - memset (xd->hqos_wt, 0, tm->n_vlib_mains * sizeof (xd->hqos_wt[0])); - - vec_validate_aligned (xd->hqos_ht, 0, CLIB_CACHE_LINE_BYTES); - memset (xd->hqos_ht, 0, sizeof (xd->hqos_ht[0])); - - /* Allocate space for one SWQ per worker thread in the I/O TX thread data structure */ - vec_validate (xd->hqos_ht->swq, worker_thread_count); - - /* SWQ */ - for (i = 0; i < worker_thread_count + 1; i++) - { - u32 swq_flags = RING_F_SP_ENQ | RING_F_SC_DEQ; - - snprintf (name, sizeof (name), "SWQ-worker%u-to-device%u", i, - xd->device_index); - xd->hqos_ht->swq[i] = - rte_ring_create (name, hqos->swq_size, xd->cpu_socket, swq_flags); - if (xd->hqos_ht->swq[i] == NULL) - return clib_error_return (0, - "SWQ-worker%u-to-device%u: rte_ring_create err", - i, xd->device_index); - } - - /* - * HQoS - */ - - /* HQoS port */ - snprintf (name, sizeof (name), "HQoS%u", xd->device_index); - hqos->port.name = strdup (name); - if (hqos->port.name == NULL) - return clib_error_return (0, "HQoS%u: strdup err", xd->device_index); - - hqos->port.socket = rte_eth_dev_socket_id (xd->device_index); - if (hqos->port.socket == SOCKET_ID_ANY) - hqos->port.socket = 0; - - xd->hqos_ht->hqos = rte_sched_port_config (&hqos->port); - if (xd->hqos_ht->hqos == NULL) - return clib_error_return (0, "HQoS%u: rte_sched_port_config err", - xd->device_index); - - /* HQoS subport */ - for (subport_id = 0; subport_id < hqos->port.n_subports_per_port; - subport_id++) - { - u32 pipe_id; - - rv = - rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, - &hqos->subport[subport_id]); - if (rv) - return clib_error_return (0, - 
"HQoS%u subport %u: rte_sched_subport_config err (%d)", - xd->device_index, subport_id, rv); - - /* HQoS pipe */ - for (pipe_id = 0; pipe_id < hqos->port.n_pipes_per_subport; pipe_id++) - { - u32 pos = subport_id * hqos->port.n_pipes_per_subport + pipe_id; - u32 profile_id = hqos->pipe_map[pos]; - - rv = - rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, - profile_id); - if (rv) - return clib_error_return (0, - "HQoS%u subport %u pipe %u: rte_sched_pipe_config err (%d)", - xd->device_index, subport_id, pipe_id, - rv); - } - } - - /* Set up per-thread device data for the I/O TX thread */ - xd->hqos_ht->hqos_burst_enq = hqos->burst_enq; - xd->hqos_ht->hqos_burst_deq = hqos->burst_deq; - vec_validate (xd->hqos_ht->pkts_enq, 2 * hqos->burst_enq - 1); - vec_validate (xd->hqos_ht->pkts_deq, hqos->burst_deq - 1); - xd->hqos_ht->pkts_enq_len = 0; - xd->hqos_ht->swq_pos = 0; - xd->hqos_ht->flush_count = 0; - - /* Set up per-thread device data for each worker thread */ - for (i = 0; i < worker_thread_count + 1; i++) - { - u32 tid; - if (i) - tid = worker_thread_first + (i - 1); - else - tid = i; - - xd->hqos_wt[tid].swq = xd->hqos_ht->swq[i]; - xd->hqos_wt[tid].hqos_field0_slabpos = hqos->pktfield0_slabpos; - xd->hqos_wt[tid].hqos_field0_slabmask = hqos->pktfield0_slabmask; - xd->hqos_wt[tid].hqos_field0_slabshr = - __builtin_ctzll (hqos->pktfield0_slabmask); - xd->hqos_wt[tid].hqos_field1_slabpos = hqos->pktfield1_slabpos; - xd->hqos_wt[tid].hqos_field1_slabmask = hqos->pktfield1_slabmask; - xd->hqos_wt[tid].hqos_field1_slabshr = - __builtin_ctzll (hqos->pktfield1_slabmask); - xd->hqos_wt[tid].hqos_field2_slabpos = hqos->pktfield2_slabpos; - xd->hqos_wt[tid].hqos_field2_slabmask = hqos->pktfield2_slabmask; - xd->hqos_wt[tid].hqos_field2_slabshr = - __builtin_ctzll (hqos->pktfield2_slabmask); - memcpy (xd->hqos_wt[tid].hqos_tc_table, hqos->tc_table, - sizeof (hqos->tc_table)); - } - - return 0; -} - -/*** - * - * HQoS run-time - * - ***/ -/* - * dpdk_hqos_thread - Contains the main loop of an HQoS thread. 
- * - * w - * Information for the current thread - */ -static_always_inline void -dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm) -{ - dpdk_main_t *dm = &dpdk_main; - u32 cpu_index = vm->cpu_index; - u32 dev_pos; - - dev_pos = 0; - while (1) - { - vlib_worker_thread_barrier_check (); - - u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); - if (dev_pos >= n_devs) - dev_pos = 0; - - dpdk_device_and_queue_t *dq = - vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); - dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); - - dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; - u32 device_index = xd->device_index; - u16 queue_id = dq->queue_id; - - struct rte_mbuf **pkts_enq = hqos->pkts_enq; - u32 pkts_enq_len = hqos->pkts_enq_len; - u32 swq_pos = hqos->swq_pos; - u32 n_swq = vec_len (hqos->swq), i; - u32 flush_count = hqos->flush_count; - - for (i = 0; i < n_swq; i++) - { - /* Get current SWQ for this device */ - struct rte_ring *swq = hqos->swq[swq_pos]; - - /* Read SWQ burst to packet buffer of this device */ - pkts_enq_len += rte_ring_sc_dequeue_burst (swq, - (void **) - &pkts_enq[pkts_enq_len], - hqos->hqos_burst_enq); - - /* Get next SWQ for this device */ - swq_pos++; - if (swq_pos >= n_swq) - swq_pos = 0; - hqos->swq_pos = swq_pos; - - /* HWQ TX enqueue when burst available */ - if (pkts_enq_len >= hqos->hqos_burst_enq) - { - u32 n_pkts = rte_eth_tx_burst (device_index, - (uint16_t) queue_id, - pkts_enq, - (uint16_t) pkts_enq_len); - - for (; n_pkts < pkts_enq_len; n_pkts++) - rte_pktmbuf_free (pkts_enq[n_pkts]); - - pkts_enq_len = 0; - flush_count = 0; - break; - } - } - if (pkts_enq_len) - { - flush_count++; - if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) - { - rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); - - pkts_enq_len = 0; - flush_count = 0; - } - } - hqos->pkts_enq_len = pkts_enq_len; - hqos->flush_count = flush_count; - - /* Advance to next device */ - dev_pos++; - } -} - -static_always_inline void -dpdk_hqos_thread_internal (vlib_main_t * vm) -{ - dpdk_main_t *dm = &dpdk_main; - u32 cpu_index = vm->cpu_index; - u32 dev_pos; - - dev_pos = 0; - while (1) - { - vlib_worker_thread_barrier_check (); - - u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); - if (PREDICT_FALSE (n_devs == 0)) - { - dev_pos = 0; - continue; - } - if (dev_pos >= n_devs) - dev_pos = 0; - - dpdk_device_and_queue_t *dq = - vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); - dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); - - dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; - u32 device_index = xd->device_index; - u16 queue_id = dq->queue_id; - - struct rte_mbuf **pkts_enq = hqos->pkts_enq; - struct rte_mbuf **pkts_deq = hqos->pkts_deq; - u32 pkts_enq_len = hqos->pkts_enq_len; - u32 swq_pos = hqos->swq_pos; - u32 n_swq = vec_len (hqos->swq), i; - u32 flush_count = hqos->flush_count; - - /* - * SWQ dequeue and HQoS enqueue for current device - */ - for (i = 0; i < n_swq; i++) - { - /* Get current SWQ for this device */ - struct rte_ring *swq = hqos->swq[swq_pos]; - - /* Read SWQ burst to packet buffer of this device */ - pkts_enq_len += rte_ring_sc_dequeue_burst (swq, - (void **) - &pkts_enq[pkts_enq_len], - hqos->hqos_burst_enq); - - /* Get next SWQ for this device */ - swq_pos++; - if (swq_pos >= n_swq) - swq_pos = 0; - hqos->swq_pos = swq_pos; - - /* HQoS enqueue when burst available */ - if (pkts_enq_len >= hqos->hqos_burst_enq) - { - rte_sched_port_enqueue (hqos->hqos, pkts_enq, 
pkts_enq_len); - - pkts_enq_len = 0; - flush_count = 0; - break; - } - } - if (pkts_enq_len) - { - flush_count++; - if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) - { - rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); - - pkts_enq_len = 0; - flush_count = 0; - } - } - hqos->pkts_enq_len = pkts_enq_len; - hqos->flush_count = flush_count; - - /* - * HQoS dequeue and HWQ TX enqueue for current device - */ - { - u32 pkts_deq_len, n_pkts; - - pkts_deq_len = rte_sched_port_dequeue (hqos->hqos, - pkts_deq, - hqos->hqos_burst_deq); - - for (n_pkts = 0; n_pkts < pkts_deq_len;) - n_pkts += rte_eth_tx_burst (device_index, - (uint16_t) queue_id, - &pkts_deq[n_pkts], - (uint16_t) (pkts_deq_len - n_pkts)); - } - - /* Advance to next device */ - dev_pos++; - } -} - -void -dpdk_hqos_thread (vlib_worker_thread_t * w) -{ - vlib_main_t *vm; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - - vm = vlib_get_main (); - - ASSERT (vm->cpu_index == os_get_cpu_number ()); - - clib_time_init (&vm->clib_time); - clib_mem_set_heap (w->thread_mheap); - - /* Wait until the dpdk init sequence is complete */ - while (tm->worker_thread_release == 0) - vlib_worker_thread_barrier_check (); - - if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0) - return - clib_error - ("current I/O TX thread does not have any devices assigned to it"); - - if (DPDK_HQOS_DBG_BYPASS) - dpdk_hqos_thread_internal_hqos_dbg_bypass (vm); - else - dpdk_hqos_thread_internal (vm); -} - -void -dpdk_hqos_thread_fn (void *arg) -{ - vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; - vlib_worker_thread_init (w); - dpdk_hqos_thread (w); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_THREAD (hqos_thread_reg, static) = -{ - .name = "hqos-threads", - .short_name = "hqos-threads", - .function = dpdk_hqos_thread_fn, -}; -/* *INDENT-ON* */ - -/* - * HQoS run-time code to be called by the worker threads - */ -#define BITFIELD(byte_array, slab_pos, slab_mask, slab_shr) \ -({ \ - u64 slab = *((u64 *) &byte_array[slab_pos]); \ - u64 val = (rte_be_to_cpu_64(slab) & slab_mask) >> slab_shr; \ - val; \ -}) - -#define RTE_SCHED_PORT_HIERARCHY(subport, pipe, traffic_class, queue, color) \ - ((((u64) (queue)) & 0x3) | \ - ((((u64) (traffic_class)) & 0x3) << 2) | \ - ((((u64) (color)) & 0x3) << 4) | \ - ((((u64) (subport)) & 0xFFFF) << 16) | \ - ((((u64) (pipe)) & 0xFFFFFFFF) << 32)) - -void -dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, - struct rte_mbuf **pkts, u32 n_pkts) -{ - u32 i; - - for (i = 0; i < (n_pkts & (~0x3)); i += 4) - { - struct rte_mbuf *pkt0 = pkts[i]; - struct rte_mbuf *pkt1 = pkts[i + 1]; - struct rte_mbuf *pkt2 = pkts[i + 2]; - struct rte_mbuf *pkt3 = pkts[i + 3]; - - u8 *pkt0_data = rte_pktmbuf_mtod (pkt0, u8 *); - u8 *pkt1_data = rte_pktmbuf_mtod (pkt1, u8 *); - u8 *pkt2_data = rte_pktmbuf_mtod (pkt2, u8 *); - u8 *pkt3_data = rte_pktmbuf_mtod (pkt3, u8 *); - - u64 pkt0_subport = BITFIELD (pkt0_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt0_pipe = BITFIELD (pkt0_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt0_dscp = BITFIELD (pkt0_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt0_tc = hqos->hqos_tc_table[pkt0_dscp & 0x3F] >> 2; - u32 pkt0_tc_q = hqos->hqos_tc_table[pkt0_dscp & 0x3F] & 0x3; - - u64 pkt1_subport = BITFIELD (pkt1_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - 
hqos->hqos_field0_slabshr); - u64 pkt1_pipe = BITFIELD (pkt1_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt1_dscp = BITFIELD (pkt1_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt1_tc = hqos->hqos_tc_table[pkt1_dscp & 0x3F] >> 2; - u32 pkt1_tc_q = hqos->hqos_tc_table[pkt1_dscp & 0x3F] & 0x3; - - u64 pkt2_subport = BITFIELD (pkt2_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt2_pipe = BITFIELD (pkt2_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt2_dscp = BITFIELD (pkt2_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt2_tc = hqos->hqos_tc_table[pkt2_dscp & 0x3F] >> 2; - u32 pkt2_tc_q = hqos->hqos_tc_table[pkt2_dscp & 0x3F] & 0x3; - - u64 pkt3_subport = BITFIELD (pkt3_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt3_pipe = BITFIELD (pkt3_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt3_dscp = BITFIELD (pkt3_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt3_tc = hqos->hqos_tc_table[pkt3_dscp & 0x3F] >> 2; - u32 pkt3_tc_q = hqos->hqos_tc_table[pkt3_dscp & 0x3F] & 0x3; - - u64 pkt0_sched = RTE_SCHED_PORT_HIERARCHY (pkt0_subport, - pkt0_pipe, - pkt0_tc, - pkt0_tc_q, - 0); - u64 pkt1_sched = RTE_SCHED_PORT_HIERARCHY (pkt1_subport, - pkt1_pipe, - pkt1_tc, - pkt1_tc_q, - 0); - u64 pkt2_sched = RTE_SCHED_PORT_HIERARCHY (pkt2_subport, - pkt2_pipe, - pkt2_tc, - pkt2_tc_q, - 0); - u64 pkt3_sched = RTE_SCHED_PORT_HIERARCHY (pkt3_subport, - pkt3_pipe, - pkt3_tc, - pkt3_tc_q, - 0); - - pkt0->hash.sched.lo = pkt0_sched & 0xFFFFFFFF; - pkt0->hash.sched.hi = pkt0_sched >> 32; - pkt1->hash.sched.lo = pkt1_sched & 0xFFFFFFFF; - pkt1->hash.sched.hi = pkt1_sched >> 32; - pkt2->hash.sched.lo = pkt2_sched & 0xFFFFFFFF; - pkt2->hash.sched.hi = pkt2_sched >> 32; - pkt3->hash.sched.lo = pkt3_sched & 0xFFFFFFFF; - pkt3->hash.sched.hi = pkt3_sched >> 32; - } - - for (; i < n_pkts; i++) - { - struct rte_mbuf *pkt = pkts[i]; - - u8 *pkt_data = rte_pktmbuf_mtod (pkt, u8 *); - - u64 pkt_subport = BITFIELD (pkt_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt_pipe = BITFIELD (pkt_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt_dscp = BITFIELD (pkt_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt_tc = hqos->hqos_tc_table[pkt_dscp & 0x3F] >> 2; - u32 pkt_tc_q = hqos->hqos_tc_table[pkt_dscp & 0x3F] & 0x3; - - u64 pkt_sched = RTE_SCHED_PORT_HIERARCHY (pkt_subport, - pkt_pipe, - pkt_tc, - pkt_tc_q, - 0); - - pkt->hash.sched.lo = pkt_sched & 0xFFFFFFFF; - pkt->hash.sched.hi = pkt_sched >> 32; - } -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c deleted file mode 100755 index 29423e15..00000000 --- a/src/vnet/devices/dpdk/init.c +++ /dev/null @@ -1,1801 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dpdk_priv.h" - -dpdk_main_t dpdk_main; - -/* force linker to link functions used by vlib and declared weak */ -void *vlib_weakly_linked_functions[] = { - &rte_pktmbuf_init, - &rte_pktmbuf_pool_init, -}; - -#define LINK_STATE_ELOGS 0 - -#define DEFAULT_HUGE_DIR "/run/vpp/hugepages" -#define VPP_RUN_DIR "/run/vpp" - -/* Port configuration, mildly modified Intel app values */ - -static struct rte_eth_conf port_conf_template = { - .rxmode = { - .split_hdr_size = 0, - .header_split = 0, /**< Header Split disabled */ - .hw_ip_checksum = 0, /**< IP checksum offload disabled */ - .hw_vlan_filter = 0, /**< VLAN filtering disabled */ - .hw_strip_crc = 0, /**< CRC stripped by hardware */ - }, - .txmode = { - .mq_mode = ETH_MQ_TX_NONE, - }, -}; - -clib_error_t * -dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) -{ - int rv; - int j; - - ASSERT (os_get_cpu_number () == 0); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); - rte_eth_dev_stop (xd->device_index); - } - - rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, - xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - /* Set up one TX-queue per worker thread */ - for (j = 0; j < xd->tx_q_used; j++) - { - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - xd->cpu_socket, &xd->tx_conf); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - SOCKET_ID_ANY, &xd->tx_conf); - if (rv < 0) - break; - } - - if (rv < 0) - return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", - xd->device_index, rv); - - for (j = 0; j < xd->rx_q_used; j++) - { - - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - xd->cpu_socket, 0, - dm-> - pktmbuf_pools[xd->cpu_socket_id_by_queue - [j]]); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - SOCKET_ID_ANY, 0, - dm-> - pktmbuf_pools[xd->cpu_socket_id_by_queue - [j]]); - if (rv < 0) - return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", - xd->device_index, rv); - } - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - int rv; - rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } - return 0; -} - -static u32 -dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - u32 old = 0; - - if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags)) - { - old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0; - - if 
(flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) - xd->flags |= DPDK_DEVICE_FLAG_PROMISC; - else - xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC; - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) - rte_eth_promiscuous_enable (xd->device_index); - else - rte_eth_promiscuous_disable (xd->device_index); - } - } - else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) - { - /* - * DAW-FIXME: The Cisco VIC firmware does not provide an api for a - * driver to dynamically change the mtu. If/when the - * VIC firmware gets fixed, then this should be removed. - */ - if (xd->pmd == VNET_DPDK_PMD_ENIC) - { - struct rte_eth_dev_info dev_info; - - /* - * Restore mtu to what has been set by CIMC in the firmware cfg. - */ - rte_eth_dev_info_get (xd->device_index, &dev_info); - hi->max_packet_bytes = dev_info.max_rx_pktlen; - - vlib_cli_output (vlib_get_main (), - "Cisco VIC mtu can only be changed " - "using CIMC then rebooting the server!"); - } - else - { - int rv; - - xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - rte_eth_dev_stop (xd->device_index); - - rv = rte_eth_dev_configure - (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - vlib_cli_output (vlib_get_main (), - "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - int rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } - } - } - return old; -} - -void -dpdk_device_lock_init (dpdk_device_t * xd) -{ - int q; - vec_validate (xd->lockp, xd->tx_q_used - 1); - for (q = 0; q < xd->tx_q_used; q++) - { - xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); - memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); - } -} - -void -dpdk_device_lock_free (dpdk_device_t * xd) -{ - int q; - - for (q = 0; q < vec_len (xd->lockp); q++) - clib_mem_free ((void *) xd->lockp[q]); - vec_free (xd->lockp); - xd->lockp = 0; -} - -static clib_error_t * -dpdk_lib_init (dpdk_main_t * dm) -{ - u32 nports; - u32 nb_desc = 0; - int i; - clib_error_t *error; - vlib_main_t *vm = vlib_get_main (); - vlib_thread_main_t *tm = vlib_get_thread_main (); - vnet_sw_interface_t *sw; - vnet_hw_interface_t *hi; - dpdk_device_t *xd; - vlib_pci_addr_t last_pci_addr; - u32 last_pci_addr_port = 0; - vlib_thread_registration_t *tr, *tr_hqos; - uword *p, *p_hqos; - - u32 next_cpu = 0, next_hqos_cpu = 0; - u8 af_packet_port_id = 0; - last_pci_addr.as_u32 = ~0; - - dm->input_cpu_first_index = 0; - dm->input_cpu_count = 1; - - /* find out which cpus will be used for input */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - dm->input_cpu_first_index = tr->first_index; - dm->input_cpu_count = tr->count; - } - - vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - dm->hqos_cpu_first_index = 0; - dm->hqos_cpu_count = 0; - - /* find out which cpus will be used for I/O TX */ - p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads"); - tr_hqos = p_hqos ? 
(vlib_thread_registration_t *) p_hqos[0] : 0; - - if (tr_hqos && tr_hqos->count > 0) - { - dm->hqos_cpu_first_index = tr_hqos->first_index; - dm->hqos_cpu_count = tr_hqos->count; - } - - vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - nports = rte_eth_dev_count (); - if (nports < 1) - { - clib_warning ("DPDK drivers found no ports..."); - } - - if (CLIB_DEBUG > 0) - clib_warning ("DPDK drivers found %d ports...", nports); - - /* - * All buffers are all allocated from the same rte_mempool. - * Thus they all have the same number of data bytes. - */ - dm->vlib_buffer_free_list_index = - vlib_buffer_get_or_create_free_list (vm, - VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, - "dpdk rx"); - - if (dm->conf->enable_tcp_udp_checksum) - dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT - | IP_BUFFER_L4_CHECKSUM_COMPUTED); - - for (i = 0; i < nports; i++) - { - u8 addr[6]; - u8 vlan_strip = 0; - int j; - struct rte_eth_dev_info dev_info; - clib_error_t *rv; - struct rte_eth_link l; - dpdk_device_config_t *devconf = 0; - vlib_pci_addr_t pci_addr; - uword *p = 0; - - rte_eth_dev_info_get (i, &dev_info); - if (dev_info.pci_dev) /* bonded interface has no pci info */ - { - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - p = - hash_get (dm->conf->device_config_index_by_pci_addr, - pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - /* Create vnet interface */ - vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); - xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; - xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; - xd->cpu_socket = (i8) rte_eth_dev_socket_id (i); - - /* Handle interface naming for devices with multiple ports sharing same PCI ID */ - if (dev_info.pci_dev) - { - struct rte_eth_dev_info di = { 0 }; - rte_eth_dev_info_get (i + 1, &di); - if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 && - memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr, - sizeof (struct rte_pci_addr)) == 0) - { - xd->interface_name_suffix = format (0, "0"); - last_pci_addr.as_u32 = pci_addr.as_u32; - last_pci_addr_port = i; - } - else if (pci_addr.as_u32 == last_pci_addr.as_u32) - { - xd->interface_name_suffix = - format (0, "%u", i - last_pci_addr_port); - } - else - { - last_pci_addr.as_u32 = ~0; - } - } - else - last_pci_addr.as_u32 = ~0; - - clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, - sizeof (struct rte_eth_txconf)); - if (dm->conf->no_multi_seg) - { - xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; - port_conf_template.rxmode.jumbo_frame = 0; - } - else - { - xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; - port_conf_template.rxmode.jumbo_frame = 1; - xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG; - } - - clib_memcpy (&xd->port_conf, &port_conf_template, - sizeof (struct rte_eth_conf)); - - xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains); - - if (devconf->num_tx_queues > 0 - && devconf->num_tx_queues < xd->tx_q_used) - xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues); - - if (devconf->num_rx_queues > 1 && dm->use_rss == 0) - { - dm->use_rss = 1; - } - - if (devconf->num_rx_queues > 1 - && dev_info.max_rx_queues >= devconf->num_rx_queues) - { - xd->rx_q_used = devconf->num_rx_queues; - xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; - if (devconf->rss_fn == 0) - 
xd->port_conf.rx_adv_conf.rss_conf.rss_hf = - ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; - else - xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; - } - else - xd->rx_q_used = 1; - - xd->flags |= DPDK_DEVICE_FLAG_PMD; - - /* workaround for drivers not setting driver_name */ - if ((!dev_info.driver_name) && (dev_info.pci_dev)) - dev_info.driver_name = dev_info.pci_dev->driver->driver.name; - - ASSERT (dev_info.driver_name); - - if (!xd->pmd) - { - - -#define _(s,f) else if (dev_info.driver_name && \ - !strcmp(dev_info.driver_name, s)) \ - xd->pmd = VNET_DPDK_PMD_##f; - if (0) - ; - foreach_dpdk_pmd -#undef _ - else - xd->pmd = VNET_DPDK_PMD_UNKNOWN; - - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; - xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; - - switch (xd->pmd) - { - /* 1G adapters */ - case VNET_DPDK_PMD_E1000EM: - case VNET_DPDK_PMD_IGB: - case VNET_DPDK_PMD_IGBVF: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - break; - - /* 10G adapters */ - case VNET_DPDK_PMD_IXGBE: - case VNET_DPDK_PMD_IXGBEVF: - case VNET_DPDK_PMD_THUNDERX: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - case VNET_DPDK_PMD_DPAA2: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - - /* Cisco VIC */ - case VNET_DPDK_PMD_ENIC: - rte_eth_link_get_nowait (i, &l); - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - if (l.link_speed == 40000) - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - else - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - - /* Intel Fortville */ - case VNET_DPDK_PMD_I40E: - case VNET_DPDK_PMD_I40EVF: - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - - switch (dev_info.pci_dev->id.device_id) - { - case I40E_DEV_ID_10G_BASE_T: - case I40E_DEV_ID_SFP_XL710: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - case I40E_DEV_ID_QSFP_A: - case I40E_DEV_ID_QSFP_B: - case I40E_DEV_ID_QSFP_C: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case I40E_DEV_ID_VF: - rte_eth_link_get_nowait (i, &l); - xd->port_type = l.link_speed == 10000 ? 
- VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; - break; - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - - case VNET_DPDK_PMD_CXGBE: - switch (dev_info.pci_dev->id.device_id) - { - case 0x540d: /* T580-CR */ - case 0x5410: /* T580-LP-cr */ - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case 0x5403: /* T540-CR */ - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - - case VNET_DPDK_PMD_MLX5: - { - char *pn_100g[] = { "MCX415A-CCAT", "MCX416A-CCAT", 0 }; - char *pn_40g[] = { "MCX413A-BCAT", "MCX414A-BCAT", - "MCX415A-BCAT", "MCX416A-BCAT", "MCX4131A-BCAT", 0 - }; - char *pn_10g[] = { "MCX4111A-XCAT", "MCX4121A-XCAT", 0 }; - - vlib_pci_device_t *pd = vlib_get_pci_device (&pci_addr); - u8 *pn = 0; - char **c; - int found = 0; - pn = format (0, "%U%c", - format_vlib_pci_vpd, pd->vpd_r, "PN", 0); - - if (!pn) - break; - - c = pn_100g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_100G; - break; - } - c++; - } - - c = pn_40g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - } - c++; - } - - c = pn_10g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - } - c++; - } - - vec_free (pn); - } - - break; - /* Intel Red Rock Canyon */ - case VNET_DPDK_PMD_FM10K: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; - break; - - /* virtio */ - case VNET_DPDK_PMD_VIRTIO: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO; - xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO; - break; - - /* vmxnet3 */ - case VNET_DPDK_PMD_VMXNET3: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; - break; - - case VNET_DPDK_PMD_AF_PACKET: - xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET; - xd->af_packet_port_id = af_packet_port_id++; - break; - - case VNET_DPDK_PMD_BOND: - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; - break; - - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - - if (devconf->num_rx_desc) - xd->nb_rx_desc = devconf->num_rx_desc; - - if (devconf->num_tx_desc) - xd->nb_tx_desc = devconf->num_tx_desc; - } - - /* - * Ensure default mtu is not > the mtu read from the hardware. - * Otherwise rte_eth_dev_configure() will fail and the port will - * not be available. - */ - if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen) - { - /* - * This device does not support the platforms's max frame - * size. Use it's advertised mru instead. - */ - xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen; - } - else - { - xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES; - - /* - * Some platforms do not account for Ethernet FCS (4 bytes) in - * MTU calculations. To interop with them increase mru but only - * if the device's settings can support it. - */ - if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) && - xd->port_conf.rxmode.hw_strip_crc) - { - /* - * Allow additional 4 bytes (for Ethernet FCS). These bytes are - * stripped by h/w and so will not consume any buffer memory. 
- */ - xd->port_conf.rxmode.max_rx_pkt_len += 4; - } - } - - if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) - { - f64 now = vlib_time_now (vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - clib_memcpy (addr + 2, &rnd, sizeof (rnd)); - addr[0] = 2; - addr[1] = 0xfe; - } - else - rte_eth_macaddr_get (i, (struct ether_addr *) addr); - - if (xd->tx_q_used < tm->n_vlib_mains) - dpdk_device_lock_init (xd); - - xd->device_index = xd - dm->devices; - ASSERT (i == xd->device_index); - xd->per_interface_next_index = ~0; - - /* assign interface to input thread */ - dpdk_device_and_queue_t *dq; - int q; - - if (devconf->workers) - { - int i; - q = 0; - /* *INDENT-OFF* */ - clib_bitmap_foreach (i, devconf->workers, ({ - int cpu = dm->input_cpu_first_index + i; - unsigned lcore = vlib_worker_threads[cpu].lcore_id; - vec_validate(xd->cpu_socket_id_by_queue, q); - xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); - vec_add2(dm->devices_by_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = q++; - })); - /* *INDENT-ON* */ - } - else - for (q = 0; q < xd->rx_q_used; q++) - { - int cpu = dm->input_cpu_first_index + next_cpu; - unsigned lcore = vlib_worker_threads[cpu].lcore_id; - - /* - * numa node for worker thread handling this queue - * needed for taking buffers from the right mempool - */ - vec_validate (xd->cpu_socket_id_by_queue, q); - xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore); - - /* - * construct vector of (device,queue) pairs for each worker thread - */ - vec_add2 (dm->devices_by_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = q; - - next_cpu++; - if (next_cpu == dm->input_cpu_count) - next_cpu = 0; - } - - - if (devconf->hqos_enabled) - { - xd->flags |= DPDK_DEVICE_FLAG_HQOS; - - if (devconf->hqos.hqos_thread_valid) - { - int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; - - if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count) - return clib_error_return (0, "invalid HQoS thread index"); - - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; - } - else - { - int cpu = dm->hqos_cpu_first_index + next_hqos_cpu; - - if (dm->hqos_cpu_count == 0) - return clib_error_return (0, "no HQoS threads available"); - - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; - - next_hqos_cpu++; - if (next_hqos_cpu == dm->hqos_cpu_count) - next_hqos_cpu = 0; - - devconf->hqos.hqos_thread_valid = 1; - devconf->hqos.hqos_thread = cpu; - } - } - - vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j < tm->n_vlib_mains; j++) - { - vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc, - sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->tx_vectors[j]); - } - - vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j < xd->rx_q_used; j++) - { - vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, - CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->rx_vectors[j]); - } - - vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); - - rv = dpdk_port_setup (dm, xd); - - if (rv) - return rv; - - if (devconf->hqos_enabled) - { - rv = dpdk_port_setup_hqos (xd, &devconf->hqos); - if (rv) - return rv; - } - - /* count the number of descriptors used for this device */ - nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; - - error = ethernet_register_interface - (dm->vnet_main, 
dpdk_device_class.index, xd->device_index, - /* ethernet address */ addr, - &xd->vlib_hw_if_index, dpdk_flag_change); - if (error) - return error; - - sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); - xd->vlib_sw_if_index = sw->sw_if_index; - hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); - - /* - * DAW-FIXME: The Cisco VIC firmware does not provide an api for a - * driver to dynamically change the mtu. If/when the - * VIC firmware gets fixed, then this should be removed. - */ - if (xd->pmd == VNET_DPDK_PMD_ENIC) - { - /* - * Initialize mtu to what has been set by CIMC in the firmware cfg. - */ - hi->max_packet_bytes = dev_info.max_rx_pktlen; - if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF) - vlan_strip = 1; /* remove vlan tag from VIC port by default */ - else - clib_warning ("VLAN strip disabled for interface\n"); - } - else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON) - vlan_strip = 1; - - if (vlan_strip) - { - int vlan_off; - vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); - vlan_off |= ETH_VLAN_STRIP_OFFLOAD; - xd->port_conf.rxmode.hw_vlan_strip = vlan_off; - if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0) - clib_warning ("VLAN strip enabled for interface\n"); - else - clib_warning ("VLAN strip cannot be supported by interface\n"); - } - - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = - xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); - - rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); - } - - if (nb_desc > dm->conf->num_mbufs) - clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", - dm->conf->num_mbufs, nb_desc); - - return 0; -} - -static void -dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) -{ - vlib_pci_main_t *pm = &pci_main; - clib_error_t *error; - vlib_pci_device_t *d; - u8 *pci_addr = 0; - int num_whitelisted = vec_len (conf->dev_confs); - - /* *INDENT-OFF* */ - pool_foreach (d, pm->pci_devs, ({ - dpdk_device_config_t * devconf = 0; - vec_reset_length (pci_addr); - pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); - - if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO) - continue; - - if (num_whitelisted) - { - uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32); - - if (!p) - continue; - - devconf = pool_elt_at_index (conf->dev_confs, p[0]); - } - - /* virtio */ - if (d->vendor_id == 0x1af4 && d->device_id == 0x1000) - ; - /* vmxnet3 */ - else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0) - ; - /* all Intel devices */ - else if (d->vendor_id == 0x8086) - ; - /* Cisco VIC */ - else if (d->vendor_id == 0x1137 && d->device_id == 0x0043) - ; - /* Chelsio T4/T5 */ - else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000) - ; - else - { - clib_warning ("Unsupported Ethernet PCI device 0x%04x:0x%04x found " - "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id, - pci_addr); - continue; - } - - error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name); - - if (error) - { - if (devconf == 0) - { - pool_get (conf->dev_confs, devconf); - hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32, - devconf - conf->dev_confs); - devconf->pci_addr.as_u32 = d->bus_address.as_u32; - } - devconf->is_blacklisted = 1; - clib_error_report (error); - } - })); - /* *INDENT-ON* */ - vec_free (pci_addr); -} - -static clib_error_t * -dpdk_device_config 
(dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, - unformat_input_t * input, u8 is_default) -{ - clib_error_t *error = 0; - uword *p; - dpdk_device_config_t *devconf; - unformat_input_t sub_input; - - if (is_default) - { - devconf = &conf->default_devconf; - } - else - { - p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32); - - if (!p) - { - pool_get (conf->dev_confs, devconf); - hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32, - devconf - conf->dev_confs); - } - else - return clib_error_return (0, - "duplicate configuration for PCI address %U", - format_vlib_pci_addr, &pci_addr); - } - - devconf->pci_addr.as_u32 = pci_addr.as_u32; - devconf->hqos_enabled = 0; - dpdk_device_config_hqos_default (&devconf->hqos); - - if (!input) - return 0; - - unformat_skip_white_space (input); - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues)) - ; - else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues)) - ; - else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc)) - ; - else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc)) - ; - else if (unformat (input, "workers %U", unformat_bitmap_list, - &devconf->workers)) - ; - else - if (unformat - (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input)) - { - error = unformat_rss_fn (&sub_input, &devconf->rss_fn); - if (error) - break; - } - else if (unformat (input, "vlan-strip-offload off")) - devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF; - else if (unformat (input, "vlan-strip-offload on")) - devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON; - else - if (unformat - (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input)) - { - devconf->hqos_enabled = 1; - error = unformat_hqos (&sub_input, &devconf->hqos); - if (error) - break; - } - else if (unformat (input, "hqos")) - { - devconf->hqos_enabled = 1; - } - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - break; - } - } - - if (error) - return error; - - if (devconf->workers && devconf->num_rx_queues == 0) - devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers); - else if (devconf->workers && - clib_bitmap_count_set_bits (devconf->workers) != - devconf->num_rx_queues) - error = - clib_error_return (0, - "%U: number of worker threadds must be " - "equal to number of rx queues", format_vlib_pci_addr, - &pci_addr); - - return error; -} - -static clib_error_t * -dpdk_config (vlib_main_t * vm, unformat_input_t * input) -{ - clib_error_t *error = 0; - dpdk_main_t *dm = &dpdk_main; - dpdk_config_main_t *conf = &dpdk_config_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_device_config_t *devconf; - vlib_pci_addr_t pci_addr; - unformat_input_t sub_input; - u8 *s, *tmp = 0; - u8 *rte_cmd = 0, *ethname = 0; - u32 log_level; - int ret, i; - int num_whitelisted = 0; - u8 no_pci = 0; - u8 no_huge = 0; - u8 huge_dir = 0; - u8 file_prefix = 0; - u8 *socket_mem = 0; - - conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - /* Prime the pump */ - if (unformat (input, "no-hugetlb")) - { - vec_add1 (conf->eal_init_args, (u8 *) "no-huge"); - no_huge = 1; - } - - else if (unformat (input, "enable-tcp-udp-checksum")) - conf->enable_tcp_udp_checksum = 1; - - else if (unformat (input, "decimal-interface-names")) - conf->interface_name_format_decimal = 1; - - else if 
(unformat (input, "no-multi-seg")) - conf->no_multi_seg = 1; - - else if (unformat (input, "enable-cryptodev")) - conf->cryptodev = 1; - - else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input, - &sub_input)) - { - error = - dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input, - 1); - - if (error) - return error; - } - else - if (unformat - (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, - unformat_vlib_cli_sub_input, &sub_input)) - { - error = dpdk_device_config (conf, pci_addr, &sub_input, 0); - - if (error) - return error; - - num_whitelisted++; - } - else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr)) - { - error = dpdk_device_config (conf, pci_addr, 0, 0); - - if (error) - return error; - - num_whitelisted++; - } - else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) - ; - else if (unformat (input, "kni %d", &conf->num_kni)) - ; - else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) - ; - else if (unformat (input, "socket-mem %s", &socket_mem)) - ; - else if (unformat (input, "no-pci")) - { - no_pci = 1; - tmp = format (0, "--no-pci%c", 0); - vec_add1 (conf->eal_init_args, tmp); - } - else if (unformat (input, "poll-sleep %d", &dm->poll_sleep)) - ; - -#define _(a) \ - else if (unformat(input, #a)) \ - { \ - tmp = format (0, "--%s%c", #a, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - } - foreach_eal_double_hyphen_predicate_arg -#undef _ -#define _(a) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - if (!strncmp(#a, "huge-dir", 8)) \ - huge_dir = 1; \ - else if (!strncmp(#a, "file-prefix", 11)) \ - file_prefix = 1; \ - tmp = format (0, "--%s%c", #a, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - if (!strncmp(#a, "vdev", 4)) \ - if (strstr((char*)s, "af_packet")) \ - clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \ - vec_add1 (conf->eal_init_args, s); \ - } - foreach_eal_double_hyphen_arg -#undef _ -#define _(a,b) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - tmp = format (0, "-%s%c", #b, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - vec_add1 (conf->eal_init_args, s); \ - } - foreach_eal_single_hyphen_arg -#undef _ -#define _(a,b) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - tmp = format (0, "-%s%c", #b, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - vec_add1 (conf->eal_init_args, s); \ - conf->a##_set_manually = 1; \ - } - foreach_eal_single_hyphen_mandatory_arg -#undef _ - else if (unformat (input, "default")) - ; - - else if (unformat_skip_white_space (input)) - ; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - goto done; - } - } - - if (!conf->uio_driver_name) - conf->uio_driver_name = format (0, "uio_pci_generic%c", 0); - - /* - * Use 1G huge pages if available. - */ - if (!no_huge && !huge_dir) - { - u32 x, *mem_by_socket = 0; - uword c = 0; - u8 use_1g = 1; - u8 use_2m = 1; - u8 less_than_1g = 1; - int rv; - - umount (DEFAULT_HUGE_DIR); - - /* Process "socket-mem" parameter value */ - if (vec_len (socket_mem)) - { - unformat_input_t in; - unformat_init_vector (&in, socket_mem); - while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT) - { - if (unformat (&in, "%u,", &x)) - ; - else if (unformat (&in, "%u", &x)) - ; - else if (unformat (&in, ",")) - x = 0; - else - break; - - vec_add1 (mem_by_socket, x); - - if (x > 1023) - less_than_1g = 0; - } - /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... 
*/ - unformat_free (&in); - socket_mem = 0; - } - else - { - /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( - { - vec_validate(mem_by_socket, c); - mem_by_socket[c] = 256; /* default per-socket mem */ - } - )); - /* *INDENT-ON* */ - } - - /* check if available enough 1GB pages for each socket */ - /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( - { - int pages_avail, page_size, mem; - - vec_validate(mem_by_socket, c); - mem = mem_by_socket[c]; - - page_size = 1024; - pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); - - if (pages_avail < 0 || page_size * pages_avail < mem) - use_1g = 0; - - page_size = 2; - pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); - - if (pages_avail < 0 || page_size * pages_avail < mem) - use_2m = 0; - })); - /* *INDENT-ON* */ - - if (mem_by_socket == 0) - { - error = clib_error_return (0, "mem_by_socket NULL"); - goto done; - } - _vec_len (mem_by_socket) = c + 1; - - /* regenerate socket_mem string */ - vec_foreach_index (x, mem_by_socket) - socket_mem = format (socket_mem, "%s%u", - socket_mem ? "," : "", mem_by_socket[x]); - socket_mem = format (socket_mem, "%c", 0); - - vec_free (mem_by_socket); - - rv = mkdir (VPP_RUN_DIR, 0755); - if (rv && errno != EEXIST) - { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - VPP_RUN_DIR, errno); - goto done; - } - - rv = mkdir (DEFAULT_HUGE_DIR, 0755); - if (rv && errno != EEXIST) - { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - DEFAULT_HUGE_DIR, errno); - goto done; - } - - if (use_1g && !(less_than_1g && use_2m)) - { - rv = - mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); - } - else if (use_2m) - { - rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); - } - else - { - return clib_error_return (0, "not enough free huge pages"); - } - - if (rv) - { - error = clib_error_return (0, "mount failed %d", errno); - goto done; - } - - tmp = format (0, "--huge-dir%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); - vec_add1 (conf->eal_init_args, tmp); - if (!file_prefix) - { - tmp = format (0, "--file-prefix%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "vpp%c", 0); - vec_add1 (conf->eal_init_args, tmp); - } - } - - vec_free (rte_cmd); - vec_free (ethname); - - if (error) - return error; - - /* I'll bet that -c and -n must be the first and second args... 
*/ - if (!conf->coremask_set_manually) - { - vlib_thread_registration_t *tr; - uword *coremask = 0; - int i; - - /* main thread core */ - coremask = clib_bitmap_set (coremask, tm->main_lcore, 1); - - for (i = 0; i < vec_len (tm->registrations); i++) - { - tr = tm->registrations[i]; - coremask = clib_bitmap_or (coremask, tr->coremask); - } - - vec_insert (conf->eal_init_args, 2, 1); - conf->eal_init_args[1] = (u8 *) "-c"; - tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); - conf->eal_init_args[2] = tmp; - clib_bitmap_free (coremask); - } - - if (!conf->nchannels_set_manually) - { - vec_insert (conf->eal_init_args, 2, 3); - conf->eal_init_args[3] = (u8 *) "-n"; - tmp = format (0, "%d", conf->nchannels); - conf->eal_init_args[4] = tmp; - } - - if (no_pci == 0 && geteuid () == 0) - dpdk_bind_devices_to_uio (conf); - -#define _(x) \ - if (devconf->x == 0 && conf->default_devconf.x > 0) \ - devconf->x = conf->default_devconf.x ; - - /* *INDENT-OFF* */ - pool_foreach (devconf, conf->dev_confs, ({ - - /* default per-device config items */ - foreach_dpdk_device_config_item - - /* add DPDK EAL whitelist/blacklist entry */ - if (num_whitelisted > 0 && devconf->is_blacklisted == 0) - { - tmp = format (0, "-w%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); - vec_add1 (conf->eal_init_args, tmp); - } - else if (num_whitelisted == 0 && devconf->is_blacklisted != 0) - { - tmp = format (0, "-b%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); - vec_add1 (conf->eal_init_args, tmp); - } - })); - /* *INDENT-ON* */ - -#undef _ - - /* set master-lcore */ - tmp = format (0, "--master-lcore%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%u%c", tm->main_lcore, 0); - vec_add1 (conf->eal_init_args, tmp); - - /* set socket-mem */ - tmp = format (0, "--socket-mem%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%s%c", socket_mem, 0); - vec_add1 (conf->eal_init_args, tmp); - - /* NULL terminate the "argv" vector, in case of stupidity */ - vec_add1 (conf->eal_init_args, 0); - _vec_len (conf->eal_init_args) -= 1; - - /* Set up DPDK eal and packet mbuf pool early. */ - - log_level = (CLIB_DEBUG > 0) ? 
RTE_LOG_DEBUG : RTE_LOG_NOTICE; - - rte_set_log_level (log_level); - - vm = vlib_get_main (); - - /* make copy of args as rte_eal_init tends to mess up with arg array */ - for (i = 1; i < vec_len (conf->eal_init_args); i++) - conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", - conf->eal_init_args[i]); - - ret = - rte_eal_init (vec_len (conf->eal_init_args), - (char **) conf->eal_init_args); - - /* lazy umount hugepages */ - umount2 (DEFAULT_HUGE_DIR, MNT_DETACH); - - if (ret < 0) - return clib_error_return (0, "rte_eal_init returned %d", ret); - - /* Dump the physical memory layout prior to creating the mbuf_pool */ - fprintf (stdout, "DPDK physical memory layout:\n"); - rte_dump_physmem_layout (stdout); - - /* main thread 1st */ - error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); - if (error) - return error; - - for (i = 0; i < RTE_MAX_LCORE; i++) - { - error = vlib_buffer_pool_create (vm, conf->num_mbufs, - rte_lcore_to_socket_id (i)); - if (error) - return error; - } - -done: - return error; -} - -VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk"); - -void -dpdk_update_link_state (dpdk_device_t * xd, f64 now) -{ - vnet_main_t *vnm = vnet_get_main (); - struct rte_eth_link prev_link = xd->link; - u32 hw_flags = 0; - u8 hw_flags_chg = 0; - - /* only update link state for PMD interfaces */ - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - return; - - xd->time_last_link_update = now ? now : xd->time_last_link_update; - memset (&xd->link, 0, sizeof (xd->link)); - rte_eth_link_get_nowait (xd->device_index, &xd->link); - - if (LINK_STATE_ELOGS) - { - vlib_main_t *vm = vlib_get_main (); - ELOG_TYPE_DECLARE (e) = - { - .format = - "update-link-state: sw_if_index %d, admin_up %d," - "old link_state %d new link_state %d",.format_args = "i4i1i1i1",}; - - struct - { - u32 sw_if_index; - u8 admin_up; - u8 old_link_state; - u8 new_link_state; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->sw_if_index = xd->vlib_sw_if_index; - ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0; - ed->old_link_state = (u8) - vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); - ed->new_link_state = (u8) xd->link.link_status; - } - - if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) && - ((xd->link.link_status != 0) ^ - vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) - { - hw_flags_chg = 1; - hw_flags |= (xd->link.link_status ? 
VNET_HW_INTERFACE_FLAG_LINK_UP : 0); - } - - if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) - { - hw_flags_chg = 1; - switch (xd->link.link_duplex) - { - case ETH_LINK_HALF_DUPLEX: - hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; - break; - case ETH_LINK_FULL_DUPLEX: - hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; - break; - default: - break; - } - } - if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) - { - hw_flags_chg = 1; - switch (xd->link.link_speed) - { - case ETH_SPEED_NUM_10M: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; - break; - case ETH_SPEED_NUM_100M: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; - break; - case ETH_SPEED_NUM_1G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; - break; - case ETH_SPEED_NUM_10G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; - break; - case ETH_SPEED_NUM_40G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; - break; - case 0: - break; - default: - clib_warning ("unknown link speed %d", xd->link.link_speed); - break; - } - } - if (hw_flags_chg) - { - if (LINK_STATE_ELOGS) - { - vlib_main_t *vm = vlib_get_main (); - - ELOG_TYPE_DECLARE (e) = - { - .format = - "update-link-state: sw_if_index %d, new flags %d",.format_args - = "i4i4",}; - - struct - { - u32 sw_if_index; - u32 flags; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->sw_if_index = xd->vlib_sw_if_index; - ed->flags = hw_flags; - } - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); - } -} - -static uword -dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - clib_error_t *error; - vnet_main_t *vnm = vnet_get_main (); - dpdk_main_t *dm = &dpdk_main; - ethernet_main_t *em = ðernet_main; - dpdk_device_t *xd; - vlib_thread_main_t *tm = vlib_get_thread_main (); - int i; - - error = dpdk_lib_init (dm); - - /* - * Turn on the input node if we found some devices to drive - * and we're not running worker threads or i/o threads - */ - - if (error == 0 && vec_len (dm->devices) > 0) - { - if (tm->n_vlib_mains == 1) - vlib_node_set_state (vm, dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - else - for (i = 0; i < tm->n_vlib_mains; i++) - if (vec_len (dm->devices_by_cpu[i]) > 0) - vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - } - - if (error) - clib_error_report (error); - - tm->worker_thread_release = 1; - - f64 now = vlib_time_now (vm); - vec_foreach (xd, dm->devices) - { - dpdk_update_link_state (xd, now); - } - - { - /* - * Extra set up for bond interfaces: - * 1. Setup MACs for bond interfaces and their slave links which was set - * in dpdk_port_setup() but needs to be done again here to take effect. - * 2. Set up info for bond interface related CLI support. 
- */ - int nports = rte_eth_dev_count (); - if (nports > 0) - { - for (i = 0; i < nports; i++) - { - struct rte_eth_dev_info dev_info; - rte_eth_dev_info_get (i, &dev_info); - if (!dev_info.driver_name) - dev_info.driver_name = dev_info.pci_dev->driver->driver.name; - - ASSERT (dev_info.driver_name); - if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0) - { - u8 addr[6]; - u8 slink[16]; - int nlink = rte_eth_bond_slaves_get (i, slink, 16); - if (nlink > 0) - { - vnet_hw_interface_t *bhi; - ethernet_interface_t *bei; - int rv; - - /* Get MAC of 1st slave link */ - rte_eth_macaddr_get (slink[0], - (struct ether_addr *) addr); - /* Set MAC of bounded interface to that of 1st slave link */ - rv = - rte_eth_bond_mac_address_set (i, - (struct ether_addr *) - addr); - if (rv < 0) - clib_warning ("Failed to set MAC address"); - - /* Populate MAC of bonded interface in VPP hw tables */ - bhi = - vnet_get_hw_interface (vnm, - dm->devices[i].vlib_hw_if_index); - bei = - pool_elt_at_index (em->interfaces, bhi->hw_instance); - clib_memcpy (bhi->hw_address, addr, 6); - clib_memcpy (bei->address, addr, 6); - /* Init l3 packet size allowed on bonded interface */ - bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES; - bhi->max_l3_packet_bytes[VLIB_RX] = - bhi->max_l3_packet_bytes[VLIB_TX] = - ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t); - while (nlink >= 1) - { /* for all slave links */ - int slave = slink[--nlink]; - dpdk_device_t *sdev = &dm->devices[slave]; - vnet_hw_interface_t *shi; - vnet_sw_interface_t *ssi; - /* Add MAC to all slave links except the first one */ - if (nlink) - rte_eth_dev_mac_addr_add (slave, - (struct ether_addr *) - addr, 0); - /* Set slaves bitmap for bonded interface */ - bhi->bond_info = - clib_bitmap_set (bhi->bond_info, - sdev->vlib_hw_if_index, 1); - /* Set slave link flags on slave interface */ - shi = - vnet_get_hw_interface (vnm, sdev->vlib_hw_if_index); - ssi = - vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index); - shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; - ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; - - /* Set l3 packet size allowed as the lowest of slave */ - if (bhi->max_l3_packet_bytes[VLIB_RX] > - shi->max_l3_packet_bytes[VLIB_RX]) - bhi->max_l3_packet_bytes[VLIB_RX] = - bhi->max_l3_packet_bytes[VLIB_TX] = - shi->max_l3_packet_bytes[VLIB_RX]; - - /* Set max packet size allowed as the lowest of slave */ - if (bhi->max_packet_bytes > shi->max_packet_bytes) - bhi->max_packet_bytes = shi->max_packet_bytes; - } - } - } - } - } - } - - while (1) - { - /* - * check each time through the loop in case intervals are changed - */ - f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ? 
- dm->link_state_poll_interval : dm->stat_poll_interval; - - vlib_process_wait_for_event_or_clock (vm, min_wait); - - if (dm->admin_up_down_in_progress) - /* skip the poll if an admin up down is in progress (on any interface) */ - continue; - - vec_foreach (xd, dm->devices) - { - f64 now = vlib_time_now (vm); - if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) - dpdk_update_counters (xd, now); - if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) - dpdk_update_link_state (xd, now); - - } - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_process_node,static) = { - .function = dpdk_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "dpdk-process", - .process_log2_n_stack_bytes = 17, -}; -/* *INDENT-ON* */ - -int -dpdk_set_stat_poll_interval (f64 interval) -{ - if (interval < DPDK_MIN_STATS_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); - - dpdk_main.stat_poll_interval = interval; - - return 0; -} - -int -dpdk_set_link_state_poll_interval (f64 interval) -{ - if (interval < DPDK_MIN_LINK_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); - - dpdk_main.link_state_poll_interval = interval; - - return 0; -} - -clib_error_t * -dpdk_init (vlib_main_t * vm) -{ - dpdk_main_t *dm = &dpdk_main; - vlib_node_t *ei; - clib_error_t *error = 0; - vlib_thread_main_t *tm = vlib_get_thread_main (); - - /* verify that structs are cacheline aligned */ - STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0, - "Cache line marker must be 1st element in dpdk_device_t"); - STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) == - CLIB_CACHE_LINE_BYTES, - "Data in cache line 0 is bigger than cache line size"); - STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, - "Cache line marker must be 1st element in frame_queue_trace_t"); - - dm->vlib_main = vm; - dm->vnet_main = vnet_get_main (); - dm->conf = &dpdk_config_main; - - ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); - if (ei == 0) - return clib_error_return (0, "ethernet-input node AWOL"); - - dm->ethernet_input_node_index = ei->index; - - dm->conf->nchannels = 4; - dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; - vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); - - dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); - dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); - dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword)); - - /* $$$ use n_thread_stacks since it's known-good at this point */ - vec_validate (dm->recycle, tm->n_thread_stacks - 1); - - /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ - dm->buffer_flags_template = - (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID - | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); - - dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; - dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; - - /* init CLI */ - if ((error = vlib_call_init_function (vm, dpdk_cli_init))) - return error; - - return error; -} - -VLIB_INIT_FUNCTION (dpdk_init); - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/cli.c b/src/vnet/devices/dpdk/ipsec/cli.c deleted file mode 100644 index f9d3a5d0..00000000 --- a/src/vnet/devices/dpdk/ipsec/cli.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. 
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -static void -dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) -{ - dpdk_config_main_t *conf = &dpdk_config_main; - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 i, skip_master; - - if (!conf->cryptodev) - { - vlib_cli_output (vm, "DPDK Cryptodev support is disabled\n"); - return; - } - - if (detail_display) - vlib_cli_output (vm, "worker\t%10s\t%15s\tdir\tdev\tqp\n", - "cipher", "auth"); - else - vlib_cli_output (vm, "worker\tcrypto device id(type)\n"); - - skip_master = vlib_num_workers () > 0; - - for (i = 0; i < tm->n_vlib_mains; i++) - { - uword key, data; - u32 cpu_index = vlib_mains[i]->cpu_index; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - u8 *s = 0; - - if (skip_master) - { - skip_master = 0; - continue; - } - - if (!detail_display) - { - i32 last_cdev = -1; - crypto_qp_data_t *qpd; - - s = format (s, "%u\t", cpu_index); - - /* *INDENT-OFF* */ - vec_foreach (qpd, cwm->qp_data) - { - u32 dev_id = qpd->dev_id; - - if ((u16) last_cdev != dev_id) - { - struct rte_cryptodev_info cdev_info; - - rte_cryptodev_info_get (dev_id, &cdev_info); - - s = format(s, "%u(%s)\t", dev_id, cdev_info.feature_flags & - RTE_CRYPTODEV_FF_HW_ACCELERATED ? "HW" : "SW"); - } - last_cdev = dev_id; - } - /* *INDENT-ON* */ - vlib_cli_output (vm, "%s", s); - } - else - { - char cipher_str[15], auth_str[15]; - struct rte_cryptodev_capabilities cap; - crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; - /* *INDENT-OFF* */ - hash_foreach (key, data, cwm->algo_qp_map, - ({ - cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; - cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER; - cap.sym.cipher.algo = p_key->cipher_algo; - check_algo_is_supported (&cap, cipher_str); - cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; - cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AUTH; - cap.sym.auth.algo = p_key->auth_algo; - check_algo_is_supported (&cap, auth_str); - vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n", - vlib_mains[i]->cpu_index, cipher_str, auth_str, - p_key->is_outbound ? 
"out" : "in", - cwm->qp_data[data].dev_id, - cwm->qp_data[data].qp_id); - })); - /* *INDENT-ON* */ - } - } -} - -static clib_error_t * -lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u16 detail = 0; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "verbose")) - detail = 1; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - dpdk_ipsec_show_mapping (vm, detail); - -done: - unformat_free (line_input); - - return error; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (lcore_cryptodev_map, static) = { - .path = "show crypto device mapping", - .short_help = - "show cryptodev device mapping ", - .function = lcore_cryptodev_map_fn, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/crypto_node.c b/src/vnet/devices/dpdk/ipsec/crypto_node.c deleted file mode 100644 index e8fef235..00000000 --- a/src/vnet/devices/dpdk/ipsec/crypto_node.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - *------------------------------------------------------------------ - * crypto_node.c - DPDK Cryptodev input node - * - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include -#include -#include -#include - -#include -#include -#include - -#define foreach_dpdk_crypto_input_next \ - _(DROP, "error-drop") \ - _(ENCRYPT_POST, "dpdk-esp-encrypt-post") \ - _(DECRYPT_POST, "dpdk-esp-decrypt-post") - -typedef enum -{ -#define _(f,s) DPDK_CRYPTO_INPUT_NEXT_##f, - foreach_dpdk_crypto_input_next -#undef _ - DPDK_CRYPTO_INPUT_N_NEXT, -} dpdk_crypto_input_next_t; - -#define foreach_dpdk_crypto_input_error \ - _(DQ_COPS, "Crypto ops dequeued") \ - _(COP_FAILED, "Crypto op failed") - -typedef enum -{ -#define _(f,s) DPDK_CRYPTO_INPUT_ERROR_##f, - foreach_dpdk_crypto_input_error -#undef _ - DPDK_CRYPTO_INPUT_N_ERROR, -} dpdk_crypto_input_error_t; - -static char *dpdk_crypto_input_error_strings[] = { -#define _(n, s) s, - foreach_dpdk_crypto_input_error -#undef _ -}; - -vlib_node_registration_t dpdk_crypto_input_node; - -typedef struct -{ - u32 cdev; - u32 qp; - u32 status; - u32 sa_idx; - u32 next_index; -} dpdk_crypto_input_trace_t; - -static u8 * -format_dpdk_crypto_input_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - dpdk_crypto_input_trace_t *t = va_arg (*args, dpdk_crypto_input_trace_t *); - - s = format (s, "dpdk_crypto: cryptodev-id %u queue-pair %u next-index %d", - t->cdev, t->qp, t->next_index); - - s = format (s, " status %u sa-idx %u\n", t->status, t->sa_idx); - - return s; -} - -static_always_inline u32 -dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, - crypto_qp_data_t * qpd) -{ - u32 n_deq, *to_next = 0, next_index, n_cops, def_next_index; - struct rte_crypto_op **cops = qpd->cops; - - if (qpd->inflights == 0) - return 0; - - if (qpd->is_outbound) - def_next_index = DPDK_CRYPTO_INPUT_NEXT_ENCRYPT_POST; - else - def_next_index = DPDK_CRYPTO_INPUT_NEXT_DECRYPT_POST; - - n_cops = rte_cryptodev_dequeue_burst (qpd->dev_id, qpd->qp_id, - cops, VLIB_FRAME_SIZE); - n_deq = n_cops; - next_index = def_next_index; - - qpd->inflights -= n_cops; - ASSERT (qpd->inflights >= 0); - - while (n_cops > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_cops > 0 && n_left_to_next > 0) - { - u32 bi0, next0; - vlib_buffer_t *b0 = 0; - struct rte_crypto_op *cop; - struct rte_crypto_sym_op *sym_cop; - - cop = cops[0]; - cops += 1; - n_cops -= 1; - n_left_to_next -= 1; - - next0 = def_next_index; - - if (PREDICT_FALSE (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) - { - next0 = DPDK_CRYPTO_INPUT_NEXT_DROP; - vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, - DPDK_CRYPTO_INPUT_ERROR_COP_FAILED, - 1); - } - cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; - - sym_cop = (struct rte_crypto_sym_op *) (cop + 1); - b0 = vlib_buffer_from_rte_mbuf (sym_cop->m_src); - bi0 = vlib_get_buffer_index (vm, b0); - - to_next[0] = bi0; - to_next += 1; - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - vlib_trace_next_frame (vm, node, next0); - dpdk_crypto_input_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->cdev = qpd->dev_id; - tr->qp = qpd->qp_id; - tr->status = cop->status; - tr->next_index = next0; - tr->sa_idx = vnet_buffer (b0)->ipsec.sad_index; - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - crypto_free_cop (qpd, qpd->cops, n_deq); - - 
vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, - DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq); - return n_deq; -} - -static uword -dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - u32 cpu_index = os_get_cpu_number (); - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - crypto_qp_data_t *qpd; - u32 n_deq = 0; - - /* *INDENT-OFF* */ - vec_foreach (qpd, cwm->qp_data) - n_deq += dpdk_crypto_dequeue(vm, node, qpd); - /* *INDENT-ON* */ - - return n_deq; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_crypto_input_node) = -{ - .function = dpdk_crypto_input_fn, - .name = "dpdk-crypto-input", - .format_trace = format_dpdk_crypto_input_trace, - .type = VLIB_NODE_TYPE_INPUT, - .state = VLIB_NODE_STATE_DISABLED, - .n_errors = DPDK_CRYPTO_INPUT_N_ERROR, - .error_strings = dpdk_crypto_input_error_strings, - .n_next_nodes = DPDK_CRYPTO_INPUT_N_NEXT, - .next_nodes = - { -#define _(s,n) [DPDK_CRYPTO_INPUT_NEXT_##s] = n, - foreach_dpdk_crypto_input_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_crypto_input_node, dpdk_crypto_input_fn) -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/dir.dox b/src/vnet/devices/dpdk/ipsec/dir.dox deleted file mode 100644 index ffebfc4d..00000000 --- a/src/vnet/devices/dpdk/ipsec/dir.dox +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - @dir vnet/vnet/devices/dpdk/ipsec - @brief IPSec ESP encrypt/decrypt using DPDK Cryptodev API -*/ diff --git a/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md b/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md deleted file mode 100644 index fed2fe0e..00000000 --- a/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md +++ /dev/null @@ -1,86 +0,0 @@ -# VPP IPSec implementation using DPDK Cryptodev API {#dpdk_crypto_ipsec_doc} - -This document is meant to contain all related information about implementation and usability. - - -## VPP IPsec with DPDK Cryptodev - -DPDK Cryptodev is an asynchronous crypto API that supports both Hardware and Software implementations (for more details refer to [DPDK Cryptography Device Library documentation](http://dpdk.org/doc/guides/prog_guide/cryptodev_lib.html)). - -When DPDK support is enabled and there are enough Cryptodev resources for all workers, the node graph is reconfigured by adding and changing default next nodes. - -The following nodes are added: -* dpdk-crypto-input : polling input node, basically dequeuing from crypto devices. -* dpdk-esp-encrypt : internal node. -* dpdk-esp-decrypt : internal node. -* dpdk-esp-encrypt-post : internal node. -* dpdk-esp-decrypt-post : internal node. 
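These new nodes are wired into the existing IPsec graph at startup by the dpdk-ipsec-process node (see ipsec.c later in this patch). As a rough, non-authoritative sketch of how that rewiring is done with the vlib API (the node names are taken from this patch, but the helper function below is hypothetical and not part of the patch):

```
/* Illustrative sketch only (not part of the patch): redirect the default
 * ESP encrypt path to the DPDK Cryptodev node, mirroring what
 * dpdk_ipsec_process() in ipsec.c does at init time. */
#include <vlib/vlib.h>

static u32
redirect_esp_encrypt_next (vlib_main_t * vm)
{
  /* Look up the existing IPsec output node and the new DPDK encrypt node. */
  vlib_node_t *ipsec_out = vlib_get_node_by_name (vm, (u8 *) "ipsec-output-ip4");
  vlib_node_t *dpdk_enc = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt");

  /* Register dpdk-esp-encrypt as a next node of ipsec-output-ip4 and
   * return the next index used when enqueueing buffers to it. */
  return vlib_node_add_next (vm, ipsec_out->index, dpdk_enc->index);
}
```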
- -The following default next nodes are set: -* for esp encryption: esp-encrypt -> dpdk-esp-encrypt -* for esp decryption: esp-decrypt -> dpdk-esp-decrypt - - -### How to enable VPP IPSec with DPDK Cryptodev support - -DPDK Cryptodev is supported in DPDK-enabled VPP. -By default, only HW Cryptodev is supported, but it needs to be explicitly enabled with the following config option: - -``` -dpdk { - enable-cryptodev -} -``` - -To enable SW Cryptodev support (AESNI-MB-PMD and GCM-PMD), we need the following environment option: - - vpp_uses_dpdk_cryptodev_sw=yes - -A couple of ways to achieve this: -* uncomment/add it in the platforms config (e.g. build-data/platforms/vpp.mk) -* set the option when building vpp (e.g. make vpp_uses_dpdk_cryptodev_sw=yes build-release) - -Enabling SW Cryptodev support means that you need to pre-build the crypto libraries required by those SW Cryptodev PMDs. - - -### Crypto resources allocation - -VPP allocates crypto resources on a best-effort basis: -* hardware crypto resources are allocated first, then software. -* if there are not enough crypto resources for all workers, the node graph is not modified and the default OpenSSL-based VPP IPsec implementation is used. The following message is displayed: - - 0: dpdk_ipsec_init: not enough cryptodevs for ipsec - - -### Configuration example - -To enable DPDK Cryptodev, the user just needs to provide the startup.conf option -as mentioned previously. - -Example startup.conf: - -``` -dpdk { - socket-mem 1024,1024 - num-mbufs 131072 - dev 0000:81:00.0 - dev 0000:81:00.1 - enable-cryptodev - dev 0000:85:01.0 - dev 0000:85:01.1 - vdev cryptodev_aesni_mb_pmd,socket_id=1 - vdev cryptodev_aesni_mb_pmd,socket_id=1 -} -``` - -In the above configuration: -* 0000:85:01.0 and 0000:85:01.1 are crypto BDFs; they require the same driver binding as DPDK Ethernet devices, but they do not support any extra configuration options. -* Two AESNI-MB Software Cryptodev PMDs are created in NUMA node 1. - -For further details, refer to the [DPDK Crypto Device Driver documentation](http://dpdk.org/doc/guides/cryptodevs/index.html). - -### Operational data - -The following CLI command displays the Cryptodev/Worker mapping: - - show crypto device mapping [verbose] diff --git a/src/vnet/devices/dpdk/ipsec/esp.h b/src/vnet/devices/dpdk/ipsec/esp.h deleted file mode 100644 index d0b27618..00000000 --- a/src/vnet/devices/dpdk/ipsec/esp.h +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ -#ifndef __DPDK_ESP_H__ -#define __DPDK_ESP_H__ - -#include -#include -#include - -typedef struct -{ - enum rte_crypto_cipher_algorithm algo; - u8 key_len; - u8 iv_len; -} dpdk_esp_crypto_alg_t; - -typedef struct -{ - enum rte_crypto_auth_algorithm algo; - u8 trunc_size; -} dpdk_esp_integ_alg_t; - -typedef struct -{ - dpdk_esp_crypto_alg_t *esp_crypto_algs; - dpdk_esp_integ_alg_t *esp_integ_algs; -} dpdk_esp_main_t; - -dpdk_esp_main_t dpdk_esp_main; - -static_always_inline void -dpdk_esp_init () -{ - dpdk_esp_main_t *em = &dpdk_esp_main; - dpdk_esp_integ_alg_t *i; - dpdk_esp_crypto_alg_t *c; - - vec_validate (em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1); - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128]; - c->algo = RTE_CRYPTO_CIPHER_AES_CBC; - c->key_len = 16; - c->iv_len = 16; - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192]; - c->algo = RTE_CRYPTO_CIPHER_AES_CBC; - c->key_len = 24; - c->iv_len = 16; - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256]; - c->algo = RTE_CRYPTO_CIPHER_AES_CBC; - c->key_len = 32; - c->iv_len = 16; - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_GCM_128]; - c->algo = RTE_CRYPTO_CIPHER_AES_GCM; - c->key_len = 16; - c->iv_len = 8; - - vec_validate (em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1); - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96]; - i->algo = RTE_CRYPTO_AUTH_SHA1_HMAC; - i->trunc_size = 12; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96]; - i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - i->trunc_size = 12; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128]; - i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - i->trunc_size = 16; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192]; - i->algo = RTE_CRYPTO_AUTH_SHA384_HMAC; - i->trunc_size = 24; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256]; - i->algo = RTE_CRYPTO_AUTH_SHA512_HMAC; - i->trunc_size = 32; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_AES_GCM_128]; - i->algo = RTE_CRYPTO_AUTH_AES_GCM; - i->trunc_size = 16; -} - -static_always_inline int -translate_crypto_algo (ipsec_crypto_alg_t crypto_algo, - struct rte_crypto_sym_xform *cipher_xform) -{ - switch (crypto_algo) - { - case IPSEC_CRYPTO_ALG_NONE: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL; - break; - case IPSEC_CRYPTO_ALG_AES_CBC_128: - case IPSEC_CRYPTO_ALG_AES_CBC_192: - case IPSEC_CRYPTO_ALG_AES_CBC_256: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; - break; - case IPSEC_CRYPTO_ALG_AES_GCM_128: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM; - break; - default: - return -1; - } - - cipher_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; - - return 0; -} - -static_always_inline int -translate_integ_algo (ipsec_integ_alg_t integ_alg, - struct rte_crypto_sym_xform *auth_xform, int use_esn) -{ - switch (integ_alg) - { - case IPSEC_INTEG_ALG_NONE: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_NULL; - auth_xform->auth.digest_length = 0; - break; - case IPSEC_INTEG_ALG_SHA1_96: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC; - auth_xform->auth.digest_length = 12; - break; - case IPSEC_INTEG_ALG_SHA_256_96: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - auth_xform->auth.digest_length = 12; - break; - case IPSEC_INTEG_ALG_SHA_256_128: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - auth_xform->auth.digest_length = 16; - break; - case IPSEC_INTEG_ALG_SHA_384_192: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA384_HMAC; - auth_xform->auth.digest_length = 24; - break; - case IPSEC_INTEG_ALG_SHA_512_256: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA512_HMAC; - 
auth_xform->auth.digest_length = 32; - break; - case IPSEC_INTEG_ALG_AES_GCM_128: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM; - auth_xform->auth.digest_length = 16; - auth_xform->auth.add_auth_data_length = use_esn ? 12 : 8; - break; - default: - return -1; - } - - auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; - - return 0; -} - -static_always_inline int -create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, - u8 is_outbound) -{ - u32 cpu_index = os_get_cpu_number (); - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - struct rte_crypto_sym_xform cipher_xform = { 0 }; - struct rte_crypto_sym_xform auth_xform = { 0 }; - struct rte_crypto_sym_xform *xfs; - uword key = 0, *data; - crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; - - if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - sa->crypto_key_len -= 4; - clib_memcpy (&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4); - } - else - { - u32 seed = (u32) clib_cpu_time_now (); - sa->salt = random_u32 (&seed); - } - - cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; - cipher_xform.cipher.key.data = sa->crypto_key; - cipher_xform.cipher.key.length = sa->crypto_key_len; - - auth_xform.type = RTE_CRYPTO_SYM_XFORM_AUTH; - auth_xform.auth.key.data = sa->integ_key; - auth_xform.auth.key.length = sa->integ_key_len; - - if (translate_crypto_algo (sa->crypto_alg, &cipher_xform) < 0) - return -1; - p_key->cipher_algo = cipher_xform.cipher.algo; - - if (translate_integ_algo (sa->integ_alg, &auth_xform, sa->use_esn) < 0) - return -1; - p_key->auth_algo = auth_xform.auth.algo; - - if (is_outbound) - { - cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; - auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; - cipher_xform.next = &auth_xform; - xfs = &cipher_xform; - } - else - { - cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; - auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; - auth_xform.next = &cipher_xform; - xfs = &auth_xform; - } - - p_key->is_outbound = is_outbound; - - data = hash_get (cwm->algo_qp_map, key); - if (!data) - return -1; - - sa_sess->sess = - rte_cryptodev_sym_session_create (cwm->qp_data[*data].dev_id, xfs); - - if (!sa_sess->sess) - return -1; - - sa_sess->qp_index = (u8) * data; - - return 0; -} - -#endif /* __DPDK_ESP_H__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/esp_decrypt.c b/src/vnet/devices/dpdk/ipsec/esp_decrypt.c deleted file mode 100644 index 76007609..00000000 --- a/src/vnet/devices/dpdk/ipsec/esp_decrypt.c +++ /dev/null @@ -1,594 +0,0 @@ -/* - * esp_decrypt.c : IPSec ESP Decrypt node using DPDK Cryptodev - * - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include - -#define foreach_esp_decrypt_next \ -_(DROP, "error-drop") \ -_(IP4_INPUT, "ip4-input") \ -_(IP6_INPUT, "ip6-input") - -#define _(v, s) ESP_DECRYPT_NEXT_##v, -typedef enum { - foreach_esp_decrypt_next -#undef _ - ESP_DECRYPT_N_NEXT, -} esp_decrypt_next_t; - -#define foreach_esp_decrypt_error \ - _(RX_PKTS, "ESP pkts received") \ - _(DECRYPTION_FAILED, "ESP decryption failed") \ - _(REPLAY, "SA replayed packet") \ - _(NOT_IP, "Not IP packet (dropped)") \ - _(ENQ_FAIL, "Enqueue failed (buffer full)") \ - _(NO_CRYPTODEV, "Cryptodev not configured") \ - _(BAD_LEN, "Invalid ciphertext length") \ - _(UNSUPPORTED, "Cipher/Auth not supported") - - -typedef enum { -#define _(sym,str) ESP_DECRYPT_ERROR_##sym, - foreach_esp_decrypt_error -#undef _ - ESP_DECRYPT_N_ERROR, -} esp_decrypt_error_t; - -static char * esp_decrypt_error_strings[] = { -#define _(sym,string) string, - foreach_esp_decrypt_error -#undef _ -}; - -vlib_node_registration_t dpdk_esp_decrypt_node; - -typedef struct { - ipsec_crypto_alg_t crypto_alg; - ipsec_integ_alg_t integ_alg; -} esp_decrypt_trace_t; - -/* packet trace format function */ -static u8 * format_esp_decrypt_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *); - - s = format (s, "esp: crypto %U integrity %U", - format_ipsec_crypto_alg, t->crypto_alg, - format_ipsec_integ_alg, t->integ_alg); - return s; -} - -static uword -dpdk_esp_decrypt_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next, next_index; - ipsec_main_t *im = &ipsec_main; - u32 cpu_index = os_get_cpu_number(); - dpdk_crypto_main_t * dcm = &dpdk_crypto_main; - dpdk_esp_main_t * em = &dpdk_esp_main; - u32 i; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - if (PREDICT_FALSE(!dcm->workers_main)) - { - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_NO_CRYPTODEV, n_left_from); - vlib_buffer_free(vm, from, n_left_from); - return n_left_from; - } - - crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index); - u32 n_qps = vec_len(cwm->qp_data); - struct rte_crypto_op ** cops_to_enq[n_qps]; - u32 n_cop_qp[n_qps], * bi_to_enq[n_qps]; - - for (i = 0; i < n_qps; i++) - { - bi_to_enq[i] = cwm->qp_data[i].bi; - cops_to_enq[i] = cwm->qp_data[i].cops; - } - - memset(n_cop_qp, 0, n_qps * sizeof(u32)); - - crypto_alloc_cops(); - - next_index = ESP_DECRYPT_NEXT_DROP; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, sa_index0 = ~0, seq, icv_size, iv_size; - vlib_buffer_t * b0; - esp_header_t * esp0; - ipsec_sa_t * sa0; - struct rte_mbuf * mb0 = 0; - const int BLOCK_SIZE = 16; - crypto_sa_session_t * sa_sess; - void * sess; - u16 qp_index; - struct rte_crypto_op * cop = 0; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - - b0 = vlib_get_buffer (vm, bi0); - esp0 = vlib_buffer_get_current (b0); - - sa_index0 = vnet_buffer(b0)->ipsec.sad_index; - sa0 = pool_elt_at_index (im->sad, sa_index0); - - seq = clib_host_to_net_u32(esp0->seq); - - /* anti-replay check */ - if (sa0->use_anti_replay) - { - int rv = 0; - - if (PREDICT_TRUE(sa0->use_esn)) - rv = 
esp_replay_check_esn(sa0, seq); - else - rv = esp_replay_check(sa0, seq); - - if (PREDICT_FALSE(rv)) - { - clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_REPLAY, 1); - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - } - - sa0->total_data_size += b0->current_length; - - if (PREDICT_FALSE(sa0->integ_alg == IPSEC_INTEG_ALG_NONE) || - PREDICT_FALSE(sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE)) - { - clib_warning ("SPI %u : only cipher + auth supported", sa0->spi); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_UNSUPPORTED, 1); - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - - sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0); - - if (PREDICT_FALSE(!sa_sess->sess)) - { - int ret = create_sym_sess(sa0, sa_sess, 0); - - if (PREDICT_FALSE (ret)) - { - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - } - - sess = sa_sess->sess; - qp_index = sa_sess->qp_index; - - ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); - cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); - ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); - - cops_to_enq[qp_index][0] = cop; - cops_to_enq[qp_index] += 1; - n_cop_qp[qp_index] += 1; - bi_to_enq[qp_index][0] = bi0; - bi_to_enq[qp_index] += 1; - - rte_crypto_op_attach_sym_session(cop, sess); - - icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; - iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; - - /* Convert vlib buffer to mbuf */ - mb0 = rte_mbuf_from_vlib_buffer(b0); - mb0->data_len = b0->current_length; - mb0->pkt_len = b0->current_length; - mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; - - /* Outer IP header has already been stripped */ - u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) - - iv_size - icv_size; - - if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0)) - { - clib_warning ("payload %u not multiple of %d\n", - payload_len, BLOCK_SIZE); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_BAD_LEN, 1); - vec_add (vec_elt (cwm->qp_data, qp_index).free_cops, &cop, 1); - bi_to_enq[qp_index] -= 1; - cops_to_enq[qp_index] -= 1; - n_cop_qp[qp_index] -= 1; - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - - struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1); - - sym_cop->m_src = mb0; - sym_cop->cipher.data.offset = sizeof (esp_header_t) + iv_size; - sym_cop->cipher.data.length = payload_len; - - u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t)); - dpdk_cop_priv_t * priv = (dpdk_cop_priv_t *)(sym_cop + 1); - - if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - dpdk_gcm_cnt_blk *icb = &priv->cb; - icb->salt = sa0->salt; - clib_memcpy(icb->iv, iv, 8); - icb->cnt = clib_host_to_net_u32(1); - sym_cop->cipher.iv.data = (u8 *)icb; - sym_cop->cipher.iv.phys_addr = cop->phys_addr + - (uintptr_t)icb - (uintptr_t)cop; - sym_cop->cipher.iv.length = 16; - - u8 *aad = priv->aad; - clib_memcpy(aad, iv - sizeof(esp_header_t), 8); - sym_cop->auth.aad.data = aad; - sym_cop->auth.aad.phys_addr = cop->phys_addr + - (uintptr_t)aad - (uintptr_t)cop; - if (sa0->use_esn) - { - *((u32*)&aad[8]) = sa0->seq_hi; - sym_cop->auth.aad.length = 12; - } - else - { - sym_cop->auth.aad.length = 8; - } - - sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, - 
rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.length = icv_size; - - } - else - { - sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(mb0, void*, - sizeof (esp_header_t)); - sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - sizeof (esp_header_t)); - sym_cop->cipher.iv.length = iv_size; - - if (sa0->use_esn) - { - dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1); - u8* payload_end = rte_pktmbuf_mtod_offset( - mb0, u8*, sizeof(esp_header_t) + iv_size + payload_len); - - clib_memcpy (priv->icv, payload_end, icv_size); - *((u32*) payload_end) = sa0->seq_hi; - sym_cop->auth.data.offset = 0; - sym_cop->auth.data.length = sizeof(esp_header_t) + iv_size - + payload_len + sizeof(sa0->seq_hi); - sym_cop->auth.digest.data = priv->icv; - sym_cop->auth.digest.phys_addr = cop->phys_addr - + (uintptr_t) priv->icv - (uintptr_t) cop; - sym_cop->auth.digest.length = icv_size; - } - else - { - sym_cop->auth.data.offset = 0; - sym_cop->auth.data.length = sizeof(esp_header_t) + - iv_size + payload_len; - - sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.length = icv_size; - } - } - -trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; - } - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_RX_PKTS, - from_frame->n_vectors); - crypto_qp_data_t *qpd; - /* *INDENT-OFF* */ - vec_foreach_index (i, cwm->qp_data) - { - u32 enq; - - qpd = vec_elt_at_index(cwm->qp_data, i); - enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, - qpd->cops, n_cop_qp[i]); - qpd->inflights += enq; - - if (PREDICT_FALSE(enq < n_cop_qp[i])) - { - crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); - vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); - - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_ENQ_FAIL, - n_cop_qp[i] - enq); - } - } - /* *INDENT-ON* */ - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_decrypt_node) = { - .function = dpdk_esp_decrypt_node_fn, - .name = "dpdk-esp-decrypt", - .vector_size = sizeof (u32), - .format_trace = format_esp_decrypt_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(esp_decrypt_error_strings), - .error_strings = esp_decrypt_error_strings, - - .n_next_nodes = ESP_DECRYPT_N_NEXT, - .next_nodes = { -#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, - foreach_esp_decrypt_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_node, dpdk_esp_decrypt_node_fn) - -/* - * Decrypt Post Node - */ - -#define foreach_esp_decrypt_post_error \ - _(PKTS, "ESP post pkts") - -typedef enum { -#define _(sym,str) ESP_DECRYPT_POST_ERROR_##sym, - foreach_esp_decrypt_post_error -#undef _ - ESP_DECRYPT_POST_N_ERROR, -} esp_decrypt_post_error_t; - -static char * esp_decrypt_post_error_strings[] = { -#define _(sym,string) string, - foreach_esp_decrypt_post_error -#undef _ -}; - -vlib_node_registration_t dpdk_esp_decrypt_post_node; - -static u8 * format_esp_decrypt_post_trace (u8 * s, va_list * args) -{ - return s; -} - -static uword 
-dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next = 0, next_index; - ipsec_sa_t * sa0; - u32 sa_index0 = ~0; - ipsec_main_t *im = &ipsec_main; - dpdk_esp_main_t *em = &dpdk_esp_main; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - esp_footer_t * f0; - u32 bi0, next0, icv_size, iv_size; - vlib_buffer_t * b0 = 0; - ip4_header_t *ih4 = 0, *oh4 = 0; - ip6_header_t *ih6 = 0, *oh6 = 0; - u8 tunnel_mode = 1; - u8 transport_ip6 = 0; - - next0 = ESP_DECRYPT_NEXT_DROP; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - sa_index0 = vnet_buffer(b0)->ipsec.sad_index; - sa0 = pool_elt_at_index (im->sad, sa_index0); - - to_next[0] = bi0; - to_next += 1; - - icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; - iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; - - if (sa0->use_anti_replay) - { - esp_header_t * esp0 = vlib_buffer_get_current (b0); - u32 seq; - seq = clib_host_to_net_u32(esp0->seq); - if (PREDICT_TRUE(sa0->use_esn)) - esp_replay_advance_esn(sa0, seq); - else - esp_replay_advance(sa0, seq); - } - - ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t)); - vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size); - - b0->current_length -= (icv_size + 2); - b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; - f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) + - b0->current_length); - b0->current_length -= f0->pad_length; - - /* transport mode */ - if (PREDICT_FALSE(!sa0->is_tunnel && !sa0->is_tunnel_ip6)) - { - tunnel_mode = 0; - - if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) != 0x40)) - { - if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) == 0x60)) - transport_ip6 = 1; - else - { - clib_warning("next header: 0x%x", f0->next_header); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_NOT_IP, 1); - goto trace; - } - } - } - - if (PREDICT_TRUE (tunnel_mode)) - { - if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP)) - next0 = ESP_DECRYPT_NEXT_IP4_INPUT; - else if (f0->next_header == IP_PROTOCOL_IPV6) - next0 = ESP_DECRYPT_NEXT_IP6_INPUT; - else - { - clib_warning("next header: 0x%x", f0->next_header); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_DECRYPTION_FAILED, - 1); - goto trace; - } - } - /* transport mode */ - else - { - if (PREDICT_FALSE(transport_ip6)) - { - ih6 = (ip6_header_t *) (b0->data + sizeof(ethernet_header_t)); - vlib_buffer_advance (b0, -sizeof(ip6_header_t)); - oh6 = vlib_buffer_get_current (b0); - memmove(oh6, ih6, sizeof(ip6_header_t)); - - next0 = ESP_DECRYPT_NEXT_IP6_INPUT; - oh6->protocol = f0->next_header; - oh6->payload_length = - clib_host_to_net_u16 ( - vlib_buffer_length_in_chain(vm, b0) - - sizeof (ip6_header_t)); - } - else - { - vlib_buffer_advance (b0, -sizeof(ip4_header_t)); - oh4 = vlib_buffer_get_current (b0); - memmove(oh4, ih4, sizeof(ip4_header_t)); - - next0 = ESP_DECRYPT_NEXT_IP4_INPUT; - oh4->ip_version_and_header_length = 0x45; - oh4->fragment_id = 0; - oh4->flags_and_fragment_offset = 0; - oh4->protocol = f0->next_header; - oh4->length = clib_host_to_net_u16 ( - vlib_buffer_length_in_chain (vm, b0)); - 
oh4->checksum = ip4_header_checksum (oh4); - } - } - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0; - -trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, dpdk_esp_decrypt_post_node.index, - ESP_DECRYPT_POST_ERROR_PKTS, - from_frame->n_vectors); - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_decrypt_post_node) = { - .function = dpdk_esp_decrypt_post_node_fn, - .name = "dpdk-esp-decrypt-post", - .vector_size = sizeof (u32), - .format_trace = format_esp_decrypt_post_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(esp_decrypt_post_error_strings), - .error_strings = esp_decrypt_post_error_strings, - - .n_next_nodes = ESP_DECRYPT_N_NEXT, - .next_nodes = { -#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, - foreach_esp_decrypt_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_post_node, dpdk_esp_decrypt_post_node_fn) diff --git a/src/vnet/devices/dpdk/ipsec/esp_encrypt.c b/src/vnet/devices/dpdk/ipsec/esp_encrypt.c deleted file mode 100644 index 6eb1afc9..00000000 --- a/src/vnet/devices/dpdk/ipsec/esp_encrypt.c +++ /dev/null @@ -1,609 +0,0 @@ -/* - * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev - * - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include - -#define foreach_esp_encrypt_next \ -_(DROP, "error-drop") \ -_(IP4_LOOKUP, "ip4-lookup") \ -_(IP6_LOOKUP, "ip6-lookup") \ -_(INTERFACE_OUTPUT, "interface-output") - -#define _(v, s) ESP_ENCRYPT_NEXT_##v, -typedef enum -{ - foreach_esp_encrypt_next -#undef _ - ESP_ENCRYPT_N_NEXT, -} esp_encrypt_next_t; - -#define foreach_esp_encrypt_error \ - _(RX_PKTS, "ESP pkts received") \ - _(SEQ_CYCLED, "sequence number cycled") \ - _(ENQ_FAIL, "Enqueue failed (buffer full)") \ - _(NO_CRYPTODEV, "Cryptodev not configured") \ - _(UNSUPPORTED, "Cipher/Auth not supported") - - -typedef enum -{ -#define _(sym,str) ESP_ENCRYPT_ERROR_##sym, - foreach_esp_encrypt_error -#undef _ - ESP_ENCRYPT_N_ERROR, -} esp_encrypt_error_t; - -static char *esp_encrypt_error_strings[] = { -#define _(sym,string) string, - foreach_esp_encrypt_error -#undef _ -}; - -vlib_node_registration_t dpdk_esp_encrypt_node; - -typedef struct -{ - u32 spi; - u32 seq; - ipsec_crypto_alg_t crypto_alg; - ipsec_integ_alg_t integ_alg; -} esp_encrypt_trace_t; - -/* packet trace format function */ -static u8 * -format_esp_encrypt_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *); - - s = format (s, "esp: spi %u seq %u crypto %U integrity %U", - t->spi, t->seq, - format_ipsec_crypto_alg, t->crypto_alg, - format_ipsec_integ_alg, t->integ_alg); - return s; -} - -static uword -dpdk_esp_encrypt_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next, next_index; - ipsec_main_t *im = &ipsec_main; - u32 cpu_index = os_get_cpu_number (); - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - dpdk_esp_main_t *em = &dpdk_esp_main; - u32 i; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - if (PREDICT_FALSE (!dcm->workers_main)) - { - /* Likely there are not enough cryptodevs, so drop frame */ - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_NO_CRYPTODEV, - n_left_from); - vlib_buffer_free (vm, from, n_left_from); - return n_left_from; - } - - crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index); - u32 n_qps = vec_len (cwm->qp_data); - struct rte_crypto_op **cops_to_enq[n_qps]; - u32 n_cop_qp[n_qps], *bi_to_enq[n_qps]; - - for (i = 0; i < n_qps; i++) - { - bi_to_enq[i] = cwm->qp_data[i].bi; - cops_to_enq[i] = cwm->qp_data[i].cops; - } - - memset (n_cop_qp, 0, n_qps * sizeof (u32)); - - crypto_alloc_cops (); - - next_index = ESP_ENCRYPT_NEXT_DROP; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, next0; - vlib_buffer_t *b0 = 0; - u32 sa_index0; - ipsec_sa_t *sa0; - ip4_and_esp_header_t *ih0, *oh0 = 0; - ip6_and_esp_header_t *ih6_0, *oh6_0 = 0; - struct rte_mbuf *mb0 = 0; - esp_footer_t *f0; - u8 is_ipv6; - u8 ip_hdr_size; - u8 next_hdr_type; - u8 transport_mode = 0; - const int BLOCK_SIZE = 16; - u32 iv_size; - u16 orig_sz; - crypto_sa_session_t *sa_sess; - void *sess; - struct rte_crypto_op *cop = 0; - u16 qp_index; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - - b0 = vlib_get_buffer (vm, bi0); - sa_index0 = vnet_buffer (b0)->ipsec.sad_index; - sa0 = pool_elt_at_index (im->sad, 
sa_index0); - - if (PREDICT_FALSE (esp_seq_advance (sa0))) - { - clib_warning ("sequence number counter has cycled SPI %u", - sa0->spi); - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1); - //TODO: rekey SA - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - - sa0->total_data_size += b0->current_length; - - sa_sess = pool_elt_at_index (cwm->sa_sess_d[1], sa_index0); - if (PREDICT_FALSE (!sa_sess->sess)) - { - int ret = create_sym_sess (sa0, sa_sess, 1); - - if (PREDICT_FALSE (ret)) - { - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - } - - qp_index = sa_sess->qp_index; - sess = sa_sess->sess; - - ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); - cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); - ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); - - cops_to_enq[qp_index][0] = cop; - cops_to_enq[qp_index] += 1; - n_cop_qp[qp_index] += 1; - bi_to_enq[qp_index][0] = bi0; - bi_to_enq[qp_index] += 1; - - ssize_t adv; - iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; - ih0 = vlib_buffer_get_current (b0); - orig_sz = b0->current_length; - is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60; - /* is ipv6 */ - if (PREDICT_TRUE (sa0->is_tunnel)) - { - if (PREDICT_TRUE (!is_ipv6)) - adv = -sizeof (ip4_and_esp_header_t); - else - adv = -sizeof (ip6_and_esp_header_t); - } - else - { - adv = -sizeof (esp_header_t); - if (PREDICT_TRUE (!is_ipv6)) - orig_sz -= sizeof (ip4_header_t); - else - orig_sz -= sizeof (ip6_header_t); - } - - /*transport mode save the eth header before it is overwritten */ - if (PREDICT_FALSE (!sa0->is_tunnel)) - { - ethernet_header_t *ieh0 = (ethernet_header_t *) - ((u8 *) vlib_buffer_get_current (b0) - - sizeof (ethernet_header_t)); - ethernet_header_t *oeh0 = - (ethernet_header_t *) ((u8 *) ieh0 + (adv - iv_size)); - clib_memcpy (oeh0, ieh0, sizeof (ethernet_header_t)); - } - - vlib_buffer_advance (b0, adv - iv_size); - - /* XXX IP6/ip4 and IP4/IP6 not supported, only IP4/IP4 and IP6/IP6 */ - - /* is ipv6 */ - if (PREDICT_FALSE (is_ipv6)) - { - ih6_0 = (ip6_and_esp_header_t *) ih0; - ip_hdr_size = sizeof (ip6_header_t); - oh6_0 = vlib_buffer_get_current (b0); - - if (PREDICT_TRUE (sa0->is_tunnel)) - { - next_hdr_type = IP_PROTOCOL_IPV6; - oh6_0->ip6.ip_version_traffic_class_and_flow_label = - ih6_0->ip6.ip_version_traffic_class_and_flow_label; - } - else - { - next_hdr_type = ih6_0->ip6.protocol; - memmove (oh6_0, ih6_0, sizeof (ip6_header_t)); - } - - oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP; - oh6_0->ip6.hop_limit = 254; - oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi); - oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq); - } - else - { - ip_hdr_size = sizeof (ip4_header_t); - oh0 = vlib_buffer_get_current (b0); - - if (PREDICT_TRUE (sa0->is_tunnel)) - { - next_hdr_type = IP_PROTOCOL_IP_IN_IP; - oh0->ip4.tos = ih0->ip4.tos; - } - else - { - next_hdr_type = ih0->ip4.protocol; - memmove (oh0, ih0, sizeof (ip4_header_t)); - } - - oh0->ip4.ip_version_and_header_length = 0x45; - oh0->ip4.fragment_id = 0; - oh0->ip4.flags_and_fragment_offset = 0; - oh0->ip4.ttl = 254; - oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP; - oh0->esp.spi = clib_net_to_host_u32 (sa0->spi); - oh0->esp.seq = clib_net_to_host_u32 (sa0->seq); - } - - if (PREDICT_TRUE (sa0->is_tunnel && !sa0->is_tunnel_ip6)) - { - oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32; - oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32; - - /* in 
tunnel mode send it back to FIB */ - next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - } - else if (sa0->is_tunnel && sa0->is_tunnel_ip6) - { - oh6_0->ip6.src_address.as_u64[0] = - sa0->tunnel_src_addr.ip6.as_u64[0]; - oh6_0->ip6.src_address.as_u64[1] = - sa0->tunnel_src_addr.ip6.as_u64[1]; - oh6_0->ip6.dst_address.as_u64[0] = - sa0->tunnel_dst_addr.ip6.as_u64[0]; - oh6_0->ip6.dst_address.as_u64[1] = - sa0->tunnel_dst_addr.ip6.as_u64[1]; - - /* in tunnel mode send it back to FIB */ - next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - } - else - { - next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT; - transport_mode = 1; - } - - ASSERT (sa0->crypto_alg < IPSEC_CRYPTO_N_ALG); - ASSERT (sa0->crypto_alg != IPSEC_CRYPTO_ALG_NONE); - - int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE; - - /* pad packet in input buffer */ - u8 pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz; - u8 i; - u8 *padding = vlib_buffer_get_current (b0) + b0->current_length; - - for (i = 0; i < pad_bytes; ++i) - padding[i] = i + 1; - - f0 = vlib_buffer_get_current (b0) + b0->current_length + pad_bytes; - f0->pad_length = pad_bytes; - f0->next_header = next_hdr_type; - b0->current_length += pad_bytes + 2 + - em->esp_integ_algs[sa0->integ_alg].trunc_size; - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = - vnet_buffer (b0)->sw_if_index[VLIB_RX]; - b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; - - struct rte_crypto_sym_op *sym_cop; - sym_cop = (struct rte_crypto_sym_op *) (cop + 1); - - dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *) (sym_cop + 1); - - vnet_buffer (b0)->unused[0] = next0; - - mb0 = rte_mbuf_from_vlib_buffer (b0); - mb0->data_len = b0->current_length; - mb0->pkt_len = b0->current_length; - mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; - - rte_crypto_op_attach_sym_session (cop, sess); - - sym_cop->m_src = mb0; - - dpdk_gcm_cnt_blk *icb = &priv->cb; - icb->salt = sa0->salt; - icb->iv[0] = sa0->seq; - icb->iv[1] = sa0->seq_hi; - - if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - icb->cnt = clib_host_to_net_u32 (1); - clib_memcpy (vlib_buffer_get_current (b0) + ip_hdr_size + - sizeof (esp_header_t), icb->iv, 8); - sym_cop->cipher.data.offset = - ip_hdr_size + sizeof (esp_header_t) + iv_size; - sym_cop->cipher.data.length = BLOCK_SIZE * blocks; - sym_cop->cipher.iv.length = 16; - } - else - { - sym_cop->cipher.data.offset = - ip_hdr_size + sizeof (esp_header_t); - sym_cop->cipher.data.length = BLOCK_SIZE * blocks + iv_size; - sym_cop->cipher.iv.length = iv_size; - } - - sym_cop->cipher.iv.data = (u8 *) icb; - sym_cop->cipher.iv.phys_addr = cop->phys_addr + (uintptr_t) icb - - (uintptr_t) cop; - - - ASSERT (sa0->integ_alg < IPSEC_INTEG_N_ALG); - ASSERT (sa0->integ_alg != IPSEC_INTEG_ALG_NONE); - - if (PREDICT_FALSE (sa0->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)) - { - u8 *aad = priv->aad; - clib_memcpy (aad, vlib_buffer_get_current (b0) + ip_hdr_size, - 8); - sym_cop->auth.aad.data = aad; - sym_cop->auth.aad.phys_addr = cop->phys_addr + - (uintptr_t) aad - (uintptr_t) cop; - - if (PREDICT_FALSE (sa0->use_esn)) - { - *((u32 *) & aad[8]) = sa0->seq_hi; - sym_cop->auth.aad.length = 12; - } - else - { - sym_cop->auth.aad.length = 8; - } - } - else - { - sym_cop->auth.data.offset = ip_hdr_size; - sym_cop->auth.data.length = b0->current_length - ip_hdr_size - - em->esp_integ_algs[sa0->integ_alg].trunc_size; - - if (PREDICT_FALSE (sa0->use_esn)) - { - u8 *payload_end = - vlib_buffer_get_current (b0) + b0->current_length; - *((u32 *) 
payload_end) = sa0->seq_hi; - sym_cop->auth.data.length += sizeof (sa0->seq_hi); - } - } - sym_cop->auth.digest.data = vlib_buffer_get_current (b0) + - b0->current_length - - em->esp_integ_algs[sa0->integ_alg].trunc_size; - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset (mb0, - b0->current_length - - - em->esp_integ_algs - [sa0->integ_alg].trunc_size); - sym_cop->auth.digest.length = - em->esp_integ_algs[sa0->integ_alg].trunc_size; - - - if (PREDICT_FALSE (is_ipv6)) - { - oh6_0->ip6.payload_length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (ip6_header_t)); - } - else - { - oh0->ip4.length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4); - } - - if (transport_mode) - vlib_buffer_advance (b0, -sizeof (ethernet_header_t)); - - trace: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - esp_encrypt_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->spi = sa0->spi; - tr->seq = sa0->seq - 1; - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; - } - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_RX_PKTS, - from_frame->n_vectors); - crypto_qp_data_t *qpd; - /* *INDENT-OFF* */ - vec_foreach_index (i, cwm->qp_data) - { - u32 enq; - - qpd = vec_elt_at_index(cwm->qp_data, i); - enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, - qpd->cops, n_cop_qp[i]); - qpd->inflights += enq; - - if (PREDICT_FALSE(enq < n_cop_qp[i])) - { - crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); - vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); - - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_ENQ_FAIL, - n_cop_qp[i] - enq); - } - } - /* *INDENT-ON* */ - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_encrypt_node) = -{ - .function = dpdk_esp_encrypt_node_fn,.name = "dpdk-esp-encrypt",.flags = - VLIB_NODE_FLAG_IS_OUTPUT,.vector_size = sizeof (u32),.format_trace = - format_esp_encrypt_trace,.n_errors = - ARRAY_LEN (esp_encrypt_error_strings),.error_strings = - esp_encrypt_error_strings,.n_next_nodes = 1,.next_nodes = - { - [ESP_ENCRYPT_NEXT_DROP] = "error-drop",} -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_node, dpdk_esp_encrypt_node_fn) -/* - * ESP Encrypt Post Node - */ -#define foreach_esp_encrypt_post_error \ - _(PKTS, "ESP post pkts") - typedef enum - { -#define _(sym,str) ESP_ENCRYPT_POST_ERROR_##sym, - foreach_esp_encrypt_post_error -#undef _ - ESP_ENCRYPT_POST_N_ERROR, - } esp_encrypt_post_error_t; - - static char *esp_encrypt_post_error_strings[] = { -#define _(sym,string) string, - foreach_esp_encrypt_post_error -#undef _ - }; - -vlib_node_registration_t dpdk_esp_encrypt_post_node; - -static u8 * -format_esp_encrypt_post_trace (u8 * s, va_list * args) -{ - return s; -} - -static uword -dpdk_esp_encrypt_post_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next = 0, next_index; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, next0; - vlib_buffer_t *b0 = 0; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - n_left_to_next -= 1; 
- - b0 = vlib_get_buffer (vm, bi0); - - to_next[0] = bi0; - to_next += 1; - - next0 = vnet_buffer (b0)->unused[0]; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, bi0, - next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, dpdk_esp_encrypt_post_node.index, - ESP_ENCRYPT_POST_ERROR_PKTS, - from_frame->n_vectors); - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_encrypt_post_node) = -{ - .function = dpdk_esp_encrypt_post_node_fn,.name = - "dpdk-esp-encrypt-post",.vector_size = sizeof (u32),.format_trace = - format_esp_encrypt_post_trace,.type = VLIB_NODE_TYPE_INTERNAL,.n_errors = - ARRAY_LEN (esp_encrypt_post_error_strings),.error_strings = - esp_encrypt_post_error_strings,.n_next_nodes = - ESP_ENCRYPT_N_NEXT,.next_nodes = - { -#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n, - foreach_esp_encrypt_next -#undef _ - } -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_post_node, - dpdk_esp_encrypt_post_node_fn) -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/ipsec.c b/src/vnet/devices/dpdk/ipsec/ipsec.c deleted file mode 100644 index 05c17c99..00000000 --- a/src/vnet/devices/dpdk/ipsec/ipsec.c +++ /dev/null @@ -1,430 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include -#include - -#include -#include -#include - -#define DPDK_CRYPTO_NB_SESS_OBJS 20000 -#define DPDK_CRYPTO_CACHE_SIZE 512 -#define DPDK_CRYPTO_PRIV_SIZE 128 -#define DPDK_CRYPTO_N_QUEUE_DESC 1024 -#define DPDK_CRYPTO_NB_COPS (1024 * 4) - -static int -add_del_sa_sess (u32 sa_index, u8 is_add) -{ - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm; - u8 skip_master = vlib_num_workers () > 0; - - /* *INDENT-OFF* */ - vec_foreach (cwm, dcm->workers_main) - { - crypto_sa_session_t *sa_sess; - u8 is_outbound; - - if (skip_master) - { - skip_master = 0; - continue; - } - - for (is_outbound = 0; is_outbound < 2; is_outbound++) - { - if (is_add) - { - pool_get (cwm->sa_sess_d[is_outbound], sa_sess); - } - else - { - u8 dev_id; - - sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index); - dev_id = cwm->qp_data[sa_sess->qp_index].dev_id; - - if (!sa_sess->sess) - continue; - - if (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess)) - { - clib_warning("failed to free session"); - return -1; - } - memset(sa_sess, 0, sizeof(sa_sess[0])); - } - } - } - /* *INDENT-OFF* */ - - return 0; -} - -static void -update_qp_data (crypto_worker_main_t * cwm, - u8 cdev_id, u16 qp_id, u8 is_outbound, u16 * idx) -{ - crypto_qp_data_t *qpd; - - /* *INDENT-OFF* */ - vec_foreach_index (*idx, cwm->qp_data) - { - qpd = vec_elt_at_index(cwm->qp_data, *idx); - - if (qpd->dev_id == cdev_id && qpd->qp_id == qp_id && - qpd->is_outbound == is_outbound) - return; - } - /* *INDENT-ON* */ - - vec_add2 (cwm->qp_data, qpd, 1); - - qpd->dev_id = cdev_id; - qpd->qp_id = qp_id; - qpd->is_outbound = is_outbound; -} - -/* - * return: - * 0: already exist - * 1: mapped - */ -static int -add_mapping (crypto_worker_main_t * cwm, - u8 cdev_id, u16 qp, u8 is_outbound, - const struct rte_cryptodev_capabilities *cipher_cap, - const struct rte_cryptodev_capabilities *auth_cap) -{ - u16 qp_index; - uword key = 0, data, *ret; - crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; - - p_key->cipher_algo = (u8) cipher_cap->sym.cipher.algo; - p_key->auth_algo = (u8) auth_cap->sym.auth.algo; - p_key->is_outbound = is_outbound; - - ret = hash_get (cwm->algo_qp_map, key); - if (ret) - return 0; - - update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index); - - data = (uword) qp_index; - hash_set (cwm->algo_qp_map, key, data); - - return 1; -} - -/* - * return: - * 0: already exist - * 1: mapped - */ -static int -add_cdev_mapping (crypto_worker_main_t * cwm, - struct rte_cryptodev_info *dev_info, u8 cdev_id, - u16 qp, u8 is_outbound) -{ - const struct rte_cryptodev_capabilities *i, *j; - u32 mapped = 0; - - for (i = dev_info->capabilities; i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++) - { - if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER) - continue; - - if (check_algo_is_supported (i, NULL) != 0) - continue; - - for (j = dev_info->capabilities; j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; - j++) - { - if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH) - continue; - - if (check_algo_is_supported (j, NULL) != 0) - continue; - - mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, j); - } - } - - return mapped; -} - -static int -check_cryptodev_queues () -{ - u32 n_qs = 0; - u8 cdev_id; - u32 n_req_qs = 2; - - if (vlib_num_workers () > 0) - n_req_qs = vlib_num_workers () * 2; - - for (cdev_id = 0; cdev_id < rte_cryptodev_count (); cdev_id++) - { - struct rte_cryptodev_info cdev_info; - - rte_cryptodev_info_get (cdev_id, &cdev_info); - - if (! 
- (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) - continue; - - n_qs += cdev_info.max_nb_queue_pairs; - } - - if (n_qs >= n_req_qs) - return 0; - else - return -1; -} - -static clib_error_t * -dpdk_ipsec_check_support (ipsec_sa_t * sa) -{ - if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) - return clib_error_return (0, "unsupported integ-alg %U with " - "crypto-algo aes-gcm-128", - format_ipsec_integ_alg, sa->integ_alg); - sa->integ_alg = IPSEC_INTEG_ALG_AES_GCM_128; - } - else - { - if (sa->integ_alg == IPSEC_INTEG_ALG_NONE || - sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) - return clib_error_return (0, "unsupported integ-alg %U", - format_ipsec_integ_alg, sa->integ_alg); - } - - return 0; -} - -static uword -dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, - vlib_frame_t * f) -{ - dpdk_config_main_t *conf = &dpdk_config_main; - ipsec_main_t *im = &ipsec_main; - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - struct rte_cryptodev_config dev_conf; - struct rte_cryptodev_qp_conf qp_conf; - struct rte_cryptodev_info cdev_info; - struct rte_mempool *rmp; - i32 dev_id, ret; - u32 i, skip_master; - - if (!conf->cryptodev) - { - clib_warning ("DPDK Cryptodev support is disabled, " - "default to OpenSSL IPsec"); - return 0; - } - - if (check_cryptodev_queues () < 0) - { - conf->cryptodev = 0; - clib_warning ("not enough Cryptodevs, default to OpenSSL IPsec"); - return 0; - } - - vec_alloc (dcm->workers_main, tm->n_vlib_mains); - _vec_len (dcm->workers_main) = tm->n_vlib_mains; - - fprintf (stdout, "DPDK Cryptodevs info:\n"); - fprintf (stdout, "dev_id\tn_qp\tnb_obj\tcache_size\n"); - /* HW cryptodevs have higher dev_id, use HW first */ - for (dev_id = rte_cryptodev_count () - 1; dev_id >= 0; dev_id--) - { - u16 max_nb_qp, qp = 0; - skip_master = vlib_num_workers () > 0; - - rte_cryptodev_info_get (dev_id, &cdev_info); - - if (! 
- (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) - continue; - - max_nb_qp = cdev_info.max_nb_queue_pairs; - - for (i = 0; i < tm->n_vlib_mains; i++) - { - u8 is_outbound; - crypto_worker_main_t *cwm; - uword *map; - - if (skip_master) - { - skip_master = 0; - continue; - } - - cwm = vec_elt_at_index (dcm->workers_main, i); - map = cwm->algo_qp_map; - - if (!map) - { - map = hash_create (0, sizeof (crypto_worker_qp_key_t)); - if (!map) - { - clib_warning ("unable to create hash table for worker %u", - vlib_mains[i]->cpu_index); - goto error; - } - cwm->algo_qp_map = map; - } - - for (is_outbound = 0; is_outbound < 2 && qp < max_nb_qp; - is_outbound++) - qp += add_cdev_mapping (cwm, &cdev_info, dev_id, qp, is_outbound); - } - - if (qp == 0) - continue; - - dev_conf.socket_id = rte_cryptodev_socket_id (dev_id); - dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs; - dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_SESS_OBJS; - dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE; - - ret = rte_cryptodev_configure (dev_id, &dev_conf); - if (ret < 0) - { - clib_warning ("cryptodev %u config error", dev_id); - goto error; - } - - qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC; - for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++) - { - ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf, - dev_conf.socket_id); - if (ret < 0) - { - clib_warning ("cryptodev %u qp %u setup error", dev_id, qp); - goto error; - } - } - vec_validate_aligned (dcm->cop_pools, dev_conf.socket_id, - CLIB_CACHE_LINE_BYTES); - - if (!vec_elt (dcm->cop_pools, dev_conf.socket_id)) - { - u8 *pool_name = format (0, "crypto_op_pool_socket%u%c", - dev_conf.socket_id, 0); - - rmp = rte_crypto_op_pool_create ((char *) pool_name, - RTE_CRYPTO_OP_TYPE_SYMMETRIC, - DPDK_CRYPTO_NB_COPS * - (1 + vlib_num_workers ()), - DPDK_CRYPTO_CACHE_SIZE, - DPDK_CRYPTO_PRIV_SIZE, - dev_conf.socket_id); - vec_free (pool_name); - - if (!rmp) - { - clib_warning ("failed to allocate mempool on socket %u", - dev_conf.socket_id); - goto error; - } - vec_elt (dcm->cop_pools, dev_conf.socket_id) = rmp; - } - - fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs, - DPDK_CRYPTO_NB_SESS_OBJS, DPDK_CRYPTO_CACHE_SIZE); - } - - dpdk_esp_init (); - - /* Add new next node and set as default */ - vlib_node_t *node, *next_node; - - next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt"); - ASSERT (next_node); - node = vlib_get_node_by_name (vm, (u8 *) "ipsec-output-ip4"); - ASSERT (node); - im->esp_encrypt_node_index = next_node->index; - im->esp_encrypt_next_index = - vlib_node_add_next (vm, node->index, next_node->index); - - next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-decrypt"); - ASSERT (next_node); - node = vlib_get_node_by_name (vm, (u8 *) "ipsec-input-ip4"); - ASSERT (node); - im->esp_decrypt_node_index = next_node->index; - im->esp_decrypt_next_index = - vlib_node_add_next (vm, node->index, next_node->index); - - im->cb.check_support_cb = dpdk_ipsec_check_support; - im->cb.add_del_sa_sess_cb = add_del_sa_sess; - - if (vec_len (vlib_mains) == 0) - vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index, - VLIB_NODE_STATE_POLLING); - else - for (i = 1; i < tm->n_vlib_mains; i++) - vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, - VLIB_NODE_STATE_POLLING); - - /* TODO cryptodev counters */ - - return 0; - -error: - ; - crypto_worker_main_t *cwm; - struct rte_mempool **mp; - /* *INDENT-OFF* */ - vec_foreach (cwm, dcm->workers_main) - hash_free 
(cwm->algo_qp_map); - - vec_foreach (mp, dcm->cop_pools) - { - if (mp) - rte_mempool_free (mp[0]); - } - /* *INDENT-ON* */ - vec_free (dcm->workers_main); - vec_free (dcm->cop_pools); - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_ipsec_process_node,static) = { - .function = dpdk_ipsec_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "dpdk-ipsec-process", - .process_log2_n_stack_bytes = 17, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/ipsec.h b/src/vnet/devices/dpdk/ipsec/ipsec.h deleted file mode 100644 index 3465b361..00000000 --- a/src/vnet/devices/dpdk/ipsec/ipsec.h +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __DPDK_IPSEC_H__ -#define __DPDK_IPSEC_H__ - -#include - -#undef always_inline -#include -#include - -#if CLIB_DEBUG > 0 -#define always_inline static inline -#else -#define always_inline static inline __attribute__ ((__always_inline__)) -#endif - - -#define MAX_QP_PER_LCORE 16 - -typedef struct -{ - u32 salt; - u32 iv[2]; - u32 cnt; -} dpdk_gcm_cnt_blk; - -typedef struct -{ - dpdk_gcm_cnt_blk cb; - union - { - u8 aad[12]; - u8 icv[64]; - }; -} dpdk_cop_priv_t; - -typedef struct -{ - u8 cipher_algo; - u8 auth_algo; - u8 is_outbound; -} crypto_worker_qp_key_t; - -typedef struct -{ - u16 dev_id; - u16 qp_id; - u16 is_outbound; - i16 inflights; - u32 bi[VLIB_FRAME_SIZE]; - struct rte_crypto_op *cops[VLIB_FRAME_SIZE]; - struct rte_crypto_op **free_cops; -} crypto_qp_data_t; - -typedef struct -{ - u8 qp_index; - void *sess; -} crypto_sa_session_t; - -typedef struct -{ - crypto_sa_session_t *sa_sess_d[2]; - crypto_qp_data_t *qp_data; - uword *algo_qp_map; -} crypto_worker_main_t; - -typedef struct -{ - struct rte_mempool **cop_pools; - crypto_worker_main_t *workers_main; -} dpdk_crypto_main_t; - -dpdk_crypto_main_t dpdk_crypto_main; - -extern vlib_node_registration_t dpdk_crypto_input_node; - -#define CRYPTO_N_FREE_COPS (VLIB_FRAME_SIZE * 3) - -static_always_inline void -crypto_alloc_cops () -{ - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - u32 cpu_index = os_get_cpu_number (); - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - unsigned socket_id = rte_socket_id (); - crypto_qp_data_t *qpd; - - /* *INDENT-OFF* */ - vec_foreach (qpd, cwm->qp_data) - { - u32 l = vec_len (qpd->free_cops); - - if (PREDICT_FALSE (l < VLIB_FRAME_SIZE)) - { - u32 n_alloc; - - if (PREDICT_FALSE (!qpd->free_cops)) - vec_alloc (qpd->free_cops, CRYPTO_N_FREE_COPS); - - n_alloc = rte_crypto_op_bulk_alloc (dcm->cop_pools[socket_id], - RTE_CRYPTO_OP_TYPE_SYMMETRIC, - &qpd->free_cops[l], - CRYPTO_N_FREE_COPS - l - 1); - - _vec_len (qpd->free_cops) = l + n_alloc; - } - } - /* *INDENT-ON* */ -} - -static_always_inline void -crypto_free_cop (crypto_qp_data_t * qpd, struct rte_crypto_op **cops, u32 n) -{ - u32 l = vec_len (qpd->free_cops); 
- - if (l + n >= CRYPTO_N_FREE_COPS) - { - l -= VLIB_FRAME_SIZE; - rte_mempool_put_bulk (cops[0]->mempool, - (void **) &qpd->free_cops[l], VLIB_FRAME_SIZE); - } - clib_memcpy (&qpd->free_cops[l], cops, sizeof (*cops) * n); - - _vec_len (qpd->free_cops) = l + n; -} - -static_always_inline int -check_algo_is_supported (const struct rte_cryptodev_capabilities *cap, - char *name) -{ - struct - { - uint8_t cipher_algo; - enum rte_crypto_sym_xform_type type; - union - { - enum rte_crypto_auth_algorithm auth; - enum rte_crypto_cipher_algorithm cipher; - }; - char *name; - } supported_algo[] = - { - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_NULL,.name = "NULL"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_AES_CBC,.name = "AES_CBC"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_AES_CTR,.name = "AES_CTR"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_3DES_CBC,.name = "3DES-CBC"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_AES_GCM,.name = "AES-GCM"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA1_HMAC,.name = "HMAC-SHA1"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA256_HMAC,.name = "HMAC-SHA256"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA384_HMAC,.name = "HMAC-SHA384"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA512_HMAC,.name = "HMAC-SHA512"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_AES_XCBC_MAC,.name = "AES-XCBC-MAC"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_AES_GCM,.name = "AES-GCM"}, - { - /* tail */ - .type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED},}; - uint32_t i = 0; - - if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC) - return -1; - - while (supported_algo[i].type != RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED) - { - if (cap->sym.xform_type == supported_algo[i].type) - { - if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER && - cap->sym.cipher.algo == supported_algo[i].cipher) || - (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH && - cap->sym.auth.algo == supported_algo[i].auth)) - { - if (name) - strcpy (name, supported_algo[i].name); - return 0; - } - } - - i++; - } - - return -1; -} - -#endif /* __DPDK_IPSEC_H__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/main.c b/src/vnet/devices/dpdk/main.c deleted file mode 100644 index 9ea3aa04..00000000 --- a/src/vnet/devices/dpdk/main.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - - -/* - * Called by the dpdk driver's rte_delay_us() function. - * Return 0 to have the dpdk do a regular delay loop. - * Return 1 if to skip the delay loop because we are suspending - * the calling vlib process instead. 
- */ -int -rte_delay_us_override (unsigned us) -{ - vlib_main_t *vm; - - /* Don't bother intercepting for short delays */ - if (us < 10) - return 0; - - /* - * Only intercept if we are in a vlib process. - * If we are called from a vlib worker thread or the vlib main - * thread then do not intercept. (Must not be called from an - * independent pthread). - */ - if (os_get_cpu_number () == 0) - { - /* - * We're in the vlib main thread or a vlib process. Make sure - * the process is running and we're not still initializing. - */ - vm = vlib_get_main (); - if (vlib_in_process_context (vm)) - { - /* Only suspend for the admin_down_process */ - vlib_process_t *proc = vlib_get_current_process (vm); - if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) || - (proc->node_runtime.function != admin_up_down_process)) - return 0; - - f64 delay = 1e-6 * us; - vlib_process_suspend (vm, delay); - return 1; - } - } - return 0; // no override -} - -static void -rte_delay_us_override_cb (unsigned us) -{ - if (rte_delay_us_override (us) == 0) - rte_delay_us_block (us); -} - -static clib_error_t * dpdk_main_init (vlib_main_t * vm) -{ - clib_error_t * error = 0; - - if ((error = vlib_call_init_function (vm, dpdk_init))) - return error; - - /* register custom delay function */ - rte_delay_us_callback_register (rte_delay_us_override_cb); - - return error; -} - -VLIB_INIT_FUNCTION (dpdk_main_init); - diff --git a/src/vnet/devices/dpdk/node.c b/src/vnet/devices/dpdk/node.c deleted file mode 100644 index 0d64ae08..00000000 --- a/src/vnet/devices/dpdk/node.c +++ /dev/null @@ -1,674 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dpdk_priv.h" - -static char *dpdk_error_strings[] = { -#define _(n,s) s, - foreach_dpdk_error -#undef _ -}; - -always_inline int -vlib_buffer_is_ip4 (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) b->data; - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); -} - -always_inline int -vlib_buffer_is_ip6 (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) b->data; - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); -} - -always_inline int -vlib_buffer_is_mpls (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) b->data; - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); -} - -always_inline u32 -dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0) -{ - if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0))) - if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0)) - return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; - else - return VNET_DEVICE_INPUT_NEXT_IP4_INPUT; - else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0))) - return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; - else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) - return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; - else - return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; -} - -always_inline int -dpdk_mbuf_is_vlan (struct rte_mbuf *mb) -{ - return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) == - RTE_PTYPE_L2_ETHER_VLAN; -} - -always_inline int -dpdk_mbuf_is_ip4 (struct rte_mbuf *mb) -{ - return RTE_ETH_IS_IPV4_HDR (mb->packet_type) != 0; -} - -always_inline int -dpdk_mbuf_is_ip6 (struct rte_mbuf *mb) -{ - return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0; -} - -always_inline u32 -dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0) -{ - if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb))) - return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb))) - return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; - else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb))) - return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; - else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) - return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; - else - return dpdk_rx_next_from_etype (mb, b0); -} - -always_inline void -dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error) -{ - if (mb->ol_flags & PKT_RX_IP_CKSUM_BAD) - { - *error = DPDK_ERROR_IP_CHECKSUM_ERROR; - *next = VNET_DEVICE_INPUT_NEXT_DROP; - } - else - *error = DPDK_ERROR_NONE; -} - -void -dpdk_rx_trace (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, u32 * buffers, uword n_buffers) -{ - vlib_main_t *vm = vlib_get_main (); - u32 *b, n_left; - u32 next0; - - n_left = n_buffers; - b = buffers; - - while (n_left >= 1) - { - u32 bi0; - vlib_buffer_t *b0; - dpdk_rx_dma_trace_t *t0; - struct rte_mbuf *mb; - u8 error0; - - bi0 = b[0]; - n_left -= 1; - - b0 = vlib_get_buffer (vm, bi0); - mb = rte_mbuf_from_vlib_buffer (b0); - - if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) - next0 = xd->per_interface_next_index; - else if (PREDICT_TRUE - ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) - next0 = dpdk_rx_next_from_mb (mb, b0); - else - next0 = dpdk_rx_next_from_etype (mb, b0); - - dpdk_rx_error_from_mb (mb, &next0, &error0); - - vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); - t0->queue_index = queue_id; - t0->device_index = xd->device_index; - 
t0->buffer_index = bi0; - - clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); - clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - clib_memcpy (t0->buffer.pre_data, b0->data, - sizeof (t0->buffer.pre_data)); - clib_memcpy (&t0->data, mb->buf_addr + mb->data_off, sizeof (t0->data)); - - b += 1; - } -} - -static inline u32 -dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id) -{ - u32 n_buffers; - u32 n_left; - u32 n_this_chunk; - - n_left = VLIB_FRAME_SIZE; - n_buffers = 0; - - if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) - { - while (n_left) - { - n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id, - xd->rx_vectors[queue_id] + - n_buffers, n_left); - n_buffers += n_this_chunk; - n_left -= n_this_chunk; - - /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */ - if (n_this_chunk < 32) - break; - } - } - else - { - ASSERT (0); - } - - return n_buffers; -} - - -static_always_inline void -dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, - struct rte_mbuf *mb, vlib_buffer_free_list_t * fl) -{ - u8 nb_seg = 1; - struct rte_mbuf *mb_seg = 0; - vlib_buffer_t *b_seg, *b_chain = 0; - mb_seg = mb->next; - b_chain = b; - - while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) - { - ASSERT (mb_seg != 0); - - b_seg = vlib_buffer_from_rte_mbuf (mb_seg); - vlib_buffer_init_for_free_list (b_seg, fl); - - ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - ASSERT (b_seg->current_data == 0); - - /* - * The driver (e.g. virtio) may not put the packet data at the start - * of the segment, so don't assume b_seg->current_data == 0 is correct. - */ - b_seg->current_data = - (mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data; - - b_seg->current_length = mb_seg->data_len; - b->total_length_not_including_first_buffer += mb_seg->data_len; - - b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT; - b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg); - - b_chain = b_seg; - mb_seg = mb_seg->next; - nb_seg++; - } -} - -static_always_inline void -dpdk_prefetch_buffer (struct rte_mbuf *mb) -{ - vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE); -} - -/* - * This function is used when there are no worker threads. - * The main thread performs IO and forwards the packets. 
- */ -static_always_inline u32 -dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, - vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id) -{ - u32 n_buffers; - u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - u32 n_left_to_next, *to_next; - u32 mb_index; - vlib_main_t *vm = vlib_get_main (); - uword n_rx_bytes = 0; - u32 n_trace, trace_cnt __attribute__ ((unused)); - vlib_buffer_free_list_t *fl; - u32 buffer_flags_template; - - if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) - return 0; - - n_buffers = dpdk_rx_burst (dm, xd, queue_id); - - if (n_buffers == 0) - { - return 0; - } - - buffer_flags_template = dm->buffer_flags_template; - - vec_reset_length (xd->d_trace_buffers[cpu_index]); - trace_cnt = n_trace = vlib_get_trace_count (vm, node); - - if (n_trace > 0) - { - u32 n = clib_min (n_trace, n_buffers); - mb_index = 0; - - while (n--) - { - struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++]; - vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - vec_add1 (xd->d_trace_buffers[cpu_index], - vlib_get_buffer_index (vm, b)); - } - } - - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - - mb_index = 0; - - while (n_buffers > 0) - { - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 bi0, next0, l3_offset0; - u32 bi1, next1, l3_offset1; - u32 bi2, next2, l3_offset2; - u32 bi3, next3, l3_offset3; - u8 error0, error1, error2, error3; - u64 or_ol_flags; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_buffers > 8 && n_left_to_next > 4) - { - struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; - struct rte_mbuf *mb1 = xd->rx_vectors[queue_id][mb_index + 1]; - struct rte_mbuf *mb2 = xd->rx_vectors[queue_id][mb_index + 2]; - struct rte_mbuf *mb3 = xd->rx_vectors[queue_id][mb_index + 3]; - - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 4]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 5]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 6]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 7]); - - if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) - { - if (PREDICT_FALSE (mb0->nb_segs > 1)) - dpdk_prefetch_buffer (mb0->next); - if (PREDICT_FALSE (mb1->nb_segs > 1)) - dpdk_prefetch_buffer (mb1->next); - if (PREDICT_FALSE (mb2->nb_segs > 1)) - dpdk_prefetch_buffer (mb2->next); - if (PREDICT_FALSE (mb3->nb_segs > 1)) - dpdk_prefetch_buffer (mb3->next); - } - - ASSERT (mb0); - ASSERT (mb1); - ASSERT (mb2); - ASSERT (mb3); - - or_ol_flags = (mb0->ol_flags | mb1->ol_flags | - mb2->ol_flags | mb3->ol_flags); - b0 = vlib_buffer_from_rte_mbuf (mb0); - b1 = vlib_buffer_from_rte_mbuf (mb1); - b2 = vlib_buffer_from_rte_mbuf (mb2); - b3 = vlib_buffer_from_rte_mbuf (mb3); - - vlib_buffer_init_for_free_list (b0, fl); - vlib_buffer_init_for_free_list (b1, fl); - vlib_buffer_init_for_free_list (b2, fl); - vlib_buffer_init_for_free_list (b3, fl); - - bi0 = vlib_get_buffer_index (vm, b0); - bi1 = vlib_get_buffer_index (vm, b1); - bi2 = vlib_get_buffer_index (vm, b2); - bi3 = vlib_get_buffer_index (vm, b3); - - to_next[0] = bi0; - to_next[1] = bi1; - to_next[2] = bi2; - to_next[3] = bi3; - to_next += 4; - n_left_to_next -= 4; - - if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) - { - next0 = next1 = next2 = next3 = xd->per_interface_next_index; - } - else if (PREDICT_TRUE - ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) - { - next0 = dpdk_rx_next_from_mb (mb0, b0); - next1 = dpdk_rx_next_from_mb (mb1, b1); - next2 = dpdk_rx_next_from_mb (mb2, b2); - next3 = 
dpdk_rx_next_from_mb (mb3, b3); - } - else - { - next0 = dpdk_rx_next_from_etype (mb0, b0); - next1 = dpdk_rx_next_from_etype (mb1, b1); - next2 = dpdk_rx_next_from_etype (mb2, b2); - next3 = dpdk_rx_next_from_etype (mb3, b3); - } - - if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD)) - { - dpdk_rx_error_from_mb (mb0, &next0, &error0); - dpdk_rx_error_from_mb (mb1, &next1, &error1); - dpdk_rx_error_from_mb (mb2, &next2, &error2); - dpdk_rx_error_from_mb (mb3, &next3, &error3); - b0->error = node->errors[error0]; - b1->error = node->errors[error1]; - b2->error = node->errors[error2]; - b3->error = node->errors[error3]; - } - else - { - b0->error = b1->error = node->errors[DPDK_ERROR_NONE]; - b2->error = b3->error = node->errors[DPDK_ERROR_NONE]; - } - - l3_offset0 = device_input_next_node_advance[next0]; - l3_offset1 = device_input_next_node_advance[next1]; - l3_offset2 = device_input_next_node_advance[next2]; - l3_offset3 = device_input_next_node_advance[next3]; - - b0->current_data = l3_offset0 + mb0->data_off; - b1->current_data = l3_offset1 + mb1->data_off; - b2->current_data = l3_offset2 + mb2->data_off; - b3->current_data = l3_offset3 + mb3->data_off; - - b0->current_data -= RTE_PKTMBUF_HEADROOM; - b1->current_data -= RTE_PKTMBUF_HEADROOM; - b2->current_data -= RTE_PKTMBUF_HEADROOM; - b3->current_data -= RTE_PKTMBUF_HEADROOM; - - b0->current_length = mb0->data_len - l3_offset0; - b1->current_length = mb1->data_len - l3_offset1; - b2->current_length = mb2->data_len - l3_offset2; - b3->current_length = mb3->data_len - l3_offset3; - - b0->flags = buffer_flags_template; - b1->flags = buffer_flags_template; - b2->flags = buffer_flags_template; - b3->flags = buffer_flags_template; - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b2)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b3)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b2)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b3)->sw_if_index[VLIB_TX] = (u32) ~ 0; - - n_rx_bytes += mb0->pkt_len; - n_rx_bytes += mb1->pkt_len; - n_rx_bytes += mb2->pkt_len; - n_rx_bytes += mb3->pkt_len; - - /* Process subsequent segments of multi-segment packets */ - if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) - { - dpdk_process_subseq_segs (vm, b0, mb0, fl); - dpdk_process_subseq_segs (vm, b1, mb1, fl); - dpdk_process_subseq_segs (vm, b2, mb2, fl); - dpdk_process_subseq_segs (vm, b3, mb3, fl); - } - - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... See main.c... - */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); - - /* Do we have any driver RX features configured on the interface? 
*/ - vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index, - &next0, &next1, &next2, &next3, - b0, b1, b2, b3, - l3_offset0, l3_offset1, - l3_offset2, l3_offset3); - - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - n_buffers -= 4; - mb_index += 4; - } - while (n_buffers > 0 && n_left_to_next > 0) - { - struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; - - ASSERT (mb0); - - b0 = vlib_buffer_from_rte_mbuf (mb0); - - /* Prefetch one next segment if it exists. */ - if (PREDICT_FALSE (mb0->nb_segs > 1)) - dpdk_prefetch_buffer (mb0->next); - - vlib_buffer_init_for_free_list (b0, fl); - - bi0 = vlib_get_buffer_index (vm, b0); - - to_next[0] = bi0; - to_next++; - n_left_to_next--; - - if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) - next0 = xd->per_interface_next_index; - else if (PREDICT_TRUE - ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) - next0 = dpdk_rx_next_from_mb (mb0, b0); - else - next0 = dpdk_rx_next_from_etype (mb0, b0); - - dpdk_rx_error_from_mb (mb0, &next0, &error0); - b0->error = node->errors[error0]; - - l3_offset0 = device_input_next_node_advance[next0]; - - b0->current_data = l3_offset0; - b0->current_data += mb0->data_off - RTE_PKTMBUF_HEADROOM; - b0->current_length = mb0->data_len - l3_offset0; - - b0->flags = buffer_flags_template; - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - n_rx_bytes += mb0->pkt_len; - - /* Process subsequent segments of multi-segment packets */ - dpdk_process_subseq_segs (vm, b0, mb0, fl); - - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... See main.c... - */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - - /* Do we have any driver RX features configured on the interface? */ - vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0, - b0, l3_offset0); - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - n_buffers--; - mb_index++; - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0)) - { - dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index], - vec_len (xd->d_trace_buffers[cpu_index])); - vlib_set_trace_count (vm, node, n_trace - - vec_len (xd->d_trace_buffers[cpu_index])); - } - - vlib_increment_combined_counter - (vnet_get_main ()->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, - cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); - - vnet_device_increment_rx_packets (cpu_index, mb_index); - - return mb_index; -} - -static inline void -poll_rate_limit (dpdk_main_t * dm) -{ - /* Limit the poll rate by sleeping for N msec between polls */ - if (PREDICT_FALSE (dm->poll_sleep != 0)) - { - struct timespec ts, tsrem; - - ts.tv_sec = 0; - ts.tv_nsec = 1000 * 1000 * dm->poll_sleep; /* 1ms */ - - while (nanosleep (&ts, &tsrem) < 0) - { - ts = tsrem; - } - } -} - -/** \brief Main DPDK input node - @node dpdk-input - - This is the main DPDK input node: across each assigned interface, - call rte_eth_rx_burst(...) or similar to obtain a vector of - packets to process. Handle early packet discard. Derive @c - vlib_buffer_t metadata from struct rte_mbuf metadata, - Depending on the resulting metadata: adjust b->current_data, - b->current_length and dispatch directly to - ip4-input-no-checksum, or ip6-input. 
Trace the packet if required. - - @param vm vlib_main_t corresponding to the current thread - @param node vlib_node_runtime_t - @param f vlib_frame_t input-node, not used. - - @par Graph mechanics: buffer metadata, next index usage - - @em Uses: - - struct rte_mbuf mb->ol_flags - - PKT_RX_IP_CKSUM_BAD - - RTE_ETH_IS_xxx_HDR(mb->packet_type) - - packet classification result - - @em Sets: - - b->error if the packet is to be dropped immediately - - b->current_data, b->current_length - - adjusted as needed to skip the L2 header in direct-dispatch cases - - vnet_buffer(b)->sw_if_index[VLIB_RX] - - rx interface sw_if_index - - vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0 - - required by ipX-lookup - - b->flags - - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc. - - Next Nodes: - - Static arcs to: error-drop, ethernet-input, - ip4-input-no-checksum, ip6-input, mpls-input - - per-interface redirection, controlled by - xd->per_interface_next_index -*/ - -static uword -dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - uword n_rx_packets = 0; - dpdk_device_and_queue_t *dq; - u32 cpu_index = os_get_cpu_number (); - - /* - * Poll all devices on this cpu for input/interrupts. - */ - /* *INDENT-OFF* */ - vec_foreach (dq, dm->devices_by_cpu[cpu_index]) - { - xd = vec_elt_at_index(dm->devices, dq->device); - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id); - } - /* *INDENT-ON* */ - - poll_rate_limit (dm); - - return n_rx_packets; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_input_node) = { - .function = dpdk_input, - .type = VLIB_NODE_TYPE_INPUT, - .name = "dpdk-input", - .sibling_of = "device-input", - - /* Will be enabled if/when hardware is detected. */ - .state = VLIB_NODE_STATE_DISABLED, - - .format_buffer = format_ethernet_header_with_length, - .format_trace = format_dpdk_rx_dma_trace, - - .n_errors = DPDK_N_ERROR, - .error_strings = dpdk_error_strings, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input); -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/qos_doc.md b/src/vnet/devices/dpdk/qos_doc.md deleted file mode 100644 index 7c064246..00000000 --- a/src/vnet/devices/dpdk/qos_doc.md +++ /dev/null @@ -1,411 +0,0 @@ -# QoS Hierarchical Scheduler {#qos_doc} - -The Quality-of-Service (QoS) scheduler performs egress-traffic management by -prioritizing the transmission of the packets of different type services and -subcribers based on the Service Level Agreements (SLAs). The QoS scheduler can -be enabled on one or more NIC output interfaces depending upon the -requirement. - - -## Overview - -The QoS schdeuler supports a number of scheduling and shaping levels which -construct hierarchical-tree. The first level in the hierarchy is port (i.e. -the physical interface) that constitutes the root node of the tree. The -subsequent level is subport which represents the group of the -users/subscribers. The individual user/subscriber is represented by the pipe -at the next level. Each user can have different traffic type based on the -criteria of specific loss rate, jitter, and latency. These traffic types are -represented at the traffic-class level in the form of different traffic- -classes. The last level contains number of queues which are grouped together -to host the packets of the specific class type traffic. 
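To make the hierarchy concrete, the sketch below (illustrative only; the constants and helper name are not VPP or DPDK symbols) shows how a (subport, pipe, traffic class, queue) tuple collapses into a single flat queue index, mirroring the port -> subport -> pipe -> traffic-class -> queue levels described above:

```
#include <stdint.h>

/* Illustrative constants matching the default configuration shown later:
 * 1 subport per port, 4096 pipes per subport, 4 traffic classes per pipe,
 * 4 queues per traffic class. */
#define HQOS_PIPES_PER_SUBPORT 4096
#define HQOS_TCS_PER_PIPE      4
#define HQOS_QUEUES_PER_TC     4

/* Flatten (subport, pipe, tc, queue) into one queue index; each level of
 * the hierarchy scales the index by the fan-out of the level below it. */
static inline uint32_t
hqos_flat_queue_index (uint32_t subport, uint32_t pipe, uint32_t tc,
                       uint32_t queue)
{
  return ((subport * HQOS_PIPES_PER_SUBPORT + pipe) * HQOS_TCS_PER_PIPE + tc)
         * HQOS_QUEUES_PER_TC + queue;
}
```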
- -The QoS scheduler implementation requires flow classification, enqueue and -dequeue operations. The flow classification is a mandatory stage for HQoS where -incoming packets are classified by mapping the packet field information to a -5-tuple (HQoS subport, pipe, traffic class, queue within traffic class, and -color) and storing that information in the mbuf sched field. The enqueue operation -uses this information to determine the queue for storing the packet, and at -this stage, if the specific queue is full, QoS drops the packet. The dequeue -operation consists of scheduling the packet based on its length and available -credits, and handing over the scheduled packet to the output interface. - -For more information on the QoS Scheduler, please refer to the DPDK Programmer's Guide: -http://dpdk.org/doc/guides/prog_guide/qos_framework.html - - -### QoS Scheduler Parameters - -The following illustrates the default HQoS configuration for each 10GbE output -port: - -Single subport (subport 0): - - Subport rate set to 100% of port rate - - Each of the 4 traffic classes has rate set to 100% of port rate - -4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration: - - Pipe rate set to 1/4K of port rate - - Each of the 4 traffic classes has rate set to 100% of pipe rate - - Within each traffic class, the byte-level WRR weights for the 4 queues are set to 1:1:1:1 - - -#### Port configuration - -``` -port { - rate 1250000000 /* Assuming 10GbE port */ - frame_overhead 24 /* Overhead fields per Ethernet frame: - * 7B (Preamble) + - * 1B (Start of Frame Delimiter (SFD)) + - * 4B (Frame Check Sequence (FCS)) + - * 12B (Inter Frame Gap (IFG)) - */ - mtu 1522 /* Assuming Ethernet/IPv4 pkt (FCS not included) */ - n_subports_per_port 1 /* Number of subports per output interface */ - n_pipes_per_subport 4096 /* Number of pipes (users/subscribers) */ - queue_sizes 64 64 64 64 /* Packet queue size for each traffic class. - * All queues within the same pipe traffic class - * have the same size. Queues from different - * pipes serving the same traffic class have - * the same size.
*/ -} -``` - - -#### Subport configuration - -``` -subport 0 { - tb_rate 1250000000 /* Subport level token bucket rate (bytes per second) */ - tb_size 1000000 /* Subport level token bucket size (bytes) */ - tc0_rate 1250000000 /* Subport level token bucket rate for traffic class 0 (bytes per second) */ - tc1_rate 1250000000 /* Subport level token bucket rate for traffic class 1 (bytes per second) */ - tc2_rate 1250000000 /* Subport level token bucket rate for traffic class 2 (bytes per second) */ - tc3_rate 1250000000 /* Subport level token bucket rate for traffic class 3 (bytes per second) */ - tc_period 10 /* Time interval for refilling the token bucket associated with traffic class (Milliseconds) */ - pipe 0 4095 profile 0 /* pipes (users/subscribers) configured with pipe profile 0 */ -} -``` - - -#### Pipe configuration - -``` -pipe_profile 0 { - tb_rate 305175 /* Pipe level token bucket rate (bytes per second) */ - tb_size 1000000 /* Pipe level token bucket size (bytes) */ - tc0_rate 305175 /* Pipe level token bucket rate for traffic class 0 (bytes per second) */ - tc1_rate 305175 /* Pipe level token bucket rate for traffic class 1 (bytes per second) */ - tc2_rate 305175 /* Pipe level token bucket rate for traffic class 2 (bytes per second) */ - tc3_rate 305175 /* Pipe level token bucket rate for traffic class 3 (bytes per second) */ - tc_period 40 /* Time interval for refilling the token bucket associated with traffic class at pipe level (Milliseconds) */ - tc3_oversubscription_weight 1 /* Weight traffic class 3 oversubscription */ - tc0_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 0 */ - tc1_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 1 */ - tc2_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 2 */ - tc3_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 3 */ -} -``` - - -#### Random Early Detection (RED) parameters per traffic class and color (Green / Yellow / Red) - -``` -red { - tc0_wred_min 48 40 32 /* Minimum threshold for traffic class 0 queue (min_th) in number of packets */ - tc0_wred_max 64 64 64 /* Maximum threshold for traffic class 0 queue (max_th) in number of packets */ - tc0_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 0 queue (maxp = 1 / maxp_inv) */ - tc0_wred_weight 9 9 9 /* Traffic Class 0 queue weight */ - tc1_wred_min 48 40 32 /* Minimum threshold for traffic class 1 queue (min_th) in number of packets */ - tc1_wred_max 64 64 64 /* Maximum threshold for traffic class 1 queue (max_th) in number of packets */ - tc1_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 1 queue (maxp = 1 / maxp_inv) */ - tc1_wred_weight 9 9 9 /* Traffic Class 1 queue weight */ - tc2_wred_min 48 40 32 /* Minimum threshold for traffic class 2 queue (min_th) in number of packets */ - tc2_wred_max 64 64 64 /* Maximum threshold for traffic class 2 queue (max_th) in number of packets */ - tc2_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 2 queue (maxp = 1 / maxp_inv) */ - tc2_wred_weight 9 9 9 /* Traffic Class 2 queue weight */ - tc3_wred_min 48 40 32 /* Minimum threshold for traffic class 3 queue (min_th) in number of packets */ - tc3_wred_max 64 64 64 /* Maximum threshold for traffic class 3 queue (max_th) in number of packets */ - tc3_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 3 queue (maxp = 1 / maxp_inv) */ - tc3_wred_weight 9 9 9 /* Traffic Class 3 queue weight */ -} -``` - - 
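The min_th / max_th / inv_prob triples above follow standard RED semantics: no drops below min_th, unconditional drops at or above max_th, and a drop probability that ramps linearly up to maxp = 1 / maxp_inv in between. A minimal sketch of that curve (illustrative only; the actual implementation is DPDK's librte_red, which also applies the configured weight to an averaged queue length):

```
/* Drop probability for one (traffic class, color) pair, given the averaged
 * queue length and the RED parameters from the configuration above. */
static double
wred_drop_probability (double avg_qlen, double min_th, double max_th,
                       double maxp_inv)
{
  if (avg_qlen < min_th)
    return 0.0;                 /* below min_th: never drop */
  if (avg_qlen >= max_th)
    return 1.0;                 /* at or above max_th: always drop */
  /* linear ramp from 0 at min_th to maxp = 1 / maxp_inv at max_th */
  return (avg_qlen - min_th) / (max_th - min_th) / maxp_inv;
}
```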
-### DPDK QoS Scheduler Integration in VPP - -The Hierarchical Quality-of-Service (HQoS) scheduler object could be seen as -part of the logical NIC output interface. To enable HQoS on a specific output -interface, the vpp startup.conf file has to be configured accordingly. The output -interface that requires HQoS should have the "hqos" parameter specified in the dpdk -section. Another optional parameter, "hqos-thread", has been defined which can -be used to associate the output interface with a specific hqos thread. In the cpu -section of the config file, "corelist-hqos-threads" is introduced to assign -logical cpu cores to run the HQoS threads. An HQoS thread can run multiple HQoS -objects, each associated with a different output interface. All worker threads, -instead of writing packets to the NIC TX queue directly, write the packets to -software queues. The hqos threads read the software queues, enqueue the -packets to HQoS objects, dequeue packets from HQoS objects, and -write them to NIC output interfaces. The worker threads need to be able to -send the packets to any output interface; therefore, each HQoS object -associated with a NIC output interface should have as many software queues as there are -worker threads. - -The following illustrates a sample startup configuration file with 4 worker -threads feeding 2 hqos threads, each handling the QoS scheduler for one output -interface. - -``` -dpdk { - socket-mem 16384,16384 - - dev 0000:02:00.0 { - num-rx-queues 2 - hqos - } - dev 0000:06:00.0 { - num-rx-queues 2 - hqos - } - - num-mbufs 1000000 -} - -cpu { - main-core 0 - corelist-workers 1, 2, 3, 4 - corelist-hqos-threads 5, 6 -} -``` - - -### QoS scheduler CLI Commands - -Each QoS scheduler instance is initialised with default parameters required to -configure hqos port, subport, pipe and queues. Some of the parameters can be -re-configured at run-time through CLI commands. - - -#### Configuration - -The following commands can be used to configure QoS scheduler parameters. - -The command below can be used to set the subport level parameters such as -token bucket rate (bytes per second), token bucket size (bytes), traffic -class rates (bytes per second) and token update period (milliseconds). - -``` -set dpdk interface hqos subport subport [rate ] - [bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ] -``` - -For setting the pipe profile, the following command can be used. - -``` -set dpdk interface hqos pipe subport pipe - profile -``` - -To assign a QoS scheduler instance to a specific thread, the following command can -be used. - -``` -set dpdk interface hqos placement thread -``` - -The command below is used to set the packet fields required for classifying -the incoming packet. As a result of the classification process, packet field -information will be mapped to a 5-tuple (subport, pipe, traffic class, queue, -color) and stored in the packet mbuf. - -``` -set dpdk interface hqos pktfield id subport|pipe|tc offset - mask -``` - -The DSCP table entries used for identifying the traffic class and queue can be set using the command below: - -``` -set dpdk interface hqos tctbl entry tc queue -``` - - -#### Show Command - -The QoS Scheduler configuration can be displayed using the command below.
- -``` - vpp# show dpdk interface hqos TenGigabitEthernet2/0/0 - Thread: - Input SWQ size = 4096 packets - Enqueue burst size = 256 packets - Dequeue burst size = 220 packets - Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport) - Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe) - Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc) - Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...) - [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - [48 .. 63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - Port: - Rate = 1250000000 bytes/second - MTU = 1514 bytes - Frame overhead = 24 bytes - Number of subports = 1 - Number of pipes per subport = 4096 - Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets - Number of pipe profiles = 1 - Subport 0: - Rate = 120000000 bytes/second - Token bucket size = 1000000 bytes - Traffic class rate: TC0 = 120000000, TC1 = 120000000, TC2 = 120000000, TC3 = 120000000 bytes/second - TC period = 10 milliseconds - Pipe profile 0: - Rate = 305175 bytes/second - Token bucket size = 1000000 bytes - Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second - TC period = 40 milliseconds - TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 -``` - -The QoS Scheduler placement over the logical cpu cores can be displayed using -below command. - -``` - vpp# show dpdk interface hqos placement - Thread 5 (vpp_hqos-threads_0 at lcore 5): - TenGigabitEthernet2/0/0 queue 0 - Thread 6 (vpp_hqos-threads_1 at lcore 6): - TenGigabitEthernet4/0/1 queue 0 -``` - - -### QoS Scheduler Binary APIs - -This section explans the available binary APIs for configuring QoS scheduler -parameters in run-time. - -The following API can be used to set the pipe profile of a pipe that belongs -to a given subport: - -``` -sw_interface_set_dpdk_hqos_pipe rx | sw_if_index - subport pipe profile -``` - -The data structures used for set the pipe profile parameter are as follows; - -``` - /** \\brief DPDK interface HQoS pipe profile set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param pipe - pipe ID within its subport - @param profile - pipe profile ID - */ - define sw_interface_set_dpdk_hqos_pipe { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 pipe; - u32 profile; - }; - - /** \\brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code - */ - define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; - }; -``` - -The following API can be used to set the subport level parameters, for -example- token bucket rate (bytes per seconds), token bucket size (bytes), -traffic class rate (bytes per seconds) and tokens update period. 
- -``` -sw_interface_set_dpdk_hqos_subport rx | sw_if_index - subport [rate ] [bktsize ] - [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ] -``` - -The data structures used for set the subport level parameter are as follows; - -``` - /** \\brief DPDK interface HQoS subport parameters set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param tb_rate - subport token bucket rate (measured in bytes/second) - @param tb_size - subport token bucket size (measured in credits) - @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) - @param tc_period - enforcement period for rates (measured in milliseconds) - */ - define sw_interface_set_dpdk_hqos_subport { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 tb_rate; - u32 tb_size; - u32 tc_rate[4]; - u32 tc_period; - }; - - /** \\brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code - */ - define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; - }; -``` - -The following API can be used set the DSCP table entry. The DSCP table have -64 entries to map the packet DSCP field onto traffic class and hqos input -queue. - -``` -sw_interface_set_dpdk_hqos_tctbl rx | sw_if_index - entry tc queue -``` - -The data structures used for setting DSCP table entries are given below. - -``` - /** \\brief DPDK interface HQoS tctbl entry set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param entry - entry index ID - @param tc - traffic class (0 .. 3) - @param queue - traffic class queue (0 .. 3) - */ - define sw_interface_set_dpdk_hqos_tctbl { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 entry; - u32 tc; - u32 queue; - }; - - /** \\brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code - */ - define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; - }; -``` diff --git a/src/vnet/devices/dpdk/thread.c b/src/vnet/devices/dpdk/thread.c deleted file mode 100644 index 475dd142..00000000 --- a/src/vnet/devices/dpdk/thread.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static clib_error_t * -dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) -{ - int r; - r = rte_eal_remote_launch (fp, (void *) w, lcore_id); - if (r) - return clib_error_return (0, "Failed to launch thread %u", lcore_id); - return 0; -} - -static clib_error_t * -dpdk_thread_set_lcore (u32 thread, u16 lcore) -{ - return 0; -} - -static vlib_thread_callbacks_t callbacks = { - .vlib_launch_thread_cb = &dpdk_launch_thread, - .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore, -}; - -static clib_error_t * -dpdk_thread_init (vlib_main_t * vm) -{ - vlib_thread_cb_register (vm, &callbacks); - return 0; -} - -VLIB_INIT_FUNCTION (dpdk_thread_init); - -/** @endcond */ -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index 3083b614..dd23a909 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -328,17 +328,6 @@ typedef struct int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_intf_details_t ** out_vuids); -// CLI commands to be used from dpdk -clib_error_t *vhost_user_connect_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd); -clib_error_t *vhost_user_delete_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd); -clib_error_t *show_vhost_user_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd); - #endif /* diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index 49b475cf..e37bccee 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -79,11 +79,7 @@ static void vl_api_ipsec_spd_add_del_t_handler vl_api_ipsec_spd_add_del_reply_t *rmp; int rv; -#if DPDK > 0 rv = ipsec_add_del_spd (vm, ntohl (mp->spd_id), mp->is_add); -#else - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif REPLY_MACRO (VL_API_IPSEC_SPD_ADD_DEL_REPLY); #endif diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c index e15faeb8..4a65b024 100644 --- a/src/vnet/pg/input.c +++ b/src/vnet/pg/input.c @@ -1212,10 +1212,10 @@ pg_stream_fill_helper (pg_main_t * pg, /* * Historically, the pg maintained its own free lists and - * device drivers tx paths would return pkts. With the DPDK, - * that doesn't happen. + * device drivers tx paths would return pkts. 
*/ - if (DPDK == 0 && !(s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE)) + if (vm->buffer_main->extern_buffer_mgmt == 0 && + !(s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE)) f->buffer_init_function = pg_buffer_init; f->buffer_init_function_opaque = (s - pg->streams) | ((bi - s->buffer_indices) << 24); @@ -1238,7 +1238,7 @@ pg_stream_fill_helper (pg_main_t * pg, n_alloc = n_allocated; /* Reinitialize buffers */ - if (DPDK == 0 || CLIB_DEBUG > 0 + if (vm->buffer_main->extern_buffer_mgmt == 0 || CLIB_DEBUG > 0 || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE)) init_buffers_inline (vm, s, @@ -1246,7 +1246,8 @@ pg_stream_fill_helper (pg_main_t * pg, n_alloc, (bi - s->buffer_indices) * s->buffer_bytes /* data offset */ , s->buffer_bytes, /* set_data */ - DPDK == 1 || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE) != 0); + vm->buffer_main->extern_buffer_mgmt != 0 + || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE) != 0); if (next_buffers) pg_set_next_buffer_pointers (pg, s, buffers, next_buffers, n_alloc); diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c index c46875e1..560c4b07 100644 --- a/src/vnet/pg/stream.c +++ b/src/vnet/pg/stream.c @@ -442,9 +442,8 @@ pg_stream_add (pg_main_t * pg, pg_stream_t * s_init) pg_buffer_index_t *bi; int n; -#if DPDK > 0 - s->buffer_bytes = VLIB_BUFFER_DATA_SIZE; -#endif + if (vm->buffer_main->extern_buffer_mgmt) + s->buffer_bytes = VLIB_BUFFER_DATA_SIZE; if (!s->buffer_bytes) s->buffer_bytes = s->max_packet_bytes; diff --git a/src/vnet/replication.c b/src/vnet/replication.c index 02755195..86d922b5 100644 --- a/src/vnet/replication.c +++ b/src/vnet/replication.c @@ -214,9 +214,9 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl) b0->flags |= VLIB_BUFFER_IS_RECYCLED; #if (CLIB_DEBUG > 0) -#if DPDK == 0 - vlib_buffer_set_known_state (vm, bi0, VLIB_BUFFER_KNOWN_ALLOCATED); -#endif + if (vm->buffer_main->extern_buffer_mgmt == 0) + vlib_buffer_set_known_state (vm, bi0, + VLIB_BUFFER_KNOWN_ALLOCATED); #endif /* If buffer is traced, mark frame as traced */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index c4075db6..9d3abae5 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -30,9 +30,6 @@ #endif /* included_from_layer_3 */ #include -#if DPDK > 0 -#include -#endif #include #include #include diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 4cc6aa73..3871601b 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -237,58 +237,6 @@ static void *vl_api_sw_interface_set_l2_bridge_t_print FINISH; } -#if DPDK > 0 -static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = format (s, "subport %u pipe %u profile %u ", - ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile)); - - FINISH; -} - -static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = - format (s, - "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u", - ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size), - ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]), - ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]), - 
ntohl (mp->tc_period)); - - FINISH; -} - -static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = format (s, "entry %u tc %u queue %u", - ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue)); - - FINISH; -} -#endif - static void *vl_api_bridge_domain_add_del_t_print (vl_api_bridge_domain_add_del_t * mp, void *handle) { @@ -3036,18 +2984,6 @@ vl_msg_api_custom_dump_configure (api_main_t * am) = (void *) vl_api_##f##_t_print; foreach_custom_print_function; #undef _ - -#if DPDK > 0 - /* - * manually add DPDK hqos print handlers - */ - am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE] = - (void *) vl_api_sw_interface_set_dpdk_hqos_pipe_t_print; - am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT] = - (void *) vl_api_sw_interface_set_dpdk_hqos_subport_t_print; - am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL] = - (void *) vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print; -#endif } /* diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c index b28608f0..610f40ed 100644 --- a/src/vpp/api/gmon.c +++ b/src/vpp/api/gmon.c @@ -137,7 +137,8 @@ gmon_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) *gm->vector_rate_ptr = vector_rate; now = vlib_time_now (vm); dt = now - last_runtime; - input_packets = vnet_get_aggregate_rx_packets (); + // TODO + //input_packets = vnet_get_aggregate_rx_packets (); *gm->input_rate_ptr = (f64) (input_packets - last_input_packets) / dt; last_runtime = now; last_input_packets = input_packets; diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 2d6e4f37..7f9c2038 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -41,7 +41,6 @@ * SESSION APIs: .../vnet/session/{session.api session_api.c} * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c} * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c} - * DPDK APIs: see ... /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} * CLASSIFY APIs: see ... /src/vnet/classify/{classify.api, classify_api.c} * FLOW APIs: see ... /src/vnet/flow/{flow.api, flow_api.c} * DHCP APIs: see ... /src/vnet/dhcp/{dhcpk.api, dhcp_api.c} diff --git a/src/vpp/app/l2t.c b/src/vpp/app/l2t.c deleted file mode 100644 index e1eda155..00000000 --- a/src/vpp/app/l2t.c +++ /dev/null @@ -1,562 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include - -#if DPDK == 0 -#include -#else -#include -#endif - -#include -#include -#include - -l2t_main_t l2t_main; - -/* $$$$ unused? 
- * get_interface_ethernet_address - * paints the ethernet address for a given interface - * into the supplied destination - */ -void -get_interface_ethernet_address (l2t_main_t * lm, u8 * dst, u32 sw_if_index) -{ - ethernet_main_t *em = ethernet_get_main (lm->vlib_main); - ethernet_interface_t *ei; - vnet_hw_interface_t *hi; - - hi = vnet_get_sup_hw_interface (lm->vnet_main, sw_if_index); - ei = pool_elt_at_index (em->interfaces, hi->hw_instance); - clib_memcpy (dst, ei->address, sizeof (ei->address)); -} - -/* packet trace format function */ -u8 * -format_l2t_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - l2t_trace_t *t = va_arg (*args, l2t_trace_t *); - - if (t->is_user_to_network) - s = format (s, "L2T: %U (client) -> %U (our) session %d", - format_ip6_address, &t->client_address, - format_ip6_address, &t->our_address, t->session_index); - else - s = format (s, "L2T: %U (our) -> %U (client) session %d)", - format_ip6_address, &t->our_address, - format_ip6_address, &t->client_address, t->session_index); - return s; -} - -u8 * -format_l2t_session (u8 * s, va_list * args) -{ - l2t_session_t *session = va_arg (*args, l2t_session_t *); - l2t_main_t *lm = &l2t_main; - u32 counter_index; - vlib_counter_t v; - - s = format (s, "[%d] %U (our) %U (client) vlan-id %d rx_sw_if_index %d\n", - session - lm->sessions, - format_ip6_address, &session->our_address, - format_ip6_address, &session->client_address, - clib_net_to_host_u16 (session->vlan_id), session->sw_if_index); - - s = format (s, " local cookie %llx remote cookie %llx\n", - clib_net_to_host_u64 (session->local_cookie), - clib_net_to_host_u64 (session->remote_cookie)); - - if (session->cookie_flags & L2TP_COOKIE_ROLLOVER_LOCAL) - { - s = format (s, " local rollover cookie %llx\n", - clib_net_to_host_u64 (session->lcl_ro_cookie)); - } - - s = format (s, " local session-id %d remote session-id %d\n", - clib_net_to_host_u32 (session->local_session_id), - clib_net_to_host_u32 (session->remote_session_id)); - - s = format (s, " l2 specific sublayer %s\n", - session->l2_sublayer_present ? 
"preset" : "absent"); - - counter_index = - session_index_to_counter_index (session - lm->sessions, - SESSION_COUNTER_USER_TO_NETWORK); - - vlib_get_combined_counter (&lm->counter_main, counter_index, &v); - if (v.packets != 0) - s = format (s, " user-to-net: %llu pkts %llu bytes\n", - v.packets, v.bytes); - - vlib_get_combined_counter (&lm->counter_main, counter_index + 1, &v); - - if (v.packets != 0) - s = format (s, " net-to-user: %llu pkts %llu bytes\n", - v.packets, v.bytes); - return s; -} - -static clib_error_t * -show_session_summary_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_main_t *lm = &l2t_main; - - vlib_cli_output (vm, "%d active sessions\n", pool_elts (lm->sessions)); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (show_session_summary_command) = { - .path = "show session", - .short_help = "show session summary", - .function = show_session_summary_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_session_detail_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_session_t *session; - l2t_main_t *lm = &l2t_main; - - /* *INDENT-OFF* */ - pool_foreach (session, lm->sessions, - ({ - vlib_cli_output (vm, "%U", format_l2t_session, session); - })); - /* *INDENT-ON* */ - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (show_session_detail_command) = { - .path = "show session detail", - .short_help = "show session table detail", - .function = show_session_detail_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -test_counters_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - l2t_session_t *session; - l2t_main_t *lm = &l2t_main; - u32 session_index; - u32 counter_index; - u32 nincr = 0; - - /* *INDENT-OFF* */ - pool_foreach (session, lm->sessions, - ({ - session_index = session - lm->sessions; - counter_index = - session_index_to_counter_index (session_index, - SESSION_COUNTER_USER_TO_NETWORK); - vlib_increment_combined_counter (&lm->counter_main, - counter_index, - 1/*pkt*/, 1111 /*bytes*/); - vlib_increment_combined_counter (&lm->counter_main, - counter_index+1, - 1/*pkt*/, 2222 /*bytes*/); - nincr++; - })); - /* *INDENT-ON* */ - vlib_cli_output (vm, "Incremented %d active counters\n", nincr); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (test_counters_command) = { - .path = "test counters", - .short_help = "increment all active counters", - .function = test_counters_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -clear_counters_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - l2t_session_t *session; - l2t_main_t *lm = &l2t_main; - u32 session_index; - u32 counter_index; - u32 nincr = 0; - - /* *INDENT-OFF* */ - pool_foreach (session, lm->sessions, - ({ - session_index = session - lm->sessions; - counter_index = - session_index_to_counter_index (session_index, - SESSION_COUNTER_USER_TO_NETWORK); - vlib_zero_combined_counter (&lm->counter_main, counter_index); - vlib_zero_combined_counter (&lm->counter_main, counter_index+1); - nincr++; - })); - /* *INDENT-ON* */ - vlib_cli_output (vm, "Cleared %d active counters\n", nincr); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (clear_counters_command) = { - .path = "clear counters", - .short_help = "clear all active counters", - .function = clear_counters_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -l2tp_session_add_command_fn (vlib_main_t * vm, - 
unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - ip6_address_t client_address, our_address; - ip6_address_t *dst_address_copy, *src_address_copy; - unformat_input_t _line_input, *line_input = &_line_input; - u32 vlan_id; - u32 sw_if_index = (u32) ~ 0; - l2t_main_t *lm = &l2t_main; - l2t_session_t *s; - uword *p; - vnet_hw_interface_t *hi; - vnet_sw_interface_t *si; - u32 next_index; - uword vlan_and_sw_if_index_key; - u32 counter_index; - u64 local_cookie = (u64) ~ 0, remote_cookie = (u64) ~ 0; - u32 local_session_id = 1, remote_session_id = 1; - int our_address_set = 0, client_address_set = 0; - int l2_sublayer_present = 0; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "client %U", - unformat_ip6_address, &client_address)) - client_address_set = 1; - else if (unformat (line_input, "our %U", - unformat_ip6_address, &our_address)) - our_address_set = 1; - else if (unformat (line_input, "vlan %d", &vlan_id)) - ; - else if (unformat (line_input, "l2-interface %U", - unformat_vnet_sw_interface, - vnet_get_main (), &sw_if_index)) - ; - else if (unformat (line_input, "interface %U", - unformat_vnet_sw_interface, - vnet_get_main (), &sw_if_index)) - ; - else if (unformat (line_input, "local-cookie %llx", &local_cookie)) - ; - else if (unformat (line_input, "remote-cookie %llx", &remote_cookie)) - ; - else if (unformat (line_input, "local-session-id %d", - &local_session_id)) - ; - else if (unformat (line_input, "remote-session-id %d", - &remote_session_id)) - ; - else if (unformat (line_input, "l2-sublayer-present")) - l2_sublayer_present = 1; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - unformat_free (line_input); - return error; - } - } - - unformat_free (line_input); - - if (sw_if_index == (u32) ~ 0) - return clib_error_return (0, "l2-interface not specified"); - if (our_address_set == 0) - return clib_error_return (0, "our address not specified"); - if (client_address_set == 0) - return clib_error_return (0, "client address not specified"); - - remote_session_id = clib_host_to_net_u32 (remote_session_id); - local_session_id = clib_host_to_net_u32 (local_session_id); - - switch (lm->lookup_type) - { - case L2T_LOOKUP_SRC_ADDRESS: - p = hash_get_mem (lm->session_by_src_address, &client_address); - if (p) - return clib_error_return - (0, "Session w/ client address %U already exists", - format_ip6_address, &client_address); - break; - - case L2T_LOOKUP_DST_ADDRESS: - p = hash_get_mem (lm->session_by_dst_address, &our_address); - if (p) - return clib_error_return - (0, "Session w/ our address %U already exists", - format_ip6_address, &our_address); - break; - - case L2T_LOOKUP_SESSION_ID: - p = hash_get (lm->session_by_session_id, local_session_id); - if (p) - return clib_error_return - (0, - "Session w/ local session id %d already exists", - clib_net_to_host_u32 (local_session_id)); - break; - - default: - ASSERT (0); - } - - pool_get (lm->sessions, s); - memset (s, 0, sizeof (*s)); - clib_memcpy (&s->our_address, &our_address, sizeof (s->our_address)); - clib_memcpy (&s->client_address, &client_address, - sizeof (s->client_address)); - s->sw_if_index = sw_if_index; - s->vlan_id = clib_host_to_net_u16 (vlan_id); - s->local_cookie = clib_host_to_net_u64 (local_cookie); - l2tp_session_set_remote_cookie (s, remote_cookie); - 
s->local_session_id = local_session_id; - s->remote_session_id = remote_session_id; - s->l2_sublayer_present = l2_sublayer_present; - - hi = vnet_get_sup_hw_interface (lm->vnet_main, sw_if_index); - si = vnet_get_sup_sw_interface (lm->vnet_main, sw_if_index); - - next_index = vlib_node_add_next (vm, l2t_ip6_node.index, - hi->output_node_index); - s->l2_output_next_index = next_index; - s->l2_output_sw_if_index = si->sw_if_index; - - /* Setup hash table entries */ - switch (lm->lookup_type) - { - case L2T_LOOKUP_SRC_ADDRESS: - src_address_copy = clib_mem_alloc (sizeof (*src_address_copy)); - clib_memcpy (src_address_copy, &client_address, - sizeof (*src_address_copy)); - hash_set_mem (lm->session_by_src_address, src_address_copy, - s - lm->sessions); - break; - case L2T_LOOKUP_DST_ADDRESS: - dst_address_copy = clib_mem_alloc (sizeof (*dst_address_copy)); - clib_memcpy (dst_address_copy, &our_address, - sizeof (*dst_address_copy)); - hash_set_mem (lm->session_by_dst_address, dst_address_copy, - s - lm->sessions); - break; - case L2T_LOOKUP_SESSION_ID: - hash_set (lm->session_by_session_id, local_session_id, - s - lm->sessions); - break; - - default: - ASSERT (0); - } - - vlan_and_sw_if_index_key = ((uword) (s->vlan_id) << 32) | sw_if_index; - hash_set (lm->session_by_vlan_and_rx_sw_if_index, - vlan_and_sw_if_index_key, s - lm->sessions); - - /* validate counters */ - counter_index = - session_index_to_counter_index (s - lm->sessions, - SESSION_COUNTER_USER_TO_NETWORK); - vlib_validate_counter (&lm->counter_main, counter_index); - vlib_validate_counter (&lm->counter_main, counter_index + 1); - - /* Set promiscuous mode on the l2 interface */ - ethernet_set_flags (lm->vnet_main, hi->hw_if_index, - ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); - vnet_hw_interface_rx_redirect_to_node (lm->vnet_main, hi->hw_if_index, - l2t_l2_node.index); - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (l2tp_session_add_command) = { - .path = "l2tp session add", - .short_help = - "l2tp session add client our vlan local-cookie remote-cookie local-session remote-session l2-interface ", - .function = l2tp_session_add_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -l2tp_session_del_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_main_t *lm = &l2t_main; - u32 session_index; - l2t_session_t *s; - hash_pair_t *hp; - void *key; - uword vlan_and_sw_if_index_key; - - if (!unformat (input, "%d", &session_index)) - return clib_error_return (0, "missing session index: '%U'", - format_unformat_error, input); - - if (pool_is_free_index (lm->sessions, session_index)) - return clib_error_return (0, "session %d not in use", session_index); - - s = pool_elt_at_index (lm->sessions, session_index); - - switch (lm->lookup_type) - { - case L2T_LOOKUP_SRC_ADDRESS: - hp = hash_get_pair_mem (lm->session_by_src_address, &s->client_address); - if (hp) - { - key = (void *) (hp->key); - hash_unset_mem (lm->session_by_src_address, &s->client_address); - clib_mem_free (key); - } - else - clib_warning ("session %d src address key %U AWOL", - s - lm->sessions, - format_ip6_address, &s->client_address); - break; - - case L2T_LOOKUP_DST_ADDRESS: - hp = hash_get_pair_mem (lm->session_by_dst_address, &s->our_address); - if (hp) - { - key = (void *) (hp->key); - hash_unset_mem (lm->session_by_dst_address, &s->our_address); - clib_mem_free (key); - } - else - clib_warning ("session %d dst address key %U AWOL", - s - lm->sessions, format_ip6_address, &s->our_address); - break; - - case 
L2T_LOOKUP_SESSION_ID: - hash_unset (lm->session_by_session_id, s->local_session_id); - break; - - default: - ASSERT (0); - } - - vlan_and_sw_if_index_key = ((uword) (s->vlan_id) << 32) | s->sw_if_index; - - hash_unset (lm->session_by_vlan_and_rx_sw_if_index, - vlan_and_sw_if_index_key); - - pool_put (lm->sessions, s); - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (l2tp_session_del_command) = { - .path = "l2tp session delete", - .short_help = - "l2tp session delete ", - .function = l2tp_session_del_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -l2tp_session_cookie_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_main_t *lm = &l2t_main; - u32 session_index; - l2t_session_t *s; - u64 lcl_ro_cookie = (u64) ~ 0, rem_ro_cookie = (u64) ~ 0; - u8 cookie_flags = 0; - - if (!unformat (input, "%d", &session_index)) - return clib_error_return (0, "missing session index: '%U'", - format_unformat_error, input); - - if (pool_is_free_index (lm->sessions, session_index)) - return clib_error_return (0, "session %d not in use", session_index); - - s = pool_elt_at_index (lm->sessions, session_index); - - if (unformat (input, "commit")) - { - if (!s->cookie_flags) - { - return clib_error_return (0, "no rollover cookie ready to commit"); - } - else - { - l2tp_session_cookie_commit (s); - return 0; - } - } - if (!unformat (input, "rollover")) - return clib_error_return (0, "missing 'commit|rollover': '%U'", - format_unformat_error, input); - if (unformat (input, "local %llx", &lcl_ro_cookie)) - { - cookie_flags |= L2TP_COOKIE_ROLLOVER_LOCAL; - l2tp_session_set_local_rollover_cookie (s, lcl_ro_cookie); - } - if (unformat (input, "remote %llx", &rem_ro_cookie)) - { - cookie_flags |= L2TP_COOKIE_ROLLOVER_REMOTE; - l2tp_session_set_remote_cookie (s, rem_ro_cookie); - } - if (!cookie_flags) - return clib_error_return (0, "no rollover cookie specified"); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (l2tp_session_cookie_command) = { - .path = "l2tp session cookie", - .short_help = - "l2tp session cookie commit|rollover [local ] [remote ]", - .function = l2tp_session_cookie_command_fn, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vpp/app/l2t_l2.c b/src/vpp/app/l2t_l2.c deleted file mode 100644 index 07d30d9a..00000000 --- a/src/vpp/app/l2t_l2.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include - -#if DPDK == 0 -#include -#include -#include -#else -#include -#endif - -#include -#include -#include - -l2t_main_t l2t_main; - -/* Statistics (not really errors) */ -#define foreach_l2t_l2_error \ -_(NETWORK_TO_USER, "L2 network to user (ip6) pkts") - -static char *l2t_l2_error_strings[] = { -#define _(sym,string) string, - foreach_l2t_l2_error -#undef _ -}; - -typedef enum -{ -#define _(sym,str) L2T_L2_ERROR_##sym, - foreach_l2t_l2_error -#undef _ - L2T_L2_N_ERROR, -} l2t_l2_error_t; - -/* - * Packets go to ethernet-input when they don't match a mapping - */ -typedef enum -{ - L2T_L2_NEXT_DROP, - L2T_L2_NEXT_ETHERNET_INPUT, - L2T_L2_NEXT_IP6_LOOKUP, - L2T_L2_N_NEXT, -} l2t_l2_next_t; - -vlib_node_registration_t l2t_l2_node; - -#define NSTAGES 3 - -static inline void -stage0 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index); - vlib_prefetch_buffer_header (b, STORE); - CLIB_PREFETCH (b->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); -} - -static inline void -stage1 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - l2t_main_t *lm = &l2t_main; - ethernet_header_t *eh; - ethernet_vlan_header_t *vh; - u32 session_index; - uword *p; - uword vlan_and_sw_if_index_key; - - /* just in case, needed to test with the tun/tap device */ - vlib_buffer_reset (b); - - eh = vlib_buffer_get_current (b); - - /* Not a VLAN pkt? send to ethernet-input... */ - if (PREDICT_FALSE (eh->type != clib_host_to_net_u16 (0x8100))) - { - vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_ETHERNET_INPUT; - return; - } - vh = (ethernet_vlan_header_t *) (eh + 1); - - /* look up session */ - vlan_and_sw_if_index_key = ((uword) (vh->priority_cfi_and_id) << 32) - | vnet_buffer (b)->sw_if_index[VLIB_RX]; - - p = hash_get (lm->session_by_vlan_and_rx_sw_if_index, - vlan_and_sw_if_index_key); - - if (PREDICT_FALSE (p == 0)) - { - /* $$$ drop here if not for our MAC? */ - vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_ETHERNET_INPUT; - return; - } - else - { - session_index = p[0]; - } - - /* Remember mapping index, prefetch the mini counter */ - vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_IP6_LOOKUP; - vnet_buffer (b)->l2t.session_index = session_index; - - /* Each mapping has 2 x (pkt, byte) counters, hence the shift */ - CLIB_PREFETCH (lm->counter_main.mini + (p[0] << 1), CLIB_CACHE_LINE_BYTES, - STORE); -} - -static inline u32 -last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - l2t_main_t *lm = &l2t_main; - ethernet_header_t *eh = vlib_buffer_get_current (b); - vlib_node_t *n = vlib_get_node (vm, l2t_l2_node.index); - u32 node_counter_base_index = n->error_heap_index; - vlib_error_main_t *em = &vm->error_main; - l2tpv3_header_t *l2t; /* l2 header */ - ethernet_vlan_header_t *vh; /* 802.1q vlan header */ - u32 counter_index; - l2t_session_t *s; - ip6_header_t *ip6; - u16 payload_ethertype; - u8 dst_mac_address[6]; - u8 src_mac_address[6]; - u16 payload_length; - i32 backup; - - /* Other-than-output pkt? We're done... 
*/ - if (vnet_buffer (b)->l2t.next_index != L2T_L2_NEXT_IP6_LOOKUP) - return vnet_buffer (b)->l2t.next_index; - - vh = (ethernet_vlan_header_t *) (eh + 1); - - em->counters[node_counter_base_index + L2T_L2_ERROR_NETWORK_TO_USER] += 1; - - counter_index = - session_index_to_counter_index (vnet_buffer (b)->l2t.session_index, - SESSION_COUNTER_NETWORK_TO_USER); - - /* per-mapping byte stats include the ethernet header */ - vlib_increment_combined_counter (&lm->counter_main, counter_index, - 1 /* packet_increment */ , - vlib_buffer_length_in_chain (vm, b) + - sizeof (ethernet_header_t)); - - s = pool_elt_at_index (lm->sessions, vnet_buffer (b)->l2t.session_index); - - /* Save src/dst MAC addresses */ -#define _(i) dst_mac_address[i] = eh->dst_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ -#define _(i) src_mac_address[i] = eh->src_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ - - payload_ethertype = vh->type; - - /* Splice out the 802.1q vlan tag */ - vlib_buffer_advance (b, 4); - eh = vlib_buffer_get_current (b); - - /* restore src/dst MAC addresses */ -#define _(i) eh->dst_address[i] = dst_mac_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ -#define _(i) eh->src_address[i] = src_mac_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ - eh->type = payload_ethertype; - - /* Paint on an l2tpv3 hdr */ - backup = sizeof (*l2t); -#if 0 - /* back up 4 bytes less if no l2 sublayer */ - backup -= s->l2_sublayer_present ? 0 : 4; -#endif - - vlib_buffer_advance (b, -backup); - l2t = vlib_buffer_get_current (b); - - l2t->session_id = s->remote_session_id; - l2t->cookie = s->remote_cookie; - -#if 0 - if (s->l2_sublayer_present) - l2t->l2_specific_sublayer = 0; -#endif - - /* Paint on an ip6 header */ - vlib_buffer_advance (b, -(sizeof (*ip6))); - ip6 = vlib_buffer_get_current (b); - - ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0x6 << 28); - - /* calculate ip6 payload length */ - payload_length = vlib_buffer_length_in_chain (vm, b); - payload_length -= sizeof (*ip6); - - ip6->payload_length = clib_host_to_net_u16 (payload_length); - ip6->protocol = 0x73; /* l2tpv3 */ - ip6->hop_limit = 0xff; - ip6->src_address.as_u64[0] = s->our_address.as_u64[0]; - ip6->src_address.as_u64[1] = s->our_address.as_u64[1]; - ip6->dst_address.as_u64[0] = s->client_address.as_u64[0]; - ip6->dst_address.as_u64[1] = s->client_address.as_u64[1]; - - return L2T_L2_NEXT_IP6_LOOKUP; -} - -#include - -static uword -l2t_l2_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return dispatch_pipeline (vm, node, frame); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (l2t_l2_node) = { - .function = l2t_l2_node_fn, - .name = "l2t-l2-input", - .vector_size = sizeof (u32), - .format_trace = format_l2t_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(l2t_l2_error_strings), - .error_strings = l2t_l2_error_strings, - - .n_next_nodes = L2T_L2_N_NEXT, - - /* edit / add dispositions here */ - .next_nodes = { - [L2T_L2_NEXT_IP6_LOOKUP] = "ip6-lookup", - [L2T_L2_NEXT_ETHERNET_INPUT] = "ethernet-input", - [L2T_L2_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (l2t_l2_node, l2t_l2_node_fn); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ -- cgit 1.2.3-korg From aad20988b6a0a5058e520948d2a5835cfbc3b523 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 20 Jun 2017 16:35:29 +0200 Subject: Rewrite vppctl in C - removes python dependency 
- removes vpp_api_test dependency - communicates over unix socket - properly detects terminal size and type - responds on terminal resize Change-Id: I46c0a49f9b5f9ef8a0a31faec4fc5d49aa3ee02e Signed-off-by: Damjan Marion --- Makefile | 2 +- src/vpp.am | 4 + src/vpp/app/vppctl.c | 322 ++++++++++++++++++++++++++++++++++++++++++++++ src/vpp/conf/startup.conf | 1 + 4 files changed, 328 insertions(+), 1 deletion(-) create mode 100644 src/vpp/app/vppctl.c (limited to 'src/vpp/app') diff --git a/Makefile b/Makefile index 3ef236a6..af946959 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ GDB?=gdb PLATFORM?=vpp SAMPLE_PLUGIN?=no -MINIMAL_STARTUP_CONF="unix { interactive }" +MINIMAL_STARTUP_CONF="unix { interactive cli-listen /run/vpp/cli.sock gid $(shell id -g) }" GDB_ARGS= -ex "handle SIGUSR1 noprint nostop" diff --git a/src/vpp.am b/src/vpp.am index 1c95949a..614bd26a 100644 --- a/src/vpp.am +++ b/src/vpp.am @@ -85,6 +85,10 @@ bin_vpp_LDADD = \ bin_vpp_LDFLAGS = -Wl,--export-dynamic +bin_PROGRAMS += bin/vppctl +bin_vppctl_SOURCES = vpp/app/vppctl.c +bin_vppctl_LDADD = libvppinfra.la + if ENABLE_TESTS noinst_PROGRAMS += bin/test_client diff --git a/src/vpp/app/vppctl.c b/src/vpp/app/vppctl.c new file mode 100644 index 00000000..a8f3eab0 --- /dev/null +++ b/src/vpp/app/vppctl.c @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG 0 + +#if DEBUG +#define TELCMDS +#define TELOPTS +#endif + +#include + +#include +#include +#include + +#define SOCKET_FILE "/run/vpp/cli.sock" + +volatile int window_resized = 0; +struct termios orig_tio; + +static void +send_ttype (clib_socket_t * s, int is_dumb) +{ + clib_socket_tx_add_formatted (s, "%c%c%c" "%c%s" "%c%c", + IAC, SB, TELOPT_TTYPE, + 0, is_dumb ? 
"dumb" : getenv ("TERM"), + IAC, SE); + clib_socket_tx (s); +} + +static void +send_naws (clib_socket_t * s) +{ + struct winsize ws; + + if (ioctl (STDIN_FILENO, TIOCGWINSZ, &ws) < 0) + { + clib_unix_warning ("ioctl(TIOCGWINSZ)"); + return; + } + + clib_socket_tx_add_formatted (s, "%c%c%c" "%c%c%c%c" "%c%c", + IAC, SB, TELOPT_NAWS, + ws.ws_col >> 8, ws.ws_col & 0xff, + ws.ws_row >> 8, ws.ws_row & 0xff, IAC, SE); + clib_socket_tx (s); +} + +static void +signal_handler_winch (int signum) +{ + window_resized = 1; +} + +static void +signal_handler_term (int signum) +{ + tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_tio); +} + +static u8 * +process_input (u8 * str, clib_socket_t * s, int is_interactive) +{ + int i = 0; + + while (i < vec_len (s->rx_buffer)) + { + if (s->rx_buffer[i] == IAC) + { + if (s->rx_buffer[i + 1] == SB) + { + u8 *sb = 0; + char opt = s->rx_buffer[i + 2]; + i += 3; + while (s->rx_buffer[i] != IAC) + vec_add1 (sb, s->rx_buffer[i++]); + +#if DEBUG + clib_warning ("SB %s\n %U", TELOPT (opt), + format_hexdump, sb, vec_len (sb)); +#endif + vec_free (sb); + i += 2; + if (opt == TELOPT_TTYPE) + send_ttype (s, !is_interactive); + else if (is_interactive && opt == TELOPT_NAWS) + send_naws (s); + } + else + { +#if DEBUG + clib_warning ("IAC at %d, IAC %s %s", i, + TELCMD (s->rx_buffer[i + 1]), + TELOPT (s->rx_buffer[i + 2])); +#endif + i += 3; + } + } + else + vec_add1 (str, s->rx_buffer[i++]); + } + vec_reset_length (s->rx_buffer); + return str; +} + + +int +main (int argc, char *argv[]) +{ + clib_socket_t _s = { 0 }, *s = &_s; + clib_error_t *error = 0; + struct epoll_event event; + struct sigaction sa; + struct termios tio; + int efd = -1; + u8 *str = 0; + u8 *cmd = 0; + int do_quit = 0; + + + clib_mem_init (0, 64ULL << 10); + + /* process command line */ + argc--; + argv++; + + if (argc > 1 && strcmp (argv[0], "-s") == 0) + { + s->config = argv[1]; + argc -= 2; + argv += 2; + } + else + s->config = SOCKET_FILE; + + while (argc--) + cmd = format (cmd, "%s%c", (argv++)[0], argc ? 
' ' : 0); + + s->flags = SOCKET_IS_CLIENT; + + error = clib_socket_init (s); + if (error) + goto done; + + /* Capture terminal resize events */ + memset (&sa, 0, sizeof (struct sigaction)); + sa.sa_handler = signal_handler_winch; + + if (sigaction (SIGWINCH, &sa, 0) < 0) + { + error = clib_error_return_unix (0, "sigaction"); + goto done; + } + + sa.sa_handler = signal_handler_term; + if (sigaction (SIGTERM, &sa, 0) < 0) + { + error = clib_error_return_unix (0, "sigaction"); + goto done; + } + + /* Save the original tty state so we can restore it later */ + tcgetattr (STDIN_FILENO, &orig_tio); + + /* Tweak the tty settings */ + tio = orig_tio; + /* echo off, canonical mode off, ext'd input processing off */ + tio.c_lflag &= ~(ECHO | ICANON | IEXTEN); + tio.c_cc[VMIN] = 1; /* 1 byte at a time */ + tio.c_cc[VTIME] = 0; /* no timer */ + tcsetattr (STDIN_FILENO, TCSAFLUSH, &tio); + + efd = epoll_create1 (0); + + /* register STDIN */ + event.events = EPOLLIN | EPOLLPRI | EPOLLERR; + event.data.fd = STDIN_FILENO; + if (epoll_ctl (efd, EPOLL_CTL_ADD, STDIN_FILENO, &event) != 0) + { + error = clib_error_return_unix (0, "epoll_ctl[%d]", STDIN_FILENO); + goto done; + } + + /* register socket */ + event.events = EPOLLIN | EPOLLPRI | EPOLLERR; + event.data.fd = s->fd; + if (epoll_ctl (efd, EPOLL_CTL_ADD, s->fd, &event) != 0) + { + error = clib_error_return_unix (0, "epoll_ctl[%d]", s->fd); + goto done; + } + + while (1) + { + int n; + + if (window_resized) + { + window_resized = 0; + send_naws (s); + } + + if ((n = epoll_wait (efd, &event, 1, -1)) < 0) + { + /* maybe we received signal */ + if (errno == EINTR) + continue; + + error = clib_error_return_unix (0, "epoll_wait"); + goto done; + } + + if (n == 0) + continue; + + if (event.data.fd == STDIN_FILENO) + { + int n; + char c[100]; + + n = read (STDIN_FILENO, c, sizeof (c)); + if (n > 0) + { + memcpy (clib_socket_tx_add (s, n), c, n); + error = clib_socket_tx (s); + if (error) + goto done; + } + else if (n < 0) + clib_warning ("read rv=%d", n); + } + else if (event.data.fd == s->fd) + { + error = clib_socket_rx (s, 100); + if (error) + break; + + if (clib_socket_rx_end_of_file (s)) + break; + + str = process_input (str, s, cmd == 0); + + if (vec_len (str) > 0) + { + n = write (STDOUT_FILENO, str, vec_len (str)); + if (n < 0) + { + error = clib_error_return_unix (0, "write"); + goto done; + } + vec_reset_length (str); + } + + if (do_quit) + { + clib_socket_tx_add_formatted (s, "q\n"); + clib_socket_tx (s); + do_quit = 0; + } + if (cmd) + { + clib_socket_tx_add_formatted (s, "%s\n", cmd); + clib_socket_tx (s); + vec_free (cmd); + do_quit = 1; + } + } + else + { + error = clib_error_return (0, "unknown fd"); + goto done; + } + } + + error = clib_socket_close (s); + +done: + vec_free (cmd); + vec_free (str); + if (efd > -1) + close (efd); + + if (error) + { + clib_error_report (error); + return 1; + } + tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_tio); + return 0; +} + +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf index f671439b..1d22358d 100644 --- a/src/vpp/conf/startup.conf +++ b/src/vpp/conf/startup.conf @@ -3,6 +3,7 @@ unix { nodaemon log /tmp/vpp.log full-coredump + cli-listen /run/vpp/cli.sock } api-trace { -- cgit 1.2.3-korg From da78f957e46c686434149d332a477d7ea055d76a Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 24 May 2017 09:15:43 -0700 Subject: L2 over MPLS [support for 
VPWS/VPLS] - switch to using dpo_proto_t rather than fib_protocol_t in fib_paths so that we can describe L2 paths - VLIB nodes to handle pop/push of MPLS labels to L2 Change-Id: Id050d06a11fd2c9c1c81ce5a0654e6c5ae6afa6e Signed-off-by: Neale Ranns --- src/plugins/gtpu/gtpu.c | 2 +- src/plugins/snat/snat.c | 2 +- src/vat/api_format.c | 17 +- src/vnet/dhcp/client.c | 6 +- src/vnet/dhcp/dhcp6_proxy_node.c | 2 +- src/vnet/dpo/dpo.c | 19 ++ src/vnet/dpo/dpo.h | 11 +- src/vnet/dpo/interface_dpo.c | 30 +++ src/vnet/dpo/mpls_label_dpo.c | 45 +++- src/vnet/ethernet/arp.c | 4 +- src/vnet/fib/fib_api.h | 4 +- src/vnet/fib/fib_entry.c | 8 +- src/vnet/fib/fib_entry_src.c | 16 +- src/vnet/fib/fib_entry_src.h | 4 +- src/vnet/fib/fib_entry_src_api.c | 2 +- src/vnet/fib/fib_entry_src_default_route.c | 2 +- src/vnet/fib/fib_entry_src_interface.c | 2 +- src/vnet/fib/fib_entry_src_lisp.c | 8 +- src/vnet/fib/fib_entry_src_mpls.c | 4 +- src/vnet/fib/fib_entry_src_rr.c | 15 +- src/vnet/fib/fib_entry_src_special.c | 2 +- src/vnet/fib/fib_path.c | 79 +++---- src/vnet/fib/fib_path.h | 11 +- src/vnet/fib/fib_path_ext.c | 3 + src/vnet/fib/fib_path_list.c | 4 +- src/vnet/fib/fib_path_list.h | 4 +- src/vnet/fib/fib_table.c | 6 +- src/vnet/fib/fib_table.h | 6 +- src/vnet/fib/fib_test.c | 338 ++++++++++++++--------------- src/vnet/fib/fib_types.h | 8 +- src/vnet/interface_format.c | 12 +- src/vnet/ip/ip4_forward.c | 6 +- src/vnet/ip/ip6_forward.c | 4 +- src/vnet/ip/ip6_neighbor.c | 10 +- src/vnet/ip/ip_api.c | 38 ++-- src/vnet/ip/lookup.c | 18 +- src/vnet/lisp-gpe/lisp_gpe.c | 13 +- src/vnet/lisp-gpe/lisp_gpe_api.c | 10 +- src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c | 5 +- src/vnet/mfib/ip6_mfib.c | 6 +- src/vnet/mfib/mfib_entry.c | 10 +- src/vnet/mfib/mfib_test.c | 14 +- src/vnet/mpls/mpls.api | 4 +- src/vnet/mpls/mpls.c | 36 +-- src/vnet/mpls/mpls_api.c | 21 +- src/vnet/mpls/mpls_tunnel.c | 75 ++++--- src/vnet/mpls/mpls_tunnel.h | 38 ++-- src/vnet/srmpls/sr_mpls_policy.c | 6 +- src/vnet/srmpls/sr_mpls_steering.c | 2 +- src/vnet/srv6/sr_steering.c | 4 +- src/vnet/vxlan-gpe/vxlan_gpe.c | 2 +- src/vnet/vxlan/vxlan.c | 2 +- src/vpp/app/vpe_cli.c | 2 +- test/test_bfd.py | 6 +- test/test_gre.py | 24 +- test/test_ip6.py | 26 +-- test/test_map.py | 19 +- test/test_mpls.py | 318 ++++++++++++++++++++++----- test/test_p2p_ethernet.py | 16 +- test/vpp_ip_route.py | 24 +- test/vpp_mpls_tunnel_interface.py | 6 +- test/vpp_papi_provider.py | 4 +- 62 files changed, 889 insertions(+), 556 deletions(-) (limited to 'src/vpp/app') diff --git a/src/plugins/gtpu/gtpu.c b/src/plugins/gtpu/gtpu.c index 84745bd8..3dfb4210 100755 --- a/src/plugins/gtpu/gtpu.c +++ b/src/plugins/gtpu/gtpu.c @@ -534,7 +534,7 @@ int vnet_gtpu_add_del_tunnel fib_node_index_t mfei; adj_index_t ai; fib_route_path_t path = { - .frp_proto = fp, + .frp_proto = fib_proto_to_dpo (fp), .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, diff --git a/src/plugins/snat/snat.c b/src/plugins/snat/snat.c index 9fbc1e54..f196b5c2 100644 --- a/src/plugins/snat/snat.c +++ b/src/plugins/snat/snat.c @@ -135,7 +135,7 @@ snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_EXCLUSIVE), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, sw_if_index, ~0, diff --git a/src/vat/api_format.c b/src/vat/api_format.c index f97cdeef..009cf173 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -7498,7 +7498,7 @@ api_mpls_route_add_del (vat_main_t * vam) mpls_label_t 
*next_hop_out_label_stack = NULL; mpls_label_t local_label = MPLS_LABEL_INVALID; u8 is_eos = 0; - u8 next_hop_proto_is_ip4 = 1; + dpo_proto_t next_hop_proto = DPO_PROTO_IP4; /* Parse args required to build the message */ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) @@ -7517,13 +7517,13 @@ api_mpls_route_add_del (vat_main_t * vam) &v4_next_hop_address)) { next_hop_set = 1; - next_hop_proto_is_ip4 = 1; + next_hop_proto = DPO_PROTO_IP4; } else if (unformat (i, "via %U", unformat_ip6_address, &v6_next_hop_address)) { next_hop_set = 1; - next_hop_proto_is_ip4 = 0; + next_hop_proto = DPO_PROTO_IP6; } else if (unformat (i, "weight %d", &next_hop_weight)) ; @@ -7548,12 +7548,12 @@ api_mpls_route_add_del (vat_main_t * vam) else if (unformat (i, "lookup-in-ip4-table %d", &next_hop_table_id)) { next_hop_set = 1; - next_hop_proto_is_ip4 = 1; + next_hop_proto = DPO_PROTO_IP4; } else if (unformat (i, "lookup-in-ip6-table %d", &next_hop_table_id)) { next_hop_set = 1; - next_hop_proto_is_ip4 = 0; + next_hop_proto = DPO_PROTO_IP6; } else if (unformat (i, "next-hop-table %d", &next_hop_table_id)) ; @@ -7599,7 +7599,7 @@ api_mpls_route_add_del (vat_main_t * vam) mp->mr_create_table_if_needed = create_table_if_needed; mp->mr_is_add = is_add; - mp->mr_next_hop_proto_is_ip4 = next_hop_proto_is_ip4; + mp->mr_next_hop_proto = next_hop_proto; mp->mr_is_classify = is_classify; mp->mr_is_multipath = is_multipath; mp->mr_is_resolve_host = resolve_host; @@ -7622,13 +7622,14 @@ api_mpls_route_add_del (vat_main_t * vam) if (next_hop_set) { - if (next_hop_proto_is_ip4) + if (DPO_PROTO_IP4 == next_hop_proto) { clib_memcpy (mp->mr_next_hop, &v4_next_hop_address, sizeof (v4_next_hop_address)); } - else + else if (DPO_PROTO_IP6 == next_hop_proto) + { clib_memcpy (mp->mr_next_hop, &v6_next_hop_address, diff --git a/src/vnet/dhcp/client.c b/src/vnet/dhcp/client.c index cfe62a6f..dd5e99f2 100644 --- a/src/vnet/dhcp/client.c +++ b/src/vnet/dhcp/client.c @@ -296,7 +296,7 @@ int dhcp_client_for_us (u32 bi, vlib_buffer_t * b, &all_0s, FIB_SOURCE_DHCP, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh, c->sw_if_index, ~0, @@ -605,7 +605,7 @@ dhcp_bound_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now) c->sw_if_index), &all_0s, FIB_SOURCE_DHCP, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh, c->sw_if_index, ~0, @@ -900,7 +900,7 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a) c->sw_if_index), &all_0s, FIB_SOURCE_DHCP, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh, c->sw_if_index, ~0, diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index e109cc4c..9c2f5220 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -857,7 +857,7 @@ dhcp6_proxy_set_server (ip46_address_t *addr, else { const fib_route_path_t path_for_us = { - .frp_proto = FIB_PROTOCOL_IP6, + .frp_proto = DPO_PROTO_IP6, .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c index 389f995b..aa770838 100644 --- a/src/vnet/dpo/dpo.c +++ b/src/vnet/dpo/dpo.c @@ -109,6 +109,25 @@ vnet_link_to_dpo_proto (vnet_link_t linkt) return (0); } +vnet_link_t +dpo_proto_to_link (dpo_proto_t dp) +{ + switch (dp) + { + case DPO_PROTO_IP6: + return (VNET_LINK_IP6); + case DPO_PROTO_IP4: + return (VNET_LINK_IP4); + case DPO_PROTO_MPLS: + return (VNET_LINK_MPLS); + case DPO_PROTO_ETHERNET: + return (VNET_LINK_ETHERNET); + case DPO_PROTO_NSH: + return (VNET_LINK_NSH); + } + return (~0); +} + u8 * format_dpo_type (u8 * s, 
va_list * args) { diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h index 5aa4e2d2..42fc51d4 100644 --- a/src/vnet/dpo/dpo.h +++ b/src/vnet/dpo/dpo.h @@ -59,14 +59,10 @@ typedef u32 index_t; */ typedef enum dpo_proto_t_ { -#if CLIB_DEBUG > 0 - DPO_PROTO_IP4 = 1, -#else DPO_PROTO_IP4 = 0, -#endif DPO_PROTO_IP6, - DPO_PROTO_ETHERNET, DPO_PROTO_MPLS, + DPO_PROTO_ETHERNET, DPO_PROTO_NSH, } __attribute__((packed)) dpo_proto_t; @@ -272,6 +268,11 @@ extern u8 *format_dpo_type(u8 * s, va_list * args); */ extern u8 *format_dpo_proto(u8 * s, va_list * args); +/** + * @brief format a DPO protocol + */ +extern vnet_link_t dpo_proto_to_link(dpo_proto_t dp); + /** * @brief * Set and stack a DPO. diff --git a/src/vnet/dpo/interface_dpo.c b/src/vnet/dpo/interface_dpo.c index 8d700c23..780bfa2a 100644 --- a/src/vnet/dpo/interface_dpo.c +++ b/src/vnet/dpo/interface_dpo.c @@ -195,11 +195,17 @@ const static char* const interface_dpo_ip6_nodes[] = "interface-dpo-ip4", NULL, }; +const static char* const interface_dpo_l2_nodes[] = +{ + "interface-dpo-l2", + NULL, +}; const static char* const * const interface_dpo_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP4] = interface_dpo_ip4_nodes, [DPO_PROTO_IP6] = interface_dpo_ip6_nodes, + [DPO_PROTO_ETHERNET] = interface_dpo_l2_nodes, [DPO_PROTO_MPLS] = NULL, }; @@ -382,6 +388,14 @@ interface_dpo_ip6 (vlib_main_t * vm, return (interface_dpo_inline(vm, node, from_frame)); } +static uword +interface_dpo_l2 (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (interface_dpo_inline(vm, node, from_frame)); +} + VLIB_REGISTER_NODE (interface_dpo_ip4_node) = { .function = interface_dpo_ip4, .name = "interface-dpo-ip4", @@ -414,3 +428,19 @@ VLIB_REGISTER_NODE (interface_dpo_ip6_node) = { VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_ip6_node, interface_dpo_ip6) +VLIB_REGISTER_NODE (interface_dpo_l2_node) = { + .function = interface_dpo_l2, + .name = "interface-dpo-l2", + .vector_size = sizeof (u32), + .format_trace = format_interface_dpo_trace, + + .n_next_nodes = 2, + .next_nodes = { + [INTERFACE_DPO_DROP] = "error-drop", + [INTERFACE_DPO_INPUT] = "l2-input", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (interface_dpo_l2_node, + interface_dpo_l2) + diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index 1c451a51..b178a902 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -192,7 +192,8 @@ mpls_label_imposition_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, u8 payload_is_ip4, - u8 payload_is_ip6) + u8 payload_is_ip6, + u8 payload_is_ethernet) { u32 n_left_from, next_index, * from, * to_next; @@ -320,6 +321,13 @@ mpls_label_imposition_inline (vlib_main_t * vm, ttl2 = ip2->hop_limit; ttl3 = ip3->hop_limit; } + else if (payload_is_ethernet) + { + /* + * nothing to chang ein the ethernet header + */ + ttl0 = ttl1 = ttl2 = ttl3 = 255; + } else { /* @@ -551,7 +559,7 @@ mpls_label_imposition (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return (mpls_label_imposition_inline(vm, node, frame, 0, 0)); + return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 0)); } VLIB_REGISTER_NODE (mpls_label_imposition_node) = { @@ -573,7 +581,7 @@ ip4_mpls_label_imposition (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return (mpls_label_imposition_inline(vm, node, frame, 1, 0)); + return (mpls_label_imposition_inline(vm, node, frame, 1, 0, 0)); } VLIB_REGISTER_NODE (ip4_mpls_label_imposition_node) = { @@ -595,7 +603,7 @@ 
ip6_mpls_label_imposition (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return (mpls_label_imposition_inline(vm, node, frame, 0, 1)); + return (mpls_label_imposition_inline(vm, node, frame, 0, 1, 0)); } VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = { @@ -612,6 +620,28 @@ VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = { VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_imposition_node, ip6_mpls_label_imposition) +static uword +ethernet_mpls_label_imposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 1)); +} + +VLIB_REGISTER_NODE (ethernet_mpls_label_imposition_node) = { + .function = ethernet_mpls_label_imposition, + .name = "ethernet-mpls-label-imposition", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_label_imposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (ethernet_mpls_label_imposition_node, + ethernet_mpls_label_imposition) + static void mpls_label_dpo_mem_show (void) { @@ -643,11 +673,18 @@ const static char* const mpls_label_imp_mpls_nodes[] = "mpls-label-imposition", NULL, }; +const static char* const mpls_label_imp_ethernet_nodes[] = +{ + "ethernet-mpls-label-imposition", + NULL, +}; + const static char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP4] = mpls_label_imp_ip4_nodes, [DPO_PROTO_IP6] = mpls_label_imp_ip6_nodes, [DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes, + [DPO_PROTO_ETHERNET] = mpls_label_imp_ethernet_nodes, }; diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index 4d9edaf5..8a394006 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -588,7 +588,7 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, e->fib_entry_index = fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, &pfx.fp_addr, + DPO_PROTO_IP4, &pfx.fp_addr, e->sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); } @@ -1621,7 +1621,7 @@ arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e) fib_table_entry_path_remove (fib_index, &pfx, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx.fp_addr, e->sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h index 73d76a42..d07d6cae 100644 --- a/src/vnet/fib/fib_api.h +++ b/src/vnet/fib/fib_api.h @@ -21,7 +21,7 @@ int add_del_route_check (fib_protocol_t table_proto, u32 table_id, u32 next_hop_sw_if_index, - fib_protocol_t next_hop_table_proto, + dpo_proto_t next_hop_table_proto, u32 next_hop_table_id, u8 create_missing_tables, u8 is_rpf_id, @@ -43,7 +43,7 @@ add_del_route_t_handler (u8 is_multipath, u8 is_rpf_id, u32 fib_index, const fib_prefix_t * prefix, - u8 next_hop_proto_is_ip4, + dpo_proto_t next_hop_proto, const ip46_address_t * next_hop, u32 next_hop_sw_if_index, u8 next_hop_fib_index, diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index d7ff1c8c..2027f2be 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -58,12 +58,18 @@ fib_entry_get_index (const fib_entry_t * fib_entry) return (fib_entry - fib_entry_pool); } -static fib_protocol_t +fib_protocol_t fib_entry_get_proto (const fib_entry_t * fib_entry) { return (fib_entry->fe_prefix.fp_proto); } +dpo_proto_t +fib_entry_get_dpo_proto (const fib_entry_t * fib_entry) +{ + return (fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto)); +} + fib_forward_chain_type_t 
fib_entry_get_default_chain_type (const fib_entry_t *fib_entry) { diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c index ff73cbf9..173df74f 100644 --- a/src/vnet/fib/fib_entry_src.c +++ b/src/vnet/fib/fib_entry_src.c @@ -29,12 +29,6 @@ */ static fib_entry_src_vft_t fib_entry_src_vft[FIB_SOURCE_MAX]; -static fib_protocol_t -fib_entry_get_proto (const fib_entry_t * fib_entry) -{ - return (fib_entry->fe_prefix.fp_proto); -} - void fib_entry_src_register (fib_source_t source, const fib_entry_src_vft_t *vft) @@ -861,7 +855,7 @@ fib_entry_src_action_add (fib_entry_t *fib_entry, fib_entry_src_vft[source].fesv_add(esrc, fib_entry, flags, - fib_entry_get_proto(fib_entry), + fib_entry_get_dpo_proto(fib_entry), dpo); } @@ -914,7 +908,7 @@ fib_entry_src_action_update (fib_entry_t *fib_entry, fib_entry_src_vft[source].fesv_add(esrc, fib_entry, flags, - fib_entry_get_proto(fib_entry), + fib_entry_get_dpo_proto(fib_entry), dpo); } @@ -1106,8 +1100,7 @@ fib_entry_src_action_path_add (fib_entry_t *fib_entry, source, flags, drop_dpo_get( - fib_proto_to_dpo( - fib_entry_get_proto(fib_entry)))); + fib_entry_get_dpo_proto(fib_entry))); esrc = fib_entry_src_find(fib_entry, source, NULL); } @@ -1166,8 +1159,7 @@ fib_entry_src_action_path_swap (fib_entry_t *fib_entry, source, flags, drop_dpo_get( - fib_proto_to_dpo( - fib_entry_get_proto(fib_entry)))); + fib_entry_get_dpo_proto(fib_entry))); esrc = fib_entry_src_find(fib_entry, source, NULL); } diff --git a/src/vnet/fib/fib_entry_src.h b/src/vnet/fib/fib_entry_src.h index 640c174d..35c43936 100644 --- a/src/vnet/fib/fib_entry_src.h +++ b/src/vnet/fib/fib_entry_src.h @@ -73,7 +73,7 @@ typedef void (*fib_entry_src_deactivate_t)(fib_entry_src_t *src, typedef void (*fib_entry_src_add_t)(fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t *dpo); /** @@ -277,6 +277,8 @@ extern void fib_entry_src_mk_lb (fib_entry_t *fib_entry, fib_forward_chain_type_t fct, dpo_id_t *dpo_lb); +extern fib_protocol_t fib_entry_get_proto(const fib_entry_t * fib_entry); +extern dpo_proto_t fib_entry_get_dpo_proto(const fib_entry_t * fib_entry); /* * Per-source registration. 
declared here so we save a separate .h file for each diff --git a/src/vnet/fib/fib_entry_src_api.c b/src/vnet/fib/fib_entry_src_api.c index f895886b..1cdcfbde 100644 --- a/src/vnet/fib/fib_entry_src_api.c +++ b/src/vnet/fib/fib_entry_src_api.c @@ -131,7 +131,7 @@ static void fib_entry_src_api_add (fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t *dpo) { if (FIB_ENTRY_FLAG_NONE != flags) diff --git a/src/vnet/fib/fib_entry_src_default_route.c b/src/vnet/fib/fib_entry_src_default_route.c index 9f4e7c36..431abb66 100644 --- a/src/vnet/fib/fib_entry_src_default_route.c +++ b/src/vnet/fib/fib_entry_src_default_route.c @@ -35,7 +35,7 @@ static void fib_entry_src_default_route_add (fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t *dpo) { src->fes_pl = fib_path_list_create_special(proto, diff --git a/src/vnet/fib/fib_entry_src_interface.c b/src/vnet/fib/fib_entry_src_interface.c index bb87818f..6c087f34 100644 --- a/src/vnet/fib/fib_entry_src_interface.c +++ b/src/vnet/fib/fib_entry_src_interface.c @@ -35,7 +35,7 @@ static void fib_entry_src_interface_add (fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t *dpo) { src->fes_pl = fib_path_list_create_special( diff --git a/src/vnet/fib/fib_entry_src_lisp.c b/src/vnet/fib/fib_entry_src_lisp.c index 7f8b91bb..e72dce63 100644 --- a/src/vnet/fib/fib_entry_src_lisp.c +++ b/src/vnet/fib/fib_entry_src_lisp.c @@ -79,10 +79,10 @@ fib_entry_src_lisp_path_remove (fib_entry_src_t *src, static void fib_entry_src_lisp_add (fib_entry_src_t *src, - const fib_entry_t *entry, - fib_entry_flag_t flags, - fib_protocol_t proto, - const dpo_id_t *dpo) + const fib_entry_t *entry, + fib_entry_flag_t flags, + dpo_proto_t proto, + const dpo_id_t *dpo) { if (FIB_ENTRY_FLAG_NONE != flags) { diff --git a/src/vnet/fib/fib_entry_src_mpls.c b/src/vnet/fib/fib_entry_src_mpls.c index 14c7310f..a616458f 100644 --- a/src/vnet/fib/fib_entry_src_mpls.c +++ b/src/vnet/fib/fib_entry_src_mpls.c @@ -57,13 +57,13 @@ static void fib_entry_src_mpls_add (fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t *dpo) { src->fes_pl = fib_path_list_create_special(proto, FIB_PATH_LIST_FLAG_DROP, - drop_dpo_get(fib_proto_to_dpo(proto))); + drop_dpo_get(proto)); } static void diff --git a/src/vnet/fib/fib_entry_src_rr.c b/src/vnet/fib/fib_entry_src_rr.c index d66ef7b1..1153f3f1 100644 --- a/src/vnet/fib/fib_entry_src_rr.c +++ b/src/vnet/fib/fib_entry_src_rr.c @@ -35,7 +35,7 @@ fib_entry_src_rr_resolve_via_connected (fib_entry_src_t *src, const fib_entry_t *cover) { const fib_route_path_t path = { - .frp_proto = fib_entry->fe_prefix.fp_proto, + .frp_proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto), .frp_addr = fib_entry->fe_prefix.fp_addr, .frp_sw_if_index = fib_entry_get_resolving_interface( fib_entry_get_index(cover)), @@ -90,18 +90,17 @@ fib_entry_src_rr_use_covers_pl (fib_entry_src_t *src, const fib_entry_t *cover) { fib_node_index_t *entries = NULL; - fib_protocol_t proto; + dpo_proto_t proto; - proto = fib_entry->fe_prefix.fp_proto; + proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto); vec_add1(entries, fib_entry_get_index(fib_entry)); if (fib_path_list_recursive_loop_detect(cover->fe_parent, &entries)) { - src->fes_pl = fib_path_list_create_special( - proto, - 
FIB_PATH_LIST_FLAG_DROP, - drop_dpo_get(fib_proto_to_dpo(proto))); + src->fes_pl = fib_path_list_create_special(proto, + FIB_PATH_LIST_FLAG_DROP, + drop_dpo_get(proto)); } else { @@ -126,7 +125,7 @@ fib_entry_src_rr_activate (fib_entry_src_t *src, */ if (FIB_PROTOCOL_MPLS == fib_entry->fe_prefix.fp_proto) { - src->fes_pl = fib_path_list_create_special(FIB_PROTOCOL_MPLS, + src->fes_pl = fib_path_list_create_special(DPO_PROTO_MPLS, FIB_PATH_LIST_FLAG_DROP, NULL); fib_path_list_lock(src->fes_pl); diff --git a/src/vnet/fib/fib_entry_src_special.c b/src/vnet/fib/fib_entry_src_special.c index 75605d7f..e979e18f 100644 --- a/src/vnet/fib/fib_entry_src_special.c +++ b/src/vnet/fib/fib_entry_src_special.c @@ -43,7 +43,7 @@ static void fib_entry_src_special_add (fib_entry_src_t *src, const fib_entry_t *entry, fib_entry_flag_t flags, - fib_protocol_t proto, + dpo_proto_t proto, const dpo_id_t *dpo) { src->fes_pl = diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index 3a67a544..58050ccb 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -193,7 +193,7 @@ typedef struct fib_path_t_ { * next-hop's address. We can't derive this from the address itself * since the address can be all zeros */ - fib_protocol_t fp_nh_proto; + dpo_proto_t fp_nh_proto; /** * UCMP [unnormalised] weigth @@ -381,7 +381,7 @@ format_fib_path (u8 * s, va_list * args) s = format (s, " index:%d ", fib_path_get_index(path)); s = format (s, "pl-index:%d ", path->fp_pl_index); - s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto); + s = format (s, "%U ", format_dpo_proto, path->fp_nh_proto); s = format (s, "weight=%d ", path->fp_weight); s = format (s, "pref=%d ", path->fp_preference); s = format (s, "%s: ", fib_path_type_names[path->fp_type]); @@ -454,7 +454,7 @@ format_fib_path (u8 * s, va_list * args) } break; case FIB_PATH_TYPE_RECURSIVE: - if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + if (DPO_PROTO_MPLS == path->fp_nh_proto) { s = format (s, "via %U %U", format_mpls_unicast_label, @@ -552,14 +552,14 @@ fib_path_attached_next_hop_get_adj (fib_path_t *path, * the subnet address (the attached route) links to the * auto-adj (see below), we want that adj here too. */ - return (adj_nbr_add_or_lock(path->fp_nh_proto, + return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto), link, &zero_addr, path->attached_next_hop.fp_interface)); } else { - return (adj_nbr_add_or_lock(path->fp_nh_proto, + return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto), link, &path->attached_next_hop.fp_nh, path->attached_next_hop.fp_interface)); @@ -575,10 +575,10 @@ fib_path_attached_next_hop_set (fib_path_t *path) */ dpo_set(&path->fp_dpo, DPO_ADJACENCY, - fib_proto_to_dpo(path->fp_nh_proto), + path->fp_nh_proto, fib_path_attached_next_hop_get_adj( path, - fib_proto_to_link(path->fp_nh_proto))); + dpo_proto_to_link(path->fp_nh_proto))); /* * become a child of the adjacency so we receive updates @@ -607,14 +607,14 @@ fib_path_attached_get_adj (fib_path_t *path, * point-2-point interfaces do not require a glean, since * there is nothing to ARP. 
Install a rewrite/nbr adj instead */ - return (adj_nbr_add_or_lock(path->fp_nh_proto, + return (adj_nbr_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto), link, &zero_addr, path->attached.fp_interface)); } else { - return (adj_glean_add_or_lock(path->fp_nh_proto, + return (adj_glean_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto), path->attached.fp_interface, NULL)); } @@ -650,7 +650,7 @@ fib_path_recursive_adj_update (fib_path_t *path, if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP) { path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; - dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto)); } else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST) { @@ -668,7 +668,7 @@ fib_path_recursive_adj_update (fib_path_t *path, if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR) { path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; - dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto)); /* * PIC edge trigger. let the load-balance maps know @@ -685,7 +685,7 @@ fib_path_recursive_adj_update (fib_path_t *path, if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib))) { path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; - dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto)); /* * PIC edge trigger. let the load-balance maps know @@ -699,7 +699,7 @@ fib_path_recursive_adj_update (fib_path_t *path, if (!fib_entry_is_resolved(path->fp_via_fib)) { path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; - dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&via_dpo, drop_dpo_get(path->fp_nh_proto)); /* * PIC edge trigger. 
let the load-balance maps know @@ -720,9 +720,7 @@ fib_path_recursive_adj_update (fib_path_t *path, */ dpo_copy(dpo, &via_dpo); - FIB_PATH_DBG(path, "recursive update: %U", - fib_get_lookup_main(path->fp_nh_proto), - &path->fp_dpo, 2); + FIB_PATH_DBG(path, "recursive update:"); dpo_reset(&via_dpo); } @@ -804,13 +802,8 @@ fib_path_unresolve (fib_path_t *path) static fib_forward_chain_type_t fib_path_to_chain_type (const fib_path_t *path) { - switch (path->fp_nh_proto) + if (DPO_PROTO_MPLS == path->fp_nh_proto) { - case FIB_PROTOCOL_IP4: - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); - case FIB_PROTOCOL_IP6: - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); - case FIB_PROTOCOL_MPLS: if (FIB_PATH_TYPE_RECURSIVE == path->fp_type && MPLS_EOS == path->recursive.fp_nh.fp_eos) { @@ -821,7 +814,10 @@ fib_path_to_chain_type (const fib_path_t *path) return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); } } - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + else + { + return (fib_forw_chain_type_from_dpo_proto(path->fp_nh_proto)); + } } /* @@ -927,7 +923,7 @@ FIXME comment ai = fib_path_attached_next_hop_get_adj( path, - fib_proto_to_link(path->fp_nh_proto)); + dpo_proto_to_link(path->fp_nh_proto)); path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; if (if_is_up && adj_is_up(ai)) @@ -935,9 +931,7 @@ FIXME comment path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; } - dpo_set(&path->fp_dpo, DPO_ADJACENCY, - fib_proto_to_dpo(path->fp_nh_proto), - ai); + dpo_set(&path->fp_dpo, DPO_ADJACENCY, path->fp_nh_proto, ai); adj_unlock(ai); if (!if_is_up) @@ -1141,7 +1135,7 @@ fib_path_create (fib_node_index_t pl_index, else { path->fp_type = FIB_PATH_TYPE_RECURSIVE; - if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + if (DPO_PROTO_MPLS == path->fp_nh_proto) { path->recursive.fp_nh.fp_local_label = rpath->frp_local_label; path->recursive.fp_nh.fp_eos = rpath->frp_eos; @@ -1167,7 +1161,7 @@ fib_path_create (fib_node_index_t pl_index, */ fib_node_index_t fib_path_create_special (fib_node_index_t pl_index, - fib_protocol_t nh_proto, + dpo_proto_t nh_proto, fib_path_cfg_flags_t flags, const dpo_id_t *dpo) { @@ -1433,7 +1427,7 @@ fib_path_cmp_w_route_path (fib_node_index_t path_index, res = (path->attached.fp_interface - rpath->frp_sw_if_index); break; case FIB_PATH_TYPE_RECURSIVE: - if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + if (DPO_PROTO_MPLS == path->fp_nh_proto) { res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label; @@ -1535,8 +1529,7 @@ fib_path_recursive_loop_detect (fib_node_index_t path_index, FIB_PATH_DBG(path, "recursive loop formed"); path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP; - dpo_copy(&path->fp_dpo, - drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&path->fp_dpo, drop_dpo_get(path->fp_nh_proto)); } else { @@ -1590,8 +1583,7 @@ fib_path_resolve (fib_node_index_t path_index) */ if (fib_path_is_permanent_drop(path)) { - dpo_copy(&path->fp_dpo, - drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&path->fp_dpo, drop_dpo_get(path->fp_nh_proto)); path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; return (fib_path_is_resolved(path_index)); } @@ -1612,9 +1604,9 @@ fib_path_resolve (fib_node_index_t path_index) } dpo_set(&path->fp_dpo, DPO_ADJACENCY, - fib_proto_to_dpo(path->fp_nh_proto), + path->fp_nh_proto, fib_path_attached_get_adj(path, - fib_proto_to_link(path->fp_nh_proto))); + dpo_proto_to_link(path->fp_nh_proto))); /* * become a child of the adjacency so we receive updates @@ -1639,7 +1631,7 @@ fib_path_resolve (fib_node_index_t path_index) ASSERT(FIB_NODE_INDEX_INVALID 
== path->fp_via_fib); - if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + if (DPO_PROTO_MPLS == path->fp_nh_proto) { fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, path->recursive.fp_nh.fp_eos, @@ -1680,8 +1672,7 @@ fib_path_resolve (fib_node_index_t path_index) /* * Resolve via the drop */ - dpo_copy(&path->fp_dpo, - drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + dpo_copy(&path->fp_dpo, drop_dpo_get(path->fp_nh_proto)); break; case FIB_PATH_TYPE_DEAG: { @@ -1696,7 +1687,7 @@ fib_path_resolve (fib_node_index_t path_index) LOOKUP_UNICAST); lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id, - fib_proto_to_dpo(path->fp_nh_proto), + path->fp_nh_proto, cast, LOOKUP_INPUT_DST_ADDR, LOOKUP_TABLE_FROM_CONFIG, @@ -1707,7 +1698,7 @@ fib_path_resolve (fib_node_index_t path_index) /* * Resolve via a receive DPO. */ - receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto), + receive_dpo_add_or_lock(path->fp_nh_proto, path->receive.fp_interface, &path->receive.fp_addr, &path->fp_dpo); @@ -1716,7 +1707,7 @@ fib_path_resolve (fib_node_index_t path_index) /* * Resolve via a receive DPO. */ - interface_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto), + interface_dpo_add_or_lock(path->fp_nh_proto, path->intf_rx.fp_interface, &path->fp_dpo); break; @@ -2035,7 +2026,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, /* * Create the adj needed for sending IP multicast traffic */ - ai = adj_mcast_add_or_lock(path->fp_nh_proto, + ai = adj_mcast_add_or_lock(dpo_proto_to_fib(path->fp_nh_proto), fib_forw_chain_type_to_link_type(fct), path->attached.fp_interface); dpo_set(dpo, DPO_ADJACENCY, @@ -2187,7 +2178,7 @@ fib_path_encode (fib_node_index_t path_list_index, return (FIB_PATH_LIST_WALK_CONTINUE); } -fib_protocol_t +dpo_proto_t fib_path_get_proto (fib_node_index_t path_index) { fib_path_t *path; diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h index a34cb43f..f986e437 100644 --- a/src/vnet/fib/fib_path.h +++ b/src/vnet/fib/fib_path.h @@ -78,6 +78,11 @@ typedef enum fib_path_cfg_attribute_t_ { * The path is an interface recieve */ FIB_PATH_CFG_ATTRIBUTE_LOCAL, + /** + * The path is L2. i.e. the parameters therein are to be interpreted as + * pertaining to L2 config. + */ + FIB_PATH_CFG_ATTRIBUTE_L2, /** * Marker. Add new types before this one, then update it. 
*/ @@ -98,6 +103,7 @@ typedef enum fib_path_cfg_attribute_t_ { [FIB_PATH_CFG_ATTRIBUTE_ATTACHED] = "attached", \ [FIB_PATH_CFG_ATTRIBUTE_INTF_RX] = "interface-rx", \ [FIB_PATH_CFG_ATTRIBUTE_RPF_ID] = "rpf-id", \ + [FIB_PATH_CFG_ATTRIBUTE_L2] = "l2", \ } #define FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(_item) \ @@ -118,6 +124,7 @@ typedef enum fib_path_cfg_flags_t_ { FIB_PATH_CFG_FLAG_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_ATTACHED), FIB_PATH_CFG_FLAG_INTF_RX = (1 << FIB_PATH_CFG_ATTRIBUTE_INTF_RX), FIB_PATH_CFG_FLAG_RPF_ID = (1 << FIB_PATH_CFG_ATTRIBUTE_RPF_ID), + FIB_PATH_CFG_FLAG_L2 = (1 << FIB_PATH_CFG_ATTRIBUTE_L2), } __attribute__ ((packed)) fib_path_cfg_flags_t; @@ -131,7 +138,7 @@ extern u8 * format_fib_path(u8 * s, va_list * args); extern fib_node_index_t fib_path_create(fib_node_index_t pl_index, const fib_route_path_t *path); extern fib_node_index_t fib_path_create_special(fib_node_index_t pl_index, - fib_protocol_t nh_proto, + dpo_proto_t nh_proto, fib_path_cfg_flags_t flags, const dpo_id_t *dpo); @@ -148,7 +155,7 @@ extern int fib_path_is_recursive_constrained(fib_node_index_t path_index); extern int fib_path_is_exclusive(fib_node_index_t path_index); extern int fib_path_is_deag(fib_node_index_t path_index); extern int fib_path_is_looped(fib_node_index_t path_index); -extern fib_protocol_t fib_path_get_proto(fib_node_index_t path_index); +extern dpo_proto_t fib_path_get_proto(fib_node_index_t path_index); extern void fib_path_destroy(fib_node_index_t path_index); extern uword fib_path_hash(fib_node_index_t path_index); extern load_balance_path_t * fib_path_append_nh_for_multipath_hash( diff --git a/src/vnet/fib/fib_path_ext.c b/src/vnet/fib/fib_path_ext.c index 26f2b9b6..4438671b 100644 --- a/src/vnet/fib/fib_path_ext.c +++ b/src/vnet/fib/fib_path_ext.c @@ -191,6 +191,9 @@ fib_path_ext_stack (fib_path_ext_t *path_ext, case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: parent_fct = child_fct; break; + case FIB_FORW_CHAIN_TYPE_ETHERNET: + parent_fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS; + break; default: return (nhs); break; diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c index 7a9c328c..f30fd7ea 100644 --- a/src/vnet/fib/fib_path_list.c +++ b/src/vnet/fib/fib_path_list.c @@ -611,7 +611,7 @@ fib_path_list_get_resolving_interface (fib_node_index_t path_list_index) return (sw_if_index); } -fib_protocol_t +dpo_proto_t fib_path_list_get_proto (fib_node_index_t path_list_index) { fib_path_list_t *path_list; @@ -753,7 +753,7 @@ fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf) } fib_node_index_t -fib_path_list_create_special (fib_protocol_t nh_proto, +fib_path_list_create_special (dpo_proto_t nh_proto, fib_path_list_flags_t flags, const dpo_id_t *dpo) { diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h index b4b6985b..a54b79e2 100644 --- a/src/vnet/fib/fib_path_list.h +++ b/src/vnet/fib/fib_path_list.h @@ -106,7 +106,7 @@ typedef enum fib_path_list_flags_t_ { extern fib_node_index_t fib_path_list_create(fib_path_list_flags_t flags, const fib_route_path_t *paths); -extern fib_node_index_t fib_path_list_create_special(fib_protocol_t nh_proto, +extern fib_node_index_t fib_path_list_create_special(dpo_proto_t nh_proto, fib_path_list_flags_t flags, const dpo_id_t *dpo); @@ -150,7 +150,7 @@ extern int fib_path_list_recursive_loop_detect(fib_node_index_t path_list_index, extern u32 fib_path_list_get_resolving_interface(fib_node_index_t path_list_index); extern int fib_path_list_is_looped(fib_node_index_t path_list_index); extern int 
fib_path_list_is_popular(fib_node_index_t path_list_index); -extern fib_protocol_t fib_path_list_get_proto(fib_node_index_t path_list_index); +extern dpo_proto_t fib_path_list_get_proto(fib_node_index_t path_list_index); extern u8 * fib_path_list_format(fib_node_index_t pl_index, u8 * s); extern index_t fib_path_list_lb_map_add_or_lock(fib_node_index_t pl_index, diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 5aa02dd0..6b6cc5cb 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -505,7 +505,7 @@ fib_table_entry_path_add (u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 next_hop_fib_index, @@ -664,7 +664,7 @@ void fib_table_entry_path_remove (u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 next_hop_fib_index, @@ -755,7 +755,7 @@ fib_table_entry_update_one_path (u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 next_hop_fib_index, diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index a65fea74..579740e9 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -288,7 +288,7 @@ extern fib_node_index_t fib_table_entry_path_add(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 next_hop_fib_index, @@ -364,7 +364,7 @@ extern fib_node_index_t fib_table_entry_path_add2(u32 fib_index, extern void fib_table_entry_path_remove(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 next_hop_fib_index, @@ -471,7 +471,7 @@ extern fib_node_index_t fib_table_entry_update_one_path(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags, - fib_protocol_t next_hop_proto, + dpo_proto_t next_hop_proto, const ip46_address_t *next_hop, u32 next_hop_sw_if_index, u32 next_hop_fib_index, diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 4c891667..59d5da2a 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -833,7 +833,7 @@ fib_test_v4 (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -861,7 +861,7 @@ fib_test_v4 (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -911,7 +911,7 @@ fib_test_v4 (void) fib_table_entry_path_add(fib_index, &pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -963,7 +963,7 @@ fib_test_v4 (void) pfx.fp_len = 0; fib_table_entry_path_remove(fib_index, &pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // non-recursive path, so no FIB index @@ -1029,7 +1029,7 @@ fib_test_v4 (void) &pfx_11_11_11_11_s_32, 
FIB_SOURCE_API, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_1_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1095,7 +1095,7 @@ fib_test_v4 (void) &pfx_10_10_10_1_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_1_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1110,7 +1110,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_11_11_11_11_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_1_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1144,7 +1144,7 @@ fib_test_v4 (void) &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_2_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1181,7 +1181,7 @@ fib_test_v4 (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1214,7 +1214,7 @@ fib_test_v4 (void) &pfx_1_1_2_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1241,7 +1241,7 @@ fib_test_v4 (void) &pfx_1_1_2_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1280,7 +1280,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_2_0_s_24, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, @@ -1327,7 +1327,7 @@ fib_test_v4 (void) &bgp_100_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_1_1_1_1, ~0, // no index provided. fib_index, // nexthop in same fib as route @@ -1363,7 +1363,7 @@ fib_test_v4 (void) &bgp_101_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_1_1_1_1, ~0, // no index provided. fib_index, // nexthop in same fib as route @@ -1487,7 +1487,7 @@ fib_test_v4 (void) &bgp_200_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, // no index provided. 
fib_index, // nexthop in same fib as route @@ -1534,7 +1534,7 @@ fib_test_v4 (void) &pfx_1_2_3_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -1545,7 +1545,7 @@ fib_test_v4 (void) &pfx_1_2_3_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, ~0, @@ -1586,7 +1586,7 @@ fib_test_v4 (void) &pfx_1_2_3_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, ~0, @@ -1597,7 +1597,7 @@ fib_test_v4 (void) &pfx_1_2_3_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -1669,7 +1669,7 @@ fib_test_v4 (void) &pfx_6_6_6_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1688,7 +1688,7 @@ fib_test_v4 (void) &pfx_6_6_6_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1770,7 +1770,7 @@ fib_test_v4 (void) &pfx_6_6_6_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -1915,7 +1915,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_6_6_6_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -1995,7 +1995,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_6_6_6_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -2026,7 +2026,7 @@ fib_test_v4 (void) &bgp_44_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_2_3_4_s_32.fp_addr, ~0, fib_index, @@ -2037,7 +2037,7 @@ fib_test_v4 (void) &bgp_44_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_2_3_5_s_32.fp_addr, ~0, fib_index, @@ -2107,7 +2107,7 @@ fib_test_v4 (void) &bgp_201_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_200_s_32.fp_addr, ~0, // no index provided. 
fib_index, // nexthop in same fib as route @@ -2151,7 +2151,7 @@ fib_test_v4 (void) &pfx_1_1_1_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -2209,7 +2209,7 @@ fib_test_v4 (void) &pfx_1_1_1_0_s_28, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -2244,7 +2244,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_0_s_28, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, @@ -2275,7 +2275,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_0_s_24, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -2316,7 +2316,7 @@ fib_test_v4 (void) &pfx_1_1_1_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -2351,7 +2351,7 @@ fib_test_v4 (void) &bgp_201_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -2362,7 +2362,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_201_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -2375,7 +2375,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_201_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_200_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2405,7 +2405,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_200_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2446,7 +2446,7 @@ fib_test_v4 (void) &bgp_102, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route @@ -2457,7 +2457,7 @@ fib_test_v4 (void) &bgp_102, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2483,7 +2483,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_102, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2492,7 +2492,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_102, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2507,7 +2507,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_100_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2516,7 +2516,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_101_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2546,7 +2546,7 @@ fib_test_v4 (void) &bgp_200_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, ~0, // no index provided. fib_index, // Same as route's FIB @@ -2593,7 +2593,7 @@ fib_test_v4 (void) &bgp_201_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, ~0, // no index provided. 
fib_index, @@ -2639,7 +2639,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_200_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, ~0, // no index provided. fib_index, // same as route's FIB @@ -2648,7 +2648,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_201_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, ~0, // no index provided. fib_index, // same as route's FIB @@ -2707,7 +2707,7 @@ fib_test_v4 (void) &pfx_5_5_5_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_6_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2718,7 +2718,7 @@ fib_test_v4 (void) &pfx_5_5_5_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_7_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2729,7 +2729,7 @@ fib_test_v4 (void) &pfx_5_5_5_7_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_5_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2768,7 +2768,7 @@ fib_test_v4 (void) &pfx_5_5_5_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -2801,7 +2801,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -2826,7 +2826,7 @@ fib_test_v4 (void) &pfx_5_5_5_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -2868,7 +2868,7 @@ fib_test_v4 (void) &pfx_5_5_5_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_6_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2892,7 +2892,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_5_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_6_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2901,7 +2901,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_7_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2910,7 +2910,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_7_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_5_s_32.fp_addr, ~0, // no index provided. fib_index, // same as route's FIB @@ -2919,7 +2919,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, ~0, // no index provided. fib_index, // same as route's FIB @@ -2943,7 +2943,7 @@ fib_test_v4 (void) &pfx_5_5_5_6_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_6_s_32.fp_addr, ~0, // no index provided. fib_index, @@ -2957,7 +2957,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_5_5_5_6_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_5_5_5_6_s_32.fp_addr, ~0, // no index provided. 
fib_index, // same as route's FIB @@ -2991,7 +2991,7 @@ fib_test_v4 (void) &pfx_23_23_23_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_23_23_23_23_s_32.fp_addr, ~0, // recursive fib_index, @@ -3021,7 +3021,7 @@ fib_test_v4 (void) &pfx_0_0_0_0_s_0, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_23_23_23_23_s_32.fp_addr, ~0, // recursive fib_index, @@ -3051,7 +3051,7 @@ fib_test_v4 (void) &bgp_200_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_1_1_1_1, ~0, fib_index, @@ -3081,7 +3081,7 @@ fib_test_v4 (void) &pfx_1_1_1_0_s_28, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3099,7 +3099,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3116,7 +3116,7 @@ fib_test_v4 (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3140,7 +3140,7 @@ fib_test_v4 (void) &pfx_1_1_1_3_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3152,7 +3152,7 @@ fib_test_v4 (void) &bgp_200_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_3_s_32.fp_addr, ~0, fib_index, @@ -3177,7 +3177,7 @@ fib_test_v4 (void) &bgp_78s[ii], FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_3_s_32.fp_addr, ~0, fib_index, @@ -3188,7 +3188,7 @@ fib_test_v4 (void) &bgp_78s[ii], FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_1_1_1_1, ~0, fib_index, @@ -3238,7 +3238,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3277,7 +3277,7 @@ fib_test_v4 (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3307,7 +3307,7 @@ fib_test_v4 (void) &bgp_200_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -3320,7 +3320,7 @@ fib_test_v4 (void) &bgp_78s[ii], FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -3354,7 +3354,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3391,7 +3391,7 @@ fib_test_v4 (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3412,7 +3412,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_200_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -3421,7 +3421,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_200_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_1_1_1_1, ~0, fib_index, @@ -3430,7 +3430,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &bgp_200_pfx, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, 
&pfx_1_1_1_3_s_32.fp_addr, ~0, fib_index, @@ -3481,7 +3481,7 @@ fib_test_v4 (void) &pfx_4_4_4_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3492,7 +3492,7 @@ fib_test_v4 (void) &pfx_4_4_4_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, @@ -3503,7 +3503,7 @@ fib_test_v4 (void) &pfx_4_4_4_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, tm->hw[0]->sw_if_index, ~0, @@ -3539,7 +3539,7 @@ fib_test_v4 (void) for (ii = 0; ii < 4; ii++) { fib_route_path_t r_path = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_addr = { .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii), }, @@ -3588,7 +3588,7 @@ fib_test_v4 (void) &pfx_4_4_4_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &zero_addr, ~0, fib_index, @@ -3648,7 +3648,7 @@ fib_test_v4 (void) &pfx_34_34_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, 0, @@ -3659,7 +3659,7 @@ fib_test_v4 (void) &pfx_34_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_34_34_1_1_s_32.fp_addr, ~0, fib_index, @@ -3670,7 +3670,7 @@ fib_test_v4 (void) &pfx_34_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_34_34_1_1_s_32.fp_addr, ~0, fib_index, @@ -3691,7 +3691,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_2_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3700,7 +3700,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3709,7 +3709,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_2_0_s_24, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, @@ -3751,7 +3751,7 @@ fib_test_v4 (void) &pfx_4_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &zero_addr, tm->hw[0]->sw_if_index, fib_index, @@ -3805,7 +3805,7 @@ fib_test_v4 (void) &pfx_2001_s_64, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, fib_index, @@ -3863,7 +3863,7 @@ fib_test_v4 (void) &pfx_12_10_10_2_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_12_10_10_2_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -3897,7 +3897,7 @@ fib_test_v4 (void) &pfx_10_10_10_127_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_127_s_32.fp_addr, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -3945,7 +3945,7 @@ fib_test_v4 (void) &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, tm->hw[0]->sw_if_index, fib_index, @@ -3956,7 +3956,7 @@ fib_test_v4 (void) &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, fib_index, @@ -3975,7 +3975,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, tm->hw[0]->sw_if_index, fib_index, @@ -3992,7 +3992,7 @@ fib_test_v4 (void) &pfx_10_10_10_3_s_32, 
FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, tm->hw[0]->sw_if_index, fib_index, @@ -4011,7 +4011,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_12_12_12_12, tm->hw[1]->sw_if_index, fib_index, @@ -4030,7 +4030,7 @@ fib_test_v4 (void) fib_table_entry_path_remove(fib_index, &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_3, tm->hw[0]->sw_if_index, fib_index, @@ -4269,7 +4269,7 @@ fib_test_v6 (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, NULL, tm->hw[0]->sw_if_index, ~0, @@ -4300,7 +4300,7 @@ fib_test_v6 (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -4345,7 +4345,7 @@ fib_test_v6 (void) fib_table_entry_path_add(fib_index, &pfx_0_0, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh_2001_2, tm->hw[0]->sw_if_index, ~0, @@ -4389,7 +4389,7 @@ fib_test_v6 (void) */ fib_table_entry_path_remove(fib_index, &pfx_0_0, FIB_SOURCE_API, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh_2001_2, tm->hw[0]->sw_if_index, ~0, @@ -4466,7 +4466,7 @@ fib_test_v6 (void) &pfx_2001_1_2_s_128, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &pfx_2001_1_2_s_128.fp_addr, tm->hw[0]->sw_if_index, ~0, @@ -4505,7 +4505,7 @@ fib_test_v6 (void) &pfx_2001_1_3_s_128, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &pfx_2001_1_3_s_128.fp_addr, tm->hw[0]->sw_if_index, ~0, @@ -4559,7 +4559,7 @@ fib_test_v6 (void) &pfx_2001_a_s_64, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh_2001_2, tm->hw[0]->sw_if_index, ~0, @@ -4573,7 +4573,7 @@ fib_test_v6 (void) &pfx_2001_b_s_64, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh_2001_2, tm->hw[0]->sw_if_index, ~0, @@ -4608,7 +4608,7 @@ fib_test_v6 (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh_2001_2, tm->hw[0]->sw_if_index, ~0, @@ -4646,7 +4646,7 @@ fib_test_v6 (void) &pfx_2001_c_s_64, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, NULL, tm->hw[0]->sw_if_index, ~0, @@ -4663,7 +4663,7 @@ fib_test_v6 (void) fib_table_entry_path_remove(fib_index, &pfx_2001_c_s_64, FIB_SOURCE_CLI, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, NULL, tm->hw[0]->sw_if_index, ~0, @@ -4748,7 +4748,7 @@ fib_test_v6 (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, NULL, tm->hw[1]->sw_if_index, ~0, @@ -4767,7 +4767,7 @@ fib_test_v6 (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5095,7 +5095,7 @@ fib_test_ae (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, @@ -5111,7 +5111,7 @@ fib_test_ae (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5140,7 +5140,7 @@ fib_test_ae (void) &pfx_10_10_10_1_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_1_s_32.fp_addr, 
tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5167,7 +5167,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5209,7 +5209,7 @@ fib_test_ae (void) &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_2_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5243,7 +5243,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5280,7 +5280,7 @@ fib_test_ae (void) &pfx_10_10_10_3_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_3_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5352,7 +5352,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_2_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5375,7 +5375,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5407,7 +5407,7 @@ fib_test_ae (void) &pfx_10_0_0_0_s_8, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_3_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5463,7 +5463,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_10_10_10_1_s_32.fp_addr, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5499,7 +5499,7 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -5538,7 +5538,7 @@ fib_test_ae (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, @@ -5626,7 +5626,7 @@ fib_test_pref (void) * 2 high, 2 medium and 2 low preference non-recursive paths */ fib_route_path_t nr_path_hi_1 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[0]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5637,7 +5637,7 @@ fib_test_pref (void) }, }; fib_route_path_t nr_path_hi_2 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[0]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5648,7 +5648,7 @@ fib_test_pref (void) }, }; fib_route_path_t nr_path_med_1 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[1]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5659,7 +5659,7 @@ fib_test_pref (void) }, }; fib_route_path_t nr_path_med_2 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[1]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5670,7 +5670,7 @@ fib_test_pref (void) }, }; fib_route_path_t nr_path_low_1 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[2]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5681,7 +5681,7 @@ fib_test_pref (void) }, }; fib_route_path_t nr_path_low_2 = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = tm->hw[2]->sw_if_index, .frp_fib_index = ~0, .frp_weight = 1, @@ -5897,7 +5897,7 @@ fib_test_pref (void) }, }; fib_route_path_t r_path_hi = { - .frp_proto = 
FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = 1, @@ -5906,7 +5906,7 @@ fib_test_pref (void) .frp_addr = pfx_1_1_1_1_s_32.fp_addr, }; fib_route_path_t r_path_med = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = 1, @@ -5915,7 +5915,7 @@ fib_test_pref (void) .frp_addr = pfx_1_1_1_2_s_32.fp_addr, }; fib_route_path_t r_path_low = { - .frp_proto = FIB_PROTOCOL_IP4, + .frp_proto = DPO_PROTO_IP4, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = 1, @@ -6099,7 +6099,7 @@ fib_test_label (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, @@ -6115,7 +6115,7 @@ fib_test_label (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -6145,7 +6145,7 @@ fib_test_label (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[1]->sw_if_index, ~0, @@ -6161,7 +6161,7 @@ fib_test_label (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6243,7 +6243,7 @@ fib_test_label (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -6282,7 +6282,7 @@ fib_test_label (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_11_1, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6360,7 +6360,7 @@ fib_test_label (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_11_2, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6440,7 +6440,7 @@ fib_test_label (void) &pfx_2_2_2_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, fib_index, @@ -6612,7 +6612,7 @@ fib_test_label (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -6669,7 +6669,7 @@ fib_test_label (void) fib_table_entry_path_remove(fib_index, &pfx_1_1_1_1_s_32, FIB_SOURCE_API, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_11_1, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6711,7 +6711,7 @@ fib_test_label (void) &pfx_1_1_1_1_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -6842,7 +6842,7 @@ fib_test_label (void) &pfx_1_1_1_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -6884,7 +6884,7 @@ fib_test_label (void) &pfx_2_2_2_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_2_s_32.fp_addr, ~0, fib_index, @@ -6912,7 +6912,7 @@ fib_test_label (void) &pfx_1_1_1_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_11_1, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6945,7 +6945,7 @@ fib_test_label (void) &pfx_1_1_1_2_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_11_1, 
tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -6987,7 +6987,7 @@ fib_test_label (void) &pfx_2_2_2_3_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, fib_index, @@ -7031,7 +7031,7 @@ fib_test_label (void) &pfx_2_2_2_4_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx_1_1_1_1_s_32.fp_addr, ~0, fib_index, @@ -7081,7 +7081,7 @@ fib_test_label (void) &pfx_2_2_5_5_s_32, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_11_1, tm->hw[1]->sw_if_index, ~0, // invalid fib index @@ -7689,7 +7689,7 @@ fib_test_bfd (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -7706,7 +7706,7 @@ fib_test_bfd (void) FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -7780,7 +7780,7 @@ fib_test_bfd (void) &pfx_10_10_10_1_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -7819,7 +7819,7 @@ fib_test_bfd (void) &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -7851,7 +7851,7 @@ fib_test_bfd (void) &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -7907,7 +7907,7 @@ fib_test_bfd (void) &pfx_200_0_0_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, ~0, // recursive 0, // default fib index @@ -7926,7 +7926,7 @@ fib_test_bfd (void) &pfx_200_0_0_0_s_24, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, ~0, // recursive 0, // default fib index @@ -8065,7 +8065,7 @@ fib_test_bfd (void) &pfx_5_5_5_5_s_32, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8096,7 +8096,7 @@ fib_test_bfd (void) &pfx_5_5_5_5_s_32, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_2, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8234,7 +8234,7 @@ lfib_test (void) &pfx, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &zero_addr, ~0, fib_index, @@ -8285,7 +8285,7 @@ lfib_test (void) &pfx, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &zero_addr, ~0, lfib_index, @@ -8363,7 +8363,7 @@ lfib_test (void) &pfx_1200, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8389,7 +8389,7 @@ lfib_test (void) }, }; fib_route_path_t *rpaths = NULL, rpath = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, .frp_local_label = 1200, .frp_eos = MPLS_NON_EOS, .frp_sw_if_index = ~0, // recurive @@ -8545,7 +8545,7 @@ lfib_test (void) &pfx_2500, FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8590,7 +8590,7 @@ lfib_test (void) &pfx_3500, FIB_SOURCE_API, FIB_ENTRY_FLAG_MULTICAST, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8610,7 +8610,7 @@ 
lfib_test (void) &pfx_3500, FIB_SOURCE_API, FIB_ENTRY_FLAG_MULTICAST, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -8637,7 +8637,7 @@ lfib_test (void) &pfx_3500, FIB_SOURCE_API, FIB_ENTRY_FLAG_MULTICAST, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, NULL, 5, // rpf-id 0, // default table diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h index a209ff3c..f11a55da 100644 --- a/src/vnet/fib/fib_types.h +++ b/src/vnet/fib/fib_types.h @@ -32,9 +32,9 @@ typedef u32 fib_node_index_t; * Protocol Type. packed so it consumes a u8 only */ typedef enum fib_protocol_t_ { - FIB_PROTOCOL_IP4 = 0, - FIB_PROTOCOL_IP6, - FIB_PROTOCOL_MPLS, + FIB_PROTOCOL_IP4 = DPO_PROTO_IP4, + FIB_PROTOCOL_IP6 = DPO_PROTO_IP6, + FIB_PROTOCOL_MPLS = DPO_PROTO_MPLS, } __attribute__ ((packed)) fib_protocol_t; #define FIB_PROTOCOLS { \ @@ -338,7 +338,7 @@ typedef struct fib_route_path_t_ { * The protocol of the address below. We need this since the all * zeros address is ambiguous. */ - fib_protocol_t frp_proto; + dpo_proto_t frp_proto; union { /** diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c index df7e9388..5694bb2f 100644 --- a/src/vnet/interface_format.c +++ b/src/vnet/interface_format.c @@ -165,9 +165,15 @@ format_vnet_sw_if_index_name (u8 * s, va_list * args) { vnet_main_t *vnm = va_arg (*args, vnet_main_t *); u32 sw_if_index = va_arg (*args, u32); - return format (s, "%U", - format_vnet_sw_interface_name, vnm, - vnet_get_sw_interface (vnm, sw_if_index)); + vnet_sw_interface_t *si; + + si = vnet_get_sw_interface_safe (vnm, sw_if_index); + + if (NULL == si) + { + return format (s, "DELETED"); + } + return format (s, "%U", format_vnet_sw_interface_name, vnm, si); } u8 * diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index ee17ea88..7a8d7a0c 100755 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -724,7 +724,7 @@ ip4_add_interface_routes (u32 sw_if_index, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, /* No next-hop address */ NULL, sw_if_index, @@ -767,7 +767,7 @@ ip4_add_interface_routes (u32 sw_if_index, fib_table_entry_update_one_path (fib_index, &net_pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &net_pfx.fp_addr, sw_if_index, // invalid FIB index @@ -803,7 +803,7 @@ ip4_add_interface_routes (u32 sw_if_index, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx.fp_addr, sw_if_index, // invalid FIB index diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index bc66416e..8ae08a01 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -355,7 +355,7 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, /* No next-hop address */ NULL, sw_if_index, /* invalid FIB index */ @@ -390,7 +390,7 @@ ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &pfx.fp_addr, sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index e8eebd4e..6a9139ab 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -284,7 +284,7 @@ ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, 
(ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index), &pfx, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &pfx.fp_addr, n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); pool_put (nm->neighbor_pool, n); @@ -645,7 +645,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, n->fib_entry_index = fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP6, &pfx.fp_addr, + DPO_PROTO_IP6, &pfx.fp_addr, n->key.sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); } @@ -776,7 +776,7 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, (ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index), &pfx, FIB_SOURCE_ADJ, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &pfx.fp_addr, n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); } pool_put (nm->neighbor_pool, n); @@ -4110,7 +4110,7 @@ ip6_neighbor_proxy_add_del (u32 sw_if_index, ip6_address_t * addr, u8 is_del) fib_table_entry_path_remove (fib_index, &pfx, FIB_SOURCE_IP6_ND_PROXY, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh, sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); @@ -4124,7 +4124,7 @@ ip6_neighbor_proxy_add_del (u32 sw_if_index, ip6_address_t * addr, u8 is_del) &pfx, FIB_SOURCE_IP6_ND_PROXY, FIB_ENTRY_FLAG_NONE, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, &nh, sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index 4cbf75a3..0676a387 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -156,9 +156,9 @@ copy_fib_next_hop (fib_route_path_encode_t * api_rpath, void *fp_arg) int is_ip4; vl_api_fib_path_t *fp = (vl_api_fib_path_t *) fp_arg; - if (api_rpath->rpath.frp_proto == FIB_PROTOCOL_IP4) + if (api_rpath->rpath.frp_proto == DPO_PROTO_IP4) fp->afi = IP46_TYPE_IP4; - else if (api_rpath->rpath.frp_proto == FIB_PROTOCOL_IP6) + else if (api_rpath->rpath.frp_proto == DPO_PROTO_IP6) fp->afi = IP46_TYPE_IP6; else { @@ -714,7 +714,7 @@ add_del_route_t_handler (u8 is_multipath, u8 is_rpf_id, u32 fib_index, const fib_prefix_t * prefix, - u8 next_hop_proto_is_ip4, + dpo_proto_t next_hop_proto, const ip46_address_t * next_hop, u32 next_hop_sw_if_index, u8 next_hop_fib_index, @@ -726,8 +726,7 @@ add_del_route_t_handler (u8 is_multipath, vnet_classify_main_t *cm = &vnet_classify_main; fib_route_path_flags_t path_flags = FIB_ROUTE_PATH_FLAG_NONE; fib_route_path_t path = { - .frp_proto = (next_hop_proto_is_ip4 ? - FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6), + .frp_proto = next_hop_proto, .frp_addr = (NULL == next_hop ? 
zero_addr : *next_hop), .frp_sw_if_index = next_hop_sw_if_index, .frp_fib_index = next_hop_fib_index, @@ -740,7 +739,7 @@ add_del_route_t_handler (u8 is_multipath, if (MPLS_LABEL_INVALID != next_hop_via_label) { - path.frp_proto = FIB_PROTOCOL_MPLS; + path.frp_proto = DPO_PROTO_MPLS; path.frp_local_label = next_hop_via_label; path.frp_eos = MPLS_NON_EOS; } @@ -855,7 +854,7 @@ int add_del_route_check (fib_protocol_t table_proto, u32 table_id, u32 next_hop_sw_if_index, - fib_protocol_t next_hop_table_proto, + dpo_proto_t next_hop_table_proto, u32 next_hop_table_id, u8 create_missing_tables, u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index) @@ -887,11 +886,18 @@ add_del_route_check (fib_protocol_t table_proto, } else { + fib_protocol_t fib_nh_proto; + + if (next_hop_table_proto > DPO_PROTO_MPLS) + return (0); + + fib_nh_proto = dpo_proto_to_fib (next_hop_table_proto); + if (is_rpf_id) - *next_hop_fib_index = mfib_table_find (next_hop_table_proto, + *next_hop_fib_index = mfib_table_find (fib_nh_proto, ntohl (next_hop_table_id)); else - *next_hop_fib_index = fib_table_find (next_hop_table_proto, + *next_hop_fib_index = fib_table_find (fib_nh_proto, ntohl (next_hop_table_id)); if (~0 == *next_hop_fib_index) @@ -900,12 +906,12 @@ add_del_route_check (fib_protocol_t table_proto, { if (is_rpf_id) *next_hop_fib_index = - mfib_table_find_or_create_and_lock (next_hop_table_proto, + mfib_table_find_or_create_and_lock (fib_nh_proto, ntohl (next_hop_table_id)); else *next_hop_fib_index = - fib_table_find_or_create_and_lock (next_hop_table_proto, + fib_table_find_or_create_and_lock (fib_nh_proto, ntohl (next_hop_table_id)); } @@ -930,7 +936,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) rv = add_del_route_check (FIB_PROTOCOL_IP4, mp->table_id, mp->next_hop_sw_if_index, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, mp->next_hop_table_id, mp->create_vrf_if_needed, 0, &fib_index, &next_hop_fib_index); @@ -970,7 +976,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->classify_table_index, mp->is_resolve_host, mp->is_resolve_attached, 0, 0, - fib_index, &pfx, 1, + fib_index, &pfx, DPO_PROTO_IP4, &nh, ntohl (mp->next_hop_sw_if_index), next_hop_fib_index, @@ -990,7 +996,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) rv = add_del_route_check (FIB_PROTOCOL_IP6, mp->table_id, mp->next_hop_sw_if_index, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, mp->next_hop_table_id, mp->create_vrf_if_needed, 0, &fib_index, &next_hop_fib_index); @@ -1030,7 +1036,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->classify_table_index, mp->is_resolve_host, mp->is_resolve_attached, 0, 0, - fib_index, &pfx, 0, + fib_index, &pfx, DPO_PROTO_IP6, &nh, ntohl (mp->next_hop_sw_if_index), next_hop_fib_index, mp->next_hop_weight, @@ -1106,7 +1112,7 @@ mroute_add_del_handler (u8 is_add, fib_route_path_t path = { .frp_sw_if_index = next_hop_sw_if_index, - .frp_proto = prefix->fp_proto, + .frp_proto = fib_proto_to_dpo (prefix->fp_proto), }; if (is_local) diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 533d010a..41e46070 100755 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -423,7 +423,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, { rpath.frp_weight = 1; rpath.frp_eos = MPLS_NON_EOS; - rpath.frp_proto = FIB_PROTOCOL_MPLS; + rpath.frp_proto = DPO_PROTO_MPLS; rpath.frp_sw_if_index = ~0; vec_add1 (rpaths, rpath); } @@ -449,7 +449,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; + 
rpath.frp_proto = DPO_PROTO_IP4; vec_add1 (rpaths, rpath); } @@ -460,7 +460,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1 (rpaths, rpath); } else if (unformat (line_input, "weight %u", &weight)) @@ -479,7 +479,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, { rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1 (rpaths, rpath); } else if (unformat (line_input, "via %U next-hop-table %d", @@ -488,7 +488,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, { rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1 (rpaths, rpath); } else if (unformat (line_input, "via %U", @@ -501,7 +501,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, rpath.frp_fib_index = table_id; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1 (rpaths, rpath); } else if (unformat (line_input, "via %U", @@ -510,13 +510,13 @@ vnet_ip_route_cmd (vlib_main_t * vm, rpath.frp_fib_index = table_id; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1 (rpaths, rpath); } else if (unformat (line_input, "lookup in table %d", &rpath.frp_fib_index)) { - rpath.frp_proto = pfx.fp_proto; + rpath.frp_proto = fib_proto_to_dpo (pfx.fp_proto); rpath.frp_sw_if_index = ~0; vec_add1 (rpaths, rpath); } @@ -526,7 +526,7 @@ vnet_ip_route_cmd (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = prefixs[0].fp_proto; + rpath.frp_proto = fib_proto_to_dpo (prefixs[0].fp_proto); vec_add1 (rpaths, rpath); } else if (vec_len (prefixs) > 0 && diff --git a/src/vnet/lisp-gpe/lisp_gpe.c b/src/vnet/lisp-gpe/lisp_gpe.c index 0acc7349..018895ad 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.c +++ b/src/vnet/lisp-gpe/lisp_gpe.c @@ -454,7 +454,7 @@ vnet_gpe_add_del_native_fwd_rpath (vnet_gpe_native_fwd_rpath_args_t * a) fib_route_path_t *rpath; u8 ip_version; - ip_version = a->rpath.frp_proto == FIB_PROTOCOL_IP4 ? IP4 : IP6; + ip_version = a->rpath.frp_proto == DPO_PROTO_IP4 ? 
IP4 : IP6; if (a->is_add) { @@ -511,7 +511,7 @@ gpe_native_forward_command_fn (vlib_main_t * vm, unformat_input_t * input, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; } else if (unformat (line_input, "via %U %U", unformat_ip6_address, @@ -520,21 +520,21 @@ gpe_native_forward_command_fn (vlib_main_t * vm, unformat_input_t * input, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; } else if (unformat (line_input, "via %U", unformat_ip4_address, &rpath.frp_addr.ip4)) { rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; } else if (unformat (line_input, "via %U", unformat_ip6_address, &rpath.frp_addr.ip6)) { rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; } else { @@ -549,7 +549,8 @@ gpe_native_forward_command_fn (vlib_main_t * vm, unformat_input_t * input, } else { - rpath.frp_fib_index = fib_table_find (rpath.frp_proto, table_id); + rpath.frp_fib_index = + fib_table_find (dpo_proto_to_fib (rpath.frp_proto), table_id); if ((u32) ~ 0 == rpath.frp_fib_index) { error = clib_error_return (0, "Nonexistent table id %d", table_id); diff --git a/src/vnet/lisp-gpe/lisp_gpe_api.c b/src/vnet/lisp-gpe/lisp_gpe_api.c index f1663699..4367a719 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_api.c +++ b/src/vnet/lisp-gpe/lisp_gpe_api.c @@ -455,10 +455,10 @@ static void clib_memcpy (&a->rpath.frp_addr.ip6, mp->nh_addr, sizeof (ip6_address_t)); a->is_add = mp->is_add; - a->rpath.frp_proto = mp->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; - a->rpath.frp_fib_index = fib_table_find (a->rpath.frp_proto, - clib_net_to_host_u32 - (mp->table_id)); + a->rpath.frp_proto = mp->is_ip4 ? DPO_PROTO_IP4 : DPO_PROTO_IP6; + a->rpath.frp_fib_index = + fib_table_find (dpo_proto_to_fib (a->rpath.frp_proto), + clib_net_to_host_u32 (mp->table_id)); if (~0 == a->rpath.frp_fib_index) { rv = VNET_API_ERROR_INVALID_VALUE; @@ -484,7 +484,7 @@ gpe_native_fwd_rpaths_copy (vl_api_gpe_native_fwd_rpath_t * dst, vec_foreach (e, src) { memset (&dst[i], 0, sizeof (*dst)); - table = fib_table_get (e->frp_fib_index, e->frp_proto); + table = fib_table_get (e->frp_fib_index, dpo_proto_to_fib (e->frp_proto)); dst[i].fib_index = table->ft_table_id; dst[i].nh_sw_if_index = e->frp_sw_if_index; dst[i].is_ip4 = is_ip4; diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c index 395b493a..ac048149 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c +++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c @@ -225,6 +225,7 @@ lisp_gpe_mk_fib_paths (const lisp_fwd_path_t * paths) { const lisp_gpe_adjacency_t *ladj; fib_route_path_t *rpaths = NULL; + fib_protocol_t fp; u8 best_priority; u32 ii; @@ -239,9 +240,9 @@ lisp_gpe_mk_fib_paths (const lisp_fwd_path_t * paths) ladj = lisp_gpe_adjacency_get (paths[ii].lisp_adj); - ip_address_to_46 (&ladj->remote_rloc, - &rpaths[ii].frp_addr, &rpaths[ii].frp_proto); + ip_address_to_46 (&ladj->remote_rloc, &rpaths[ii].frp_addr, &fp); + rpaths[ii].frp_proto = fib_proto_to_dpo (fp); rpaths[ii].frp_sw_if_index = ladj->sw_if_index; rpaths[ii].frp_weight = (paths[ii].weight ? 
paths[ii].weight : 1); } diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c index 5c6f8126..5e48e919 100644 --- a/src/vnet/mfib/ip6_mfib.c +++ b/src/vnet/mfib/ip6_mfib.c @@ -158,7 +158,7 @@ ip6_create_mfib_with_table_id (u32 table_id) .fp_proto = FIB_PROTOCOL_IP6, }; const fib_route_path_t path_for_us = { - .frp_proto = FIB_PROTOCOL_IP6, + .frp_proto = DPO_PROTO_IP6, .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, @@ -222,7 +222,7 @@ ip6_mfib_table_destroy (ip6_mfib_t *mfib) .fp_proto = FIB_PROTOCOL_IP6, }; const fib_route_path_t path_for_us = { - .frp_proto = FIB_PROTOCOL_IP6, + .frp_proto = DPO_PROTO_IP6, .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, @@ -259,7 +259,7 @@ void ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable) { const fib_route_path_t path = { - .frp_proto = FIB_PROTOCOL_IP6, + .frp_proto = DPO_PROTO_IP6, .frp_addr = zero_addr, .frp_sw_if_index = sw_if_index, .frp_fib_index = ~0, diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c index cf25b67a..b37f8825 100644 --- a/src/vnet/mfib/mfib_entry.c +++ b/src/vnet/mfib/mfib_entry.c @@ -764,18 +764,16 @@ mfib_entry_update (fib_node_index_t mfib_entry_index, * entry */ fib_node_index_t old_pl_index; - fib_protocol_t fp; + dpo_proto_t dp; dpo_id_t dpo = DPO_INVALID; - fp = mfib_entry_get_proto(mfib_entry); + dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry)); old_pl_index = msrc->mfes_pl; - dpo_set(&dpo, DPO_REPLICATE, - fib_proto_to_dpo(fp), - repi); + dpo_set(&dpo, DPO_REPLICATE, dp, repi); msrc->mfes_pl = - fib_path_list_create_special(fp, + fib_path_list_create_special(dp, FIB_PATH_LIST_FLAG_EXCLUSIVE, &dpo); diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c index 7c92ae99..57787eca 100644 --- a/src/vnet/mfib/mfib_test.c +++ b/src/vnet/mfib/mfib_test.c @@ -387,7 +387,7 @@ mfib_test_i (fib_protocol_t PROTO, fib_route_path_t path_via_if0 = { - .frp_proto = PROTO, + .frp_proto = fib_proto_to_dpo(PROTO), .frp_addr = zero_addr, .frp_sw_if_index = tm->hw[0]->sw_if_index, .frp_fib_index = ~0, @@ -411,7 +411,7 @@ mfib_test_i (fib_protocol_t PROTO, MFIB_ITF_FLAG_ACCEPT)); fib_route_path_t path_via_if1 = { - .frp_proto = PROTO, + .frp_proto = fib_proto_to_dpo(PROTO), .frp_addr = zero_addr, .frp_sw_if_index = tm->hw[1]->sw_if_index, .frp_fib_index = ~0, @@ -419,7 +419,7 @@ mfib_test_i (fib_protocol_t PROTO, .frp_flags = 0, }; fib_route_path_t path_via_if2 = { - .frp_proto = PROTO, + .frp_proto = fib_proto_to_dpo(PROTO), .frp_addr = zero_addr, .frp_sw_if_index = tm->hw[2]->sw_if_index, .frp_fib_index = ~0, @@ -427,7 +427,7 @@ mfib_test_i (fib_protocol_t PROTO, .frp_flags = 0, }; fib_route_path_t path_via_if3 = { - .frp_proto = PROTO, + .frp_proto = fib_proto_to_dpo(PROTO), .frp_addr = zero_addr, .frp_sw_if_index = tm->hw[3]->sw_if_index, .frp_fib_index = ~0, @@ -435,7 +435,7 @@ mfib_test_i (fib_protocol_t PROTO, .frp_flags = 0, }; fib_route_path_t path_for_us = { - .frp_proto = PROTO, + .frp_proto = fib_proto_to_dpo(PROTO), .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, @@ -1121,7 +1121,7 @@ mfib_test_i (fib_protocol_t PROTO, &pfx_3500, FIB_SOURCE_API, FIB_ENTRY_FLAG_MULTICAST, - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &nh_10_10_10_1, tm->hw[0]->sw_if_index, ~0, // invalid fib index @@ -1138,7 +1138,7 @@ mfib_test_i (fib_protocol_t PROTO, * An (S,G) that resolves via the mLDP head-end */ fib_route_path_t path_via_mldp = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, 
.frp_local_label = pfx_3500.fp_label, .frp_eos = MPLS_EOS, .frp_sw_if_index = 0xffffffff, diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api index 67f1045d..5973a0a6 100644 --- a/src/vnet/mpls/mpls.api +++ b/src/vnet/mpls/mpls.api @@ -156,7 +156,7 @@ manual_endian manual_print define mpls_tunnel_details @param mr_is_interface_rx - Interface Receive path @param mr_is_interface_rx - RPF-ID Receive path. The next-hop interface is used as the RPF-ID - @param mr_next_hop_proto_is_ip4 - The next-hop is IPV4 + @param mr_next_hop_proto - The next-hop protocol, of type dpo_proto_t @param mr_next_hop_weight - The weight, for UCMP @param mr_next_hop[16] - the nextop address @param mr_next_hop_sw_if_index - the next-hop SW interface @@ -182,7 +182,7 @@ autoreply define mpls_route_add_del u8 mr_is_resolve_attached; u8 mr_is_interface_rx; u8 mr_is_rpf_id; - u8 mr_next_hop_proto_is_ip4; + u8 mr_next_hop_proto; u8 mr_next_hop_weight; u8 mr_next_hop_preference; u8 mr_next_hop[16]; diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 068d31f4..266ba42c 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -261,7 +261,7 @@ vnet_mpls_local_label (vlib_main_t * vm, &rpath.frp_sw_if_index, &rpath.frp_weight)) { - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1(rpaths, rpath); } @@ -272,7 +272,7 @@ vnet_mpls_local_label (vlib_main_t * vm, &rpath.frp_sw_if_index, &rpath.frp_weight)) { - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1(rpaths, rpath); } @@ -283,7 +283,7 @@ vnet_mpls_local_label (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1(rpaths, rpath); } else if (unformat (line_input, "rx-ip4 %U", @@ -291,7 +291,7 @@ vnet_mpls_local_label (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX; vec_add1(rpaths, rpath); } @@ -302,7 +302,7 @@ vnet_mpls_local_label (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1(rpaths, rpath); } else if (unformat (line_input, "via %U next-hop-table %d", @@ -312,7 +312,7 @@ vnet_mpls_local_label (vlib_main_t * vm, { rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1(rpaths, rpath); } else if (unformat (line_input, "via %U next-hop-table %d", @@ -322,7 +322,7 @@ vnet_mpls_local_label (vlib_main_t * vm, { rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1(rpaths, rpath); } else if (unformat (line_input, "via %U", @@ -336,7 +336,7 @@ vnet_mpls_local_label (vlib_main_t * vm, rpath.frp_fib_index = table_id; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; vec_add1(rpaths, rpath); } else if (unformat (line_input, "via %U", @@ -346,7 +346,7 @@ vnet_mpls_local_label (vlib_main_t * vm, rpath.frp_fib_index = table_id; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; vec_add1(rpaths, rpath); } else if (unformat (line_input, "%d", &local_label)) @@ -355,7 +355,7 @@ vnet_mpls_local_label (vlib_main_t * vm, "ip4-lookup-in-table %d", &rpath.frp_fib_index)) { - 
rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; pfx.fp_payload_proto = DPO_PROTO_IP4; vec_add1(rpaths, rpath); @@ -364,7 +364,7 @@ vnet_mpls_local_label (vlib_main_t * vm, "ip6-lookup-in-table %d", &rpath.frp_fib_index)) { - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; vec_add1(rpaths, rpath); pfx.fp_payload_proto = DPO_PROTO_IP6; @@ -373,11 +373,21 @@ vnet_mpls_local_label (vlib_main_t * vm, "mpls-lookup-in-table %d", &rpath.frp_fib_index)) { - rpath.frp_proto = FIB_PROTOCOL_MPLS; + rpath.frp_proto = DPO_PROTO_MPLS; rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; pfx.fp_payload_proto = DPO_PROTO_MPLS; vec_add1(rpaths, rpath); } + else if (unformat (line_input, + "l2-input-on %U", + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_proto = DPO_PROTO_ETHERNET; + pfx.fp_payload_proto = DPO_PROTO_ETHERNET; + rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "out-label %U", unformat_mpls_unicast_label, &out_label)) @@ -440,7 +450,7 @@ vnet_mpls_local_label (vlib_main_t * vm, pfx.fp_proto = FIB_PROTOCOL_MPLS; pfx.fp_len = 21; pfx.fp_label = local_label; - pfx.fp_payload_proto = fib_proto_to_dpo(rpaths[0].frp_proto); + pfx.fp_payload_proto = rpaths[0].frp_proto; /* * the CLI parsing stored table Ids, swap to FIB indicies diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index 92fb24a6..737299e6 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -144,14 +144,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, }; if (pfx.fp_eos) { - if (mp->mr_next_hop_proto_is_ip4) - { - pfx.fp_payload_proto = DPO_PROTO_IP4; - } - else - { - pfx.fp_payload_proto = DPO_PROTO_IP6; - } + pfx.fp_payload_proto = mp->mr_next_hop_proto; } else { @@ -161,7 +154,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, rv = add_del_route_check (FIB_PROTOCOL_MPLS, mp->mr_table_id, mp->mr_next_hop_sw_if_index, - dpo_proto_to_fib (pfx.fp_payload_proto), + pfx.fp_payload_proto, mp->mr_next_hop_table_id, mp->mr_create_table_if_needed, mp->mr_is_rpf_id, @@ -173,9 +166,9 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, ip46_address_t nh; memset (&nh, 0, sizeof (nh)); - if (mp->mr_next_hop_proto_is_ip4) + if (DPO_PROTO_IP4 == mp->mr_next_hop_proto) memcpy (&nh.ip4, mp->mr_next_hop, sizeof (nh.ip4)); - else + else if (DPO_PROTO_IP6 == mp->mr_next_hop_proto) memcpy (&nh.ip6, mp->mr_next_hop, sizeof (nh.ip6)); n_labels = mp->mr_next_hop_n_out_labels; @@ -202,7 +195,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, mp->mr_is_interface_rx, mp->mr_is_rpf_id, fib_index, &pfx, - mp->mr_next_hop_proto_is_ip4, + mp->mr_next_hop_proto, &nh, ntohl (mp->mr_next_hop_sw_if_index), next_hop_fib_index, mp->mr_next_hop_weight, @@ -243,13 +236,13 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) if (mp->mt_next_hop_proto_is_ip4) { - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; clib_memcpy (&rpath.frp_addr.ip4, mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); } else { - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; clib_memcpy (&rpath.frp_addr.ip6, mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); } diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c index c025cc58..6452a60b 100644 --- a/src/vnet/mpls/mpls_tunnel.c +++ b/src/vnet/mpls/mpls_tunnel.c @@ -171,7 +171,7 @@ mpls_tunnel_mk_lb 
(mpls_tunnel_t *mt, vec_validate(ctx.next_hops, fib_path_list_get_n_paths(mt->mt_path_list)); vec_reset_length(ctx.next_hops); - lb_proto = vnet_link_to_dpo_proto(linkt); + lb_proto = fib_forw_chain_type_to_dpo_proto(fct); fib_path_list_walk(mt->mt_path_list, mpls_tunnel_collect_forwarding, @@ -313,12 +313,34 @@ mpls_tunnel_restack (mpls_tunnel_t *mt) /* * walk all the adjacencies on the MPLS interface and restack them */ - FOR_EACH_FIB_PROTOCOL(proto) + if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2) { - adj_nbr_walk(mt->mt_sw_if_index, - proto, - mpls_adj_walk_cb, - NULL); + /* + * Stack a load-balance that drops, whilst we have no paths + */ + vnet_hw_interface_t * hi; + dpo_id_t dpo = DPO_INVALID; + + mpls_tunnel_mk_lb(mt, + VNET_LINK_MPLS, + FIB_FORW_CHAIN_TYPE_ETHERNET, + &dpo); + + hi = vnet_get_hw_interface(vnet_get_main(), mt->mt_hw_if_index); + dpo_stack_from_node(hi->tx_node_index, + &mt->mt_l2_lb, + &dpo); + dpo_reset(&dpo); + } + else + { + FOR_EACH_FIB_PROTOCOL(proto) + { + adj_nbr_walk(mt->mt_sw_if_index, + proto, + mpls_adj_walk_cb, + NULL); + } } } @@ -495,7 +517,7 @@ mpls_tunnel_tx (vlib_main_t * vm, b0 = vlib_get_buffer(vm, bi0); - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_lb.dpoi_index; if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -506,7 +528,7 @@ mpls_tunnel_tx (vlib_main_t * vm, vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, - bi0, mt->mt_l2_tx_arc); + bi0, mt->mt_l2_lb.dpoi_next_node); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -565,8 +587,7 @@ vnet_mpls_tunnel_del (u32 sw_if_index) if (FIB_NODE_INDEX_INVALID != mt->mt_path_list) fib_path_list_child_remove(mt->mt_path_list, mt->mt_sibling_index); - if (ADJ_INDEX_INVALID != mt->mt_l2_adj) - adj_unlock(mt->mt_l2_adj); + dpo_reset(&mt->mt_l2_lb); vec_add1 (mpls_tunnel_free_hw_if_indices, mt->mt_hw_if_index); pool_put(mpls_tunnel_pool, mt); @@ -587,12 +608,13 @@ vnet_mpls_tunnel_create (u8 l2_only, memset (mt, 0, sizeof (*mt)); mti = mt - mpls_tunnel_pool; fib_node_init(&mt->mt_node, FIB_NODE_TYPE_MPLS_TUNNEL); - mt->mt_l2_adj = ADJ_INDEX_INVALID; mt->mt_path_list = FIB_NODE_INDEX_INVALID; mt->mt_sibling_index = FIB_NODE_INDEX_INVALID; if (is_multicast) mt->mt_flags |= MPLS_TUNNEL_FLAG_MCAST; + if (l2_only) + mt->mt_flags |= MPLS_TUNNEL_FLAG_L2; /* * Create a new, or re=use and old, tunnel HW interface @@ -614,7 +636,7 @@ vnet_mpls_tunnel_create (u8 l2_only, mti, mpls_tunnel_hw_interface_class.index, mti); - hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index); + hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index); } /* @@ -624,19 +646,6 @@ vnet_mpls_tunnel_create (u8 l2_only, vec_validate_init_empty(mpls_tunnel_db, mt->mt_sw_if_index, ~0); mpls_tunnel_db[mt->mt_sw_if_index] = mti; - if (l2_only) - { - mt->mt_l2_adj = - adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list), - VNET_LINK_ETHERNET, - &zero_addr, - mt->mt_sw_if_index); - - mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(), - hi->tx_node_index, - "adj-l2-midchain"); - } - return (mt->mt_sw_if_index); } @@ -803,7 +812,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; } else if (unformat (line_input, "via %U %U", @@ -813,7 +822,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, &rpath.frp_sw_if_index)) { rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto 
= DPO_PROTO_IP6; } else if (unformat (line_input, "via %U", unformat_ip6_address, @@ -822,7 +831,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, rpath.frp_fib_index = 0; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_proto = DPO_PROTO_IP6; } else if (unformat (line_input, "via %U", unformat_ip4_address, @@ -831,7 +840,7 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, rpath.frp_fib_index = 0; rpath.frp_weight = 1; rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_proto = DPO_PROTO_IP4; } else if (unformat (line_input, "l2-only")) l2_only = 1; @@ -915,6 +924,14 @@ format_mpls_tunnel (u8 * s, va_list * args) s = format(s, "%U", format_fib_path_ext_list, &mt->mt_path_exts); s = format(s, "\n"); + if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2) + { + s = format(s, " forwarding: %U\n", + format_fib_forw_chain_type, + FIB_FORW_CHAIN_TYPE_ETHERNET); + s = format(s, " %U\n", format_dpo_id, &mt->mt_l2_lb, 2); + } + return (s); } diff --git a/src/vnet/mpls/mpls_tunnel.h b/src/vnet/mpls/mpls_tunnel.h index 4cb0a860..285817c3 100644 --- a/src/vnet/mpls/mpls_tunnel.h +++ b/src/vnet/mpls/mpls_tunnel.h @@ -22,15 +22,20 @@ typedef enum mpls_tunnel_attribute_t_ { MPLS_TUNNEL_ATTRIBUTE_FIRST = 0, + /** + * @brief The tunnel is L2 only + */ + MPLS_TUNNEL_ATTRIBUTE_L2 = MPLS_TUNNEL_ATTRIBUTE_FIRST, /** * @brief The tunnel has an underlying multicast LSP */ - MPLS_TUNNEL_ATTRIBUTE_MCAST = MPLS_TUNNEL_ATTRIBUTE_FIRST, + MPLS_TUNNEL_ATTRIBUTE_MCAST, MPLS_TUNNEL_ATTRIBUTE_LAST = MPLS_TUNNEL_ATTRIBUTE_MCAST, } mpls_tunnel_attribute_t; #define MPLS_TUNNEL_ATTRIBUTES { \ [MPLS_TUNNEL_ATTRIBUTE_MCAST] = "multicast", \ + [MPLS_TUNNEL_ATTRIBUTE_L2] = "L2", \ } #define FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(_item) \ for (_item = MPLS_TUNNEL_ATTRIBUTE_FIRST; \ @@ -39,6 +44,7 @@ typedef enum mpls_tunnel_attribute_t_ typedef enum mpls_tunnel_flag_t_ { MPLS_TUNNEL_FLAG_NONE = 0, + MPLS_TUNNEL_FLAG_L2 = (1 << MPLS_TUNNEL_ATTRIBUTE_L2), MPLS_TUNNEL_FLAG_MCAST = (1 << MPLS_TUNNEL_ATTRIBUTE_MCAST), } __attribute__ ((packed)) mpls_tunnel_flags_t; @@ -60,14 +66,19 @@ typedef struct mpls_tunnel_t_ /** * @brief If the tunnel is an L2 tunnel, this is the link type ETHERNET - * adjacency + * load-balance + */ + dpo_id_t mt_l2_lb; + + /** + * @brief The HW interface index of the tunnel interfaces */ - adj_index_t mt_l2_adj; + u32 mt_hw_if_index; /** - * @brief on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain + * @brief The SW interface index of the tunnel interfaces */ - u32 mt_l2_tx_arc; + u32 mt_sw_if_index; /** * @brief The path-list over which the tunnel's destination is reachable @@ -83,23 +94,6 @@ typedef struct mpls_tunnel_t_ * A vector of path extensions o hold the label stack for each path */ fib_path_ext_list_t mt_path_exts; - - /** - * @brief Flag to indicate the tunnel is only for L2 traffic, that is - * this tunnel belongs in a bridge domain. 
- */ - u8 mt_l2_only; - - /** - * @brief The HW interface index of the tunnel interfaces - */ - u32 mt_hw_if_index; - - /** - * @brief The SW interface index of the tunnel interfaces - */ - u32 mt_sw_if_index; - } mpls_tunnel_t; /** diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/vnet/srmpls/sr_mpls_policy.c index 5ebbc60d..db4ad2a7 100755 --- a/src/vnet/srmpls/sr_mpls_policy.c +++ b/src/vnet/srmpls/sr_mpls_policy.c @@ -75,7 +75,7 @@ create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight) segment_list->segments = vec_dup (sl); fib_route_path_t path = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = segment_list->weight, @@ -203,7 +203,7 @@ sr_mpls_policy_del (mpls_label_t bsid, u32 index) segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); fib_route_path_t path = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = segment_list->weight, @@ -308,7 +308,7 @@ sr_mpls_policy_mod (mpls_label_t bsid, u32 index, u8 operation, mpls_eos_bit_t eos; fib_route_path_t path = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, .frp_sw_if_index = ~0, .frp_fib_index = 0, .frp_weight = segment_list->weight, diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/vnet/srmpls/sr_mpls_steering.c index 37707049..3a9aea2d 100755 --- a/src/vnet/srmpls/sr_mpls_steering.c +++ b/src/vnet/srmpls/sr_mpls_steering.c @@ -218,7 +218,7 @@ sr_mpls_steering_policy (int is_del, mpls_label_t bsid, u32 sr_policy_index, update_fib:; fib_route_path_t path = { - .frp_proto = FIB_PROTOCOL_MPLS, + .frp_proto = DPO_PROTO_MPLS, .frp_local_label = sr_policy->bsid, .frp_eos = MPLS_EOS, .frp_sw_if_index = ~0, diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c index a7903751..704adaa7 100755 --- a/src/vnet/srv6/sr_steering.c +++ b/src/vnet/srv6/sr_steering.c @@ -310,7 +310,7 @@ update_fib: table_id : 0)), &pfx, FIB_SOURCE_SR, FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, (ip46_address_t *) & sr_policy->bsid, ~0, sm->fib_table_ip6, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); @@ -327,7 +327,7 @@ update_fib: table_id : 0)), &pfx, FIB_SOURCE_SR, FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, - FIB_PROTOCOL_IP6, + DPO_PROTO_IP6, (ip46_address_t *) & sr_policy->bsid, ~0, sm->fib_table_ip4, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c index 97bb1b15..462c79a0 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.c +++ b/src/vnet/vxlan-gpe/vxlan_gpe.c @@ -638,7 +638,7 @@ int vnet_vxlan_gpe_add_del_tunnel fib_node_index_t mfei; adj_index_t ai; fib_route_path_t path = { - .frp_proto = fp, + .frp_proto = fib_proto_to_dpo(fp), .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c index 1b3df2a8..dc973372 100644 --- a/src/vnet/vxlan/vxlan.c +++ b/src/vnet/vxlan/vxlan.c @@ -505,7 +505,7 @@ int vnet_vxlan_add_del_tunnel fib_node_index_t mfei; adj_index_t ai; fib_route_path_t path = { - .frp_proto = fp, + .frp_proto = fib_proto_to_dpo(fp), .frp_addr = zero_addr, .frp_sw_if_index = 0xffffffff, .frp_fib_index = ~0, diff --git a/src/vpp/app/vpe_cli.c b/src/vpp/app/vpe_cli.c index 94bdc84c..fcc496ad 100644 --- a/src/vpp/app/vpe_cli.c +++ b/src/vpp/app/vpe_cli.c @@ -98,7 +98,7 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, vec_add2 (rpaths, rpath, 1); - rpath->frp_proto = FIB_PROTOCOL_IP4; + rpath->frp_proto = 
DPO_PROTO_IP4; rpath->frp_addr = next_hops[i]; rpath->frp_sw_if_index = sw_if_index; rpath->frp_fib_index = ~0; diff --git a/test/test_bfd.py b/test/test_bfd.py index be42cdad..4cb6d379 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -20,7 +20,7 @@ from vpp_pg_interface import CaptureTimeoutError, is_ipv6_misc from vpp_lo_interface import VppLoInterface from util import ppp from vpp_papi_provider import UnexpectedApiReturnValueError -from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto USEC_IN_SEC = 1000000 @@ -1678,12 +1678,12 @@ class BFDFIBTestCase(VppTestCase): ip_2001_s_64 = VppIpRoute(self, "2001::", 64, [VppRoutePath(self.pg0.remote_ip6, self.pg0.sw_if_index, - is_ip6=1)], + proto=DPO_PROTO_IP6)], is_ip6=1) ip_2002_s_64 = VppIpRoute(self, "2002::", 64, [VppRoutePath(self.pg0.remote_ip6, 0xffffffff, - is_ip6=1)], + proto=DPO_PROTO_IP6)], is_ip6=1) ip_2001_s_64.add_vpp_config() ip_2002_s_64.add_vpp_config() diff --git a/test/test_gre.py b/test/test_gre.py index 18b67dbd..1afc44fb 100644 --- a/test/test_gre.py +++ b/test/test_gre.py @@ -6,7 +6,7 @@ from logging import * from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppDot1QSubint from vpp_gre_interface import VppGreInterface, VppGre6Interface -from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto from vpp_papi_provider import L2_VTR_OP from scapy.packet import Raw @@ -516,11 +516,12 @@ class TestGRE(VppTestCase): gre_if.admin_up() gre_if.config_ip6() - route_via_tun = VppIpRoute(self, "4004::1", 128, - [VppRoutePath("0::0", - gre_if.sw_if_index, - is_ip6=1)], - is_ip6=1) + route_via_tun = VppIpRoute( + self, "4004::1", 128, + [VppRoutePath("0::0", + gre_if.sw_if_index, + proto=DpoProto.DPO_PROTO_IP6)], + is_ip6=1) route_via_tun.add_vpp_config() @@ -542,11 +543,12 @@ class TestGRE(VppTestCase): # # Add a route that resolves the tunnel's destination # - route_tun_dst = VppIpRoute(self, "1002::1", 128, - [VppRoutePath(self.pg2.remote_ip6, - self.pg2.sw_if_index, - is_ip6=1)], - is_ip6=1) + route_tun_dst = VppIpRoute( + self, "1002::1", 128, + [VppRoutePath(self.pg2.remote_ip6, + self.pg2.sw_if_index, + proto=DpoProto.DPO_PROTO_IP6)], + is_ip6=1) route_tun_dst.add_vpp_config() # diff --git a/test/test_ip6.py b/test/test_ip6.py index 593f6868..285ce181 100644 --- a/test/test_ip6.py +++ b/test/test_ip6.py @@ -8,7 +8,7 @@ from vpp_sub_interface import VppSubInterface, VppDot1QSubint from vpp_pg_interface import is_ipv6_misc from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, VppIpMRoute, \ VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind, \ - VppMplsRoute + VppMplsRoute, DpoProto from vpp_neighbor import find_nbr, VppNeighbor from scapy.packet import Raw @@ -490,7 +490,7 @@ class TestIPv6(TestIPv6ND): inet=AF_INET6)) def test_ns_duplicates(self): - """ ARP Duplicates""" + """ ND Duplicates""" # # Generate some hosts on the LAN @@ -537,7 +537,7 @@ class TestIPv6(TestIPv6ND): # # remove the duplicate on pg1 - # packet stream shoud generate ARPs out of pg1 + # packet stream shoud generate NSs out of pg1 # ns_pg1.remove_vpp_config() @@ -1347,10 +1347,10 @@ class TestIP6LoadBalance(VppTestCase): route_3000_1 = VppIpRoute(self, "3000::1", 128, [VppRoutePath(self.pg1.remote_ip6, self.pg1.sw_if_index, - is_ip6=1), + proto=DpoProto.DPO_PROTO_IP6), VppRoutePath(self.pg2.remote_ip6, self.pg2.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) 
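# Usage sketch (illustrative only, not part of the hunks above or below): across
# these tests the old is_ip6=1 argument to VppRoutePath is replaced by an explicit
# path protocol, using the DpoProto values this patch adds to test/vpp_ip_route.py.
# Inside a VppTestCase method, an IPv6 route via pg1 is now expressed roughly as:
from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto

path_v6 = VppRoutePath(self.pg1.remote_ip6,           # next-hop address
                       self.pg1.sw_if_index,          # next-hop interface
                       proto=DpoProto.DPO_PROTO_IP6)  # replaces is_ip6=1 on the path
route_v6 = VppIpRoute(self, "2001:db8::", 64, [path_v6], is_ip6=1)
route_v6.add_vpp_config()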
route_3000_1.add_vpp_config() @@ -1367,11 +1367,11 @@ class TestIP6LoadBalance(VppTestCase): [VppRoutePath(self.pg1.remote_ip6, self.pg1.sw_if_index, labels=[67], - is_ip6=1), + proto=DpoProto.DPO_PROTO_IP6), VppRoutePath(self.pg2.remote_ip6, self.pg2.sw_if_index, labels=[67], - is_ip6=1)]) + proto=DpoProto.DPO_PROTO_IP6)]) route_67.add_vpp_config() # @@ -1441,20 +1441,20 @@ class TestIP6LoadBalance(VppTestCase): route_3000_2 = VppIpRoute(self, "3000::2", 128, [VppRoutePath(self.pg3.remote_ip6, self.pg3.sw_if_index, - is_ip6=1), + proto=DpoProto.DPO_PROTO_IP6), VppRoutePath(self.pg4.remote_ip6, self.pg4.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_3000_2.add_vpp_config() route_4000_1 = VppIpRoute(self, "4000::1", 128, [VppRoutePath("3000::1", 0xffffffff, - is_ip6=1), + proto=DpoProto.DPO_PROTO_IP6), VppRoutePath("3000::2", 0xffffffff, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_4000_1.add_vpp_config() @@ -1485,14 +1485,14 @@ class TestIP6LoadBalance(VppTestCase): route_5000_2 = VppIpRoute(self, "5000::2", 128, [VppRoutePath(self.pg3.remote_ip6, self.pg3.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_5000_2.add_vpp_config() route_6000_1 = VppIpRoute(self, "6000::1", 128, [VppRoutePath("5000::2", 0xffffffff, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_6000_1.add_vpp_config() diff --git a/test/test_map.py b/test/test_map.py index 9ac3948a..bbf4aec2 100644 --- a/test/test_map.py +++ b/test/test_map.py @@ -4,7 +4,7 @@ import unittest import socket from framework import VppTestCase, VppTestRunner -from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto from scapy.layers.l2 import Ether, Raw from scapy.layers.inet import IP, UDP, ICMP @@ -75,7 +75,7 @@ class TestMAP(VppTestCase): map_br_pfx_len, [VppRoutePath(self.pg1.remote_ip6, self.pg1.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) map_route.add_vpp_config() @@ -138,13 +138,12 @@ class TestMAP(VppTestCase): # Add a route to 4001::1. 
Expect the encapped traffic to be # sent via that routes next-hop # - pre_res_route = VppIpRoute(self, - "4001::1", - 128, - [VppRoutePath(self.pg1.remote_hosts[2].ip6, - self.pg1.sw_if_index, - is_ip6=1)], - is_ip6=1) + pre_res_route = VppIpRoute( + self, "4001::1", 128, + [VppRoutePath(self.pg1.remote_hosts[2].ip6, + self.pg1.sw_if_index, + proto=DpoProto.DPO_PROTO_IP6)], + is_ip6=1) pre_res_route.add_vpp_config() self.send_and_assert_encapped(v4, map_src, @@ -156,7 +155,7 @@ class TestMAP(VppTestCase): # pre_res_route.modify([VppRoutePath(self.pg1.remote_hosts[3].ip6, self.pg1.sw_if_index, - is_ip6=1)]) + proto=DpoProto.DPO_PROTO_IP6)]) pre_res_route.add_vpp_config() self.send_and_assert_encapped(v4, map_src, diff --git a/test/test_mpls.py b/test/test_mpls.py index e3d013af..b2226a74 100644 --- a/test/test_mpls.py +++ b/test/test_mpls.py @@ -6,7 +6,7 @@ import socket from framework import VppTestCase, VppTestRunner from vpp_ip_route import VppIpRoute, VppRoutePath, VppMplsRoute, \ VppMplsIpBind, VppIpMRoute, VppMRoutePath, \ - MRouteItfFlags, MRouteEntryFlags + MRouteItfFlags, MRouteEntryFlags, DpoProto from vpp_mpls_tunnel_interface import VppMPLSTunnelInterface from scapy.packet import Raw @@ -16,6 +16,38 @@ from scapy.layers.inet6 import IPv6 from scapy.contrib.mpls import MPLS +def verify_filter(capture, sent): + if not len(capture) == len(sent): + # filter out any IPv6 RAs from the capture + for p in capture: + if p.haslayer(IPv6): + capture.remove(p) + return capture + + +def verify_mpls_stack(tst, rx, mpls_labels, ttl=255, num=0): + # the rx'd packet has the MPLS label popped + eth = rx[Ether] + tst.assertEqual(eth.type, 0x8847) + + rx_mpls = rx[MPLS] + + for ii in range(len(mpls_labels)): + tst.assertEqual(rx_mpls.label, mpls_labels[ii]) + tst.assertEqual(rx_mpls.cos, 0) + if ii == num: + tst.assertEqual(rx_mpls.ttl, ttl) + else: + tst.assertEqual(rx_mpls.ttl, 255) + if ii == len(mpls_labels) - 1: + tst.assertEqual(rx_mpls.s, 1) + else: + # not end of stack + tst.assertEqual(rx_mpls.s, 0) + # pop the label to expose the next + rx_mpls = rx_mpls[MPLS].payload + + class TestMPLS(VppTestCase): """ MPLS Test Case """ @@ -120,18 +152,9 @@ class TestMPLS(VppTestCase): pkts.append(p) return pkts - @staticmethod - def verify_filter(capture, sent): - if not len(capture) == len(sent): - # filter out any IPv6 RAs from the capture - for p in capture: - if p.haslayer(IPv6): - capture.remove(p) - return capture - def verify_capture_ip4(self, src_if, capture, sent, ping_resp=0): try: - capture = self.verify_filter(capture, sent) + capture = verify_filter(capture, sent) self.assertEqual(len(capture), len(sent)) @@ -158,33 +181,10 @@ class TestMPLS(VppTestCase): except: raise - def verify_mpls_stack(self, rx, mpls_labels, ttl=255, num=0): - # the rx'd packet has the MPLS label popped - eth = rx[Ether] - self.assertEqual(eth.type, 0x8847) - - rx_mpls = rx[MPLS] - - for ii in range(len(mpls_labels)): - self.assertEqual(rx_mpls.label, mpls_labels[ii]) - self.assertEqual(rx_mpls.cos, 0) - if ii == num: - self.assertEqual(rx_mpls.ttl, ttl) - else: - self.assertEqual(rx_mpls.ttl, 255) - - if ii == len(mpls_labels) - 1: - self.assertEqual(rx_mpls.s, 1) - else: - # not end of stack - self.assertEqual(rx_mpls.s, 0) - # pop the label to expose the next - rx_mpls = rx_mpls[MPLS].payload - def verify_capture_labelled_ip4(self, src_if, capture, sent, mpls_labels): try: - capture = self.verify_filter(capture, sent) + capture = verify_filter(capture, sent) self.assertEqual(len(capture), len(sent)) @@ -195,8 
+195,8 @@ class TestMPLS(VppTestCase): rx_ip = rx[IP] # the MPLS TTL is copied from the IP - self.verify_mpls_stack( - rx, mpls_labels, rx_ip.ttl, len(mpls_labels) - 1) + verify_mpls_stack(self, rx, mpls_labels, rx_ip.ttl, + len(mpls_labels) - 1) self.assertEqual(rx_ip.src, tx_ip.src) self.assertEqual(rx_ip.dst, tx_ip.dst) @@ -211,7 +211,7 @@ class TestMPLS(VppTestCase): if top is None: top = len(mpls_labels) - 1 try: - capture = self.verify_filter(capture, sent) + capture = verify_filter(capture, sent) self.assertEqual(len(capture), len(sent)) @@ -222,8 +222,7 @@ class TestMPLS(VppTestCase): rx_ip = rx[IP] # the MPLS TTL is 255 since it enters a new tunnel - self.verify_mpls_stack( - rx, mpls_labels, ttl, top) + verify_mpls_stack(self, rx, mpls_labels, ttl, top) self.assertEqual(rx_ip.src, tx_ip.src) self.assertEqual(rx_ip.dst, tx_ip.dst) @@ -236,13 +235,13 @@ class TestMPLS(VppTestCase): def verify_capture_labelled(self, src_if, capture, sent, mpls_labels, ttl=254, num=0): try: - capture = self.verify_filter(capture, sent) + capture = verify_filter(capture, sent) self.assertEqual(len(capture), len(sent)) for i in range(len(capture)): rx = capture[i] - self.verify_mpls_stack(rx, mpls_labels, ttl, num) + verify_mpls_stack(self, rx, mpls_labels, ttl, num) except: raise @@ -1049,7 +1048,7 @@ class TestMPLS(VppTestCase): self.pg1.sw_if_index, nh_table_id=1, rpf_id=55, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_multicast=1) route_34_eos.add_vpp_config() @@ -1440,19 +1439,20 @@ class TestMPLSPIC(VppTestCase): for ii in range(64): dst = "3000::%d" % ii local_label = 1600 + ii - vpn_routes.append(VppIpRoute(self, dst, 128, - [VppRoutePath(self.pg2.remote_ip6, - 0xffffffff, - nh_table_id=1, - is_resolve_attached=1, - is_ip6=1), - VppRoutePath(self.pg3.remote_ip6, - 0xffffffff, - nh_table_id=1, - is_ip6=1, - is_resolve_attached=1)], - table_id=1, - is_ip6=1)) + vpn_routes.append(VppIpRoute( + self, dst, 128, + [VppRoutePath(self.pg2.remote_ip6, + 0xffffffff, + nh_table_id=1, + is_resolve_attached=1, + proto=DpoProto.DPO_PROTO_IP6), + VppRoutePath(self.pg3.remote_ip6, + 0xffffffff, + nh_table_id=1, + proto=DpoProto.DPO_PROTO_IP6, + is_resolve_attached=1)], + table_id=1, + is_ip6=1)) vpn_routes[ii].add_vpp_config() vpn_bindings.append(VppMplsIpBind(self, local_label, dst, 128, @@ -1525,5 +1525,211 @@ class TestMPLSPIC(VppTestCase): self.assertNotEqual(0, len(rx1)) +class TestMPLSL2(VppTestCase): + """ MPLS-L2 """ + + def setUp(self): + super(TestMPLSL2, self).setUp() + + # create 2 pg interfaces + self.create_pg_interfaces(range(2)) + + # use pg0 as the core facing interface + self.pg0.admin_up() + self.pg0.config_ip4() + self.pg0.resolve_arp() + self.pg0.enable_mpls() + + # use the other 2 for customer facg L2 links + for i in self.pg_interfaces[1:]: + i.admin_up() + + def tearDown(self): + super(TestMPLSL2, self).tearDown() + for i in self.pg_interfaces[1:]: + i.admin_down() + + self.pg0.disable_mpls() + self.pg0.unconfig_ip4() + self.pg0.admin_down() + + def verify_capture_tunneled_ethernet(self, capture, sent, mpls_labels, + ttl=255, top=None): + if top is None: + top = len(mpls_labels) - 1 + + capture = verify_filter(capture, sent) + + self.assertEqual(len(capture), len(sent)) + + for i in range(len(capture)): + tx = sent[i] + rx = capture[i] + + # the MPLS TTL is 255 since it enters a new tunnel + verify_mpls_stack(self, rx, mpls_labels, ttl, top) + + tx_eth = tx[Ether] + rx_eth = Ether(str(rx[MPLS].payload)) + + self.assertEqual(rx_eth.src, tx_eth.src) + self.assertEqual(rx_eth.dst, 
tx_eth.dst) + + def test_vpws(self): + """ Virtual Private Wire Service """ + + # + # Create an MPLS tunnel that pushes 1 label + # + mpls_tun_1 = VppMPLSTunnelInterface(self, + [VppRoutePath(self.pg0.remote_ip4, + self.pg0.sw_if_index, + labels=[42])], + is_l2=1) + mpls_tun_1.add_vpp_config() + mpls_tun_1.admin_up() + + # + # Create a label entry to for 55 that does L2 input to the tunnel + # + route_55_eos = VppMplsRoute( + self, 55, 1, + [VppRoutePath("0.0.0.0", + mpls_tun_1.sw_if_index, + is_interface_rx=1, + proto=DpoProto.DPO_PROTO_ETHERNET)]) + route_55_eos.add_vpp_config() + + # + # Cross-connect the tunnel with one of the customers L2 interfaces + # + self.vapi.sw_interface_set_l2_xconnect(self.pg1.sw_if_index, + mpls_tun_1.sw_if_index, + enable=1) + self.vapi.sw_interface_set_l2_xconnect(mpls_tun_1.sw_if_index, + self.pg1.sw_if_index, + enable=1) + + # + # inject a packet from the core + # + pcore = (Ether(dst=self.pg0.local_mac, + src=self.pg0.remote_mac) / + MPLS(label=55, ttl=64) / + Ether(dst="00:00:de:ad:ba:be", + src="00:00:de:ad:be:ef") / + IP(src="10.10.10.10", dst="11.11.11.11") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + + self.pg0.add_stream(pcore * 65) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx0 = self.pg1.get_capture(65) + tx = pcore[MPLS].payload + + self.assertEqual(rx0[0][Ether].dst, tx[Ether].dst) + self.assertEqual(rx0[0][Ether].src, tx[Ether].src) + + # + # Inject a packet from the custoer/L2 side + # + self.pg1.add_stream(tx * 65) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx0 = self.pg0.get_capture(65) + + self.verify_capture_tunneled_ethernet(rx0, tx*65, [42]) + + def test_vpls(self): + """ Virtual Private LAN Service """ + # + # Create an L2 MPLS tunnel + # + mpls_tun = VppMPLSTunnelInterface(self, + [VppRoutePath(self.pg0.remote_ip4, + self.pg0.sw_if_index, + labels=[42])], + is_l2=1) + mpls_tun.add_vpp_config() + mpls_tun.admin_up() + + # + # Create a label entry to for 55 that does L2 input to the tunnel + # + route_55_eos = VppMplsRoute( + self, 55, 1, + [VppRoutePath("0.0.0.0", + mpls_tun.sw_if_index, + is_interface_rx=1, + proto=DpoProto.DPO_PROTO_ETHERNET)]) + route_55_eos.add_vpp_config() + + # + # add to tunnel to the customers bridge-domain + # + self.vapi.sw_interface_set_l2_bridge(mpls_tun.sw_if_index, + bd_id=1) + self.vapi.sw_interface_set_l2_bridge(self.pg1.sw_if_index, + bd_id=1) + + # + # Packet from the customer interface and from the core + # + p_cust = (Ether(dst="00:00:de:ad:ba:be", + src="00:00:de:ad:be:ef") / + IP(src="10.10.10.10", dst="11.11.11.11") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + p_core = (Ether(src="00:00:de:ad:ba:be", + dst="00:00:de:ad:be:ef") / + IP(dst="10.10.10.10", src="11.11.11.11") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)) + + # + # The BD is learning, so send in one of each packet to learn + # + p_core_encap = (Ether(dst=self.pg0.local_mac, + src=self.pg0.remote_mac) / + MPLS(label=55, ttl=64) / + p_core) + + self.pg1.add_stream(p_cust) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg0.add_stream(p_core_encap) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # we've learnt this so expect it be be forwarded + rx0 = self.pg1.get_capture(1) + + self.assertEqual(rx0[0][Ether].dst, p_core[Ether].dst) + self.assertEqual(rx0[0][Ether].src, p_core[Ether].src) + + # + # now a stream in each direction + # + self.pg1.add_stream(p_cust * 65) + self.pg_enable_capture(self.pg_interfaces) + 
self.pg_start() + + rx0 = self.pg0.get_capture(65) + + self.verify_capture_tunneled_ethernet(rx0, p_cust*65, [42]) + + # + # remove interfaces from customers bridge-domain + # + self.vapi.sw_interface_set_l2_bridge(mpls_tun.sw_if_index, + bd_id=1, + enable=0) + self.vapi.sw_interface_set_l2_bridge(self.pg1.sw_if_index, + bd_id=1, + enable=0) + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/test_p2p_ethernet.py b/test/test_p2p_ethernet.py index 37a1d18b..8688f7e6 100644 --- a/test/test_p2p_ethernet.py +++ b/test/test_p2p_ethernet.py @@ -11,7 +11,7 @@ from scapy.layers.inet6 import IPv6 from framework import VppTestCase, VppTestRunner, running_extended_tests from vpp_sub_interface import VppP2PSubint -from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto from util import mactobinary @@ -219,7 +219,7 @@ class P2PEthernetIPV6(VppTestCase): route_8000 = VppIpRoute(self, "8000::", 64, [VppRoutePath(self.pg0.remote_ip6, self.pg0.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_8000.add_vpp_config() @@ -239,7 +239,7 @@ class P2PEthernetIPV6(VppTestCase): route_9001 = VppIpRoute(self, "9001::", 64, [VppRoutePath(self.pg1.remote_ip6, self.pg1.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_9001.add_vpp_config() @@ -264,7 +264,7 @@ class P2PEthernetIPV6(VppTestCase): route_3 = VppIpRoute(self, "9000::", 64, [VppRoutePath(self.pg1._remote_hosts[0].ip6, self.pg1.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_3.add_vpp_config() @@ -289,7 +289,7 @@ class P2PEthernetIPV6(VppTestCase): route_9001 = VppIpRoute(self, "9000::", 64, [VppRoutePath(self.pg1._remote_hosts[0].ip6, self.pg1.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_9001.add_vpp_config() @@ -310,19 +310,19 @@ class P2PEthernetIPV6(VppTestCase): route_8000 = VppIpRoute(self, "8000::", 64, [VppRoutePath(self.pg0.remote_ip6, self.pg0.sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_8000.add_vpp_config() route_8001 = VppIpRoute(self, "8001::", 64, [VppRoutePath(self.p2p_sub_ifs[0].remote_ip6, self.p2p_sub_ifs[0].sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_8001.add_vpp_config() route_8002 = VppIpRoute(self, "8002::", 64, [VppRoutePath(self.p2p_sub_ifs[1].remote_ip6, self.p2p_sub_ifs[1].sw_if_index, - is_ip6=1)], + proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1) route_8002.add_vpp_config() diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py index badb3102..2c489e3c 100644 --- a/test/vpp_ip_route.py +++ b/test/vpp_ip_route.py @@ -29,6 +29,14 @@ class MRouteEntryFlags: MFIB_ENTRY_FLAG_INHERIT_ACCEPT = 8 +class DpoProto: + DPO_PROTO_IP4 = 0 + DPO_PROTO_IP6 = 1 + DPO_PROTO_MPLS = 2 + DPO_PROTO_ETHERNET = 3 + DPO_PROTO_NSH = 4 + + def find_route(test, ip_addr, len, table_id=0, inet=AF_INET): if inet == AF_INET: s = 4 @@ -55,22 +63,24 @@ class VppRoutePath(object): nh_table_id=0, labels=[], nh_via_label=MPLS_LABEL_INVALID, - is_ip6=0, rpf_id=0, is_interface_rx=0, is_resolve_host=0, - is_resolve_attached=0): + is_resolve_attached=0, + proto=DpoProto.DPO_PROTO_IP4): self.nh_itf = nh_sw_if_index self.nh_table_id = nh_table_id self.nh_via_label = nh_via_label self.nh_labels = labels self.weight = 1 self.rpf_id = rpf_id - self.is_ip4 = 1 if is_ip6 == 0 else 0 - if self.is_ip4: + self.proto = proto + if self.proto is DpoProto.DPO_PROTO_IP6: + self.nh_addr = inet_pton(AF_INET6, nh_addr) + elif 
self.proto is DpoProto.DPO_PROTO_IP4: self.nh_addr = inet_pton(AF_INET, nh_addr) else: - self.nh_addr = inet_pton(AF_INET6, nh_addr) + self.nh_addr = inet_pton(AF_INET6, "::") self.is_resolve_host = is_resolve_host self.is_resolve_attached = is_resolve_attached self.is_interface_rx = is_interface_rx @@ -401,7 +411,7 @@ class VppMplsRoute(VppObject): self._test.vapi.mpls_route_add_del( self.local_label, self.eos_bit, - path.is_ip4, + path.proto, path.nh_addr, path.nh_itf, is_multicast=self.is_multicast, @@ -420,7 +430,7 @@ class VppMplsRoute(VppObject): for path in self.paths: self._test.vapi.mpls_route_add_del(self.local_label, self.eos_bit, - 1, + path.proto, path.nh_addr, path.nh_itf, is_rpf_id=path.is_rpf_id, diff --git a/test/vpp_mpls_tunnel_interface.py b/test/vpp_mpls_tunnel_interface.py index f2001574..0542b05c 100644 --- a/test/vpp_mpls_tunnel_interface.py +++ b/test/vpp_mpls_tunnel_interface.py @@ -9,13 +9,14 @@ class VppMPLSTunnelInterface(VppInterface): VPP MPLS Tunnel interface """ - def __init__(self, test, paths, is_multicast=0): + def __init__(self, test, paths, is_multicast=0, is_l2=0): """ Create MPLS Tunnel interface """ self._sw_if_index = 0 super(VppMPLSTunnelInterface, self).__init__(test) self._test = test self.t_paths = paths self.is_multicast = is_multicast + self.is_l2 = is_l2 def add_vpp_config(self): self._sw_if_index = 0xffffffff @@ -29,7 +30,8 @@ class VppMPLSTunnelInterface(VppInterface): path.weight, next_hop_out_label_stack=path.nh_labels, next_hop_n_out_labels=len(path.nh_labels), - is_multicast=self.is_multicast) + is_multicast=self.is_multicast, + l2_only=self.is_l2) self._sw_if_index = reply.sw_if_index def remove_vpp_config(self): diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 801a6c2d..3ba2ad4a 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -921,7 +921,7 @@ class VppPapiProvider(object): self, label, eos, - next_hop_proto_is_ip4, + next_hop_proto, next_hop_address, next_hop_sw_if_index=0xFFFFFFFF, table_id=0, @@ -982,7 +982,7 @@ class VppPapiProvider(object): 'mr_is_resolve_attached': is_resolve_attached, 'mr_is_interface_rx': is_interface_rx, 'mr_is_rpf_id': is_rpf_id, - 'mr_next_hop_proto_is_ip4': next_hop_proto_is_ip4, + 'mr_next_hop_proto': next_hop_proto, 'mr_next_hop_weight': next_hop_weight, 'mr_next_hop': next_hop_address, 'mr_next_hop_n_out_labels': next_hop_n_out_labels, -- cgit 1.2.3-korg From 203c0794837c519c32af1cf638bfd47fd17a35de Mon Sep 17 00:00:00 2001 From: ShenJibiao Date: Thu, 31 Aug 2017 13:57:48 +0800 Subject: Free memory the 'rpaths' pointers, that may incur memory leaks(VPP-965). 
Change-Id: I2732c02b97f4602162638bbcf3ab46521c2782da Signed-off-by: ShenJibiao --- src/vpp/app/vpe_cli.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/vpp/app') diff --git a/src/vpp/app/vpe_cli.c b/src/vpp/app/vpe_cli.c index fcc496ad..e19c23de 100644 --- a/src/vpp/app/vpe_cli.c +++ b/src/vpp/app/vpe_cli.c @@ -113,6 +113,7 @@ virtual_ip_cmd_fn_command_fn (vlib_main_t * vm, done: vec_free (mac_addrs); vec_free (next_hops); + vec_free (rpaths); unformat_free (line_input); return error; -- cgit 1.2.3-korg From f49921f73f4e1f0b67823be445aafeb9ff2333a6 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 11 Sep 2017 16:52:11 +0200 Subject: clib_socket: add sendmsg / recvmsg with ancillary data support Change-Id: Ie18580e05ec12291e7026f21ad874e088a712c8e Signed-off-by: Damjan Marion --- src/vlib/unix/cli.c | 4 +- src/vpp/app/vppctl.c | 2 +- src/vppinfra/socket.c | 130 ++++++++++++++++++++++++++++++++++++++++----- src/vppinfra/socket.h | 49 +++++++++++++---- src/vppinfra/test_socket.c | 6 +-- 5 files changed, 164 insertions(+), 27 deletions(-) (limited to 'src/vpp/app') diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index 39368823..1567cc2a 100644 --- a/src/vlib/unix/cli.c +++ b/src/vlib/unix/cli.c @@ -2664,8 +2664,8 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input) vec_free (tmp); } - s->flags = SOCKET_IS_SERVER | /* listen, don't connect */ - SOCKET_ALLOW_GROUP_WRITE; /* PF_LOCAL socket only */ + s->flags = CLIB_SOCKET_F_IS_SERVER | /* listen, don't connect */ + CLIB_SOCKET_F_ALLOW_GROUP_WRITE; /* PF_LOCAL socket only */ error = clib_socket_init (s); if (error) diff --git a/src/vpp/app/vppctl.c b/src/vpp/app/vppctl.c index a8f3eab0..980936f1 100644 --- a/src/vpp/app/vppctl.c +++ b/src/vpp/app/vppctl.c @@ -158,7 +158,7 @@ main (int argc, char *argv[]) while (argc--) cmd = format (cmd, "%s%c", (argv++)[0], argc ? ' ' : 0); - s->flags = SOCKET_IS_CLIENT; + s->flags = CLIB_SOCKET_F_IS_CLIENT; error = clib_socket_init (s); if (error) diff --git a/src/vppinfra/socket.c b/src/vppinfra/socket.c index 37dcbbfd..87a9333f 100644 --- a/src/vppinfra/socket.c +++ b/src/vppinfra/socket.c @@ -35,17 +35,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include +#include +#include /* strchr */ +#define __USE_GNU #include #include +#include #include #include #include #include #include -#include #include -#include /* strchr */ #include #include @@ -233,7 +234,7 @@ default_socket_read (clib_socket_t * sock, int n_bytes) u8 *buf; /* RX side of socket is down once end of file is reached. */ - if (sock->flags & SOCKET_RX_END_OF_FILE) + if (sock->flags & CLIB_SOCKET_F_RX_END_OF_FILE) return 0; fd = sock->fd; @@ -255,7 +256,7 @@ default_socket_read (clib_socket_t * sock, int n_bytes) /* Other side closed the socket. 
*/ if (n_read == 0) - sock->flags |= SOCKET_RX_END_OF_FILE; + sock->flags |= CLIB_SOCKET_F_RX_END_OF_FILE; non_fatal: _vec_len (sock->rx_buffer) += n_read - n_bytes; @@ -271,6 +272,91 @@ default_socket_close (clib_socket_t * s) return 0; } +static clib_error_t * +default_socket_sendmsg (clib_socket_t * s, void *msg, int msglen, + int fds[], int num_fds) +{ + struct msghdr mh = { 0 }; + struct iovec iov[1]; + char ctl[CMSG_SPACE (sizeof (int)) * num_fds]; + int rv; + + iov[0].iov_base = msg; + iov[0].iov_len = msglen; + mh.msg_iov = iov; + mh.msg_iovlen = 1; + + if (num_fds > 0) + { + struct cmsghdr *cmsg; + memset (&ctl, 0, sizeof (ctl)); + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + cmsg = CMSG_FIRSTHDR (&mh); + cmsg->cmsg_len = CMSG_LEN (sizeof (int) * num_fds); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy (CMSG_DATA (cmsg), fds, sizeof (int) * num_fds); + } + rv = sendmsg (s->fd, &mh, 0); + if (rv < 0) + return clib_error_return_unix (0, "sendmsg"); + return 0; +} + + +static clib_error_t * +default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen, + int fds[], int num_fds) +{ + char ctl[CMSG_SPACE (sizeof (int) * num_fds) + + CMSG_SPACE (sizeof (struct ucred))]; + struct msghdr mh = { 0 }; + struct iovec iov[1]; + ssize_t size; + struct ucred *cr = 0; + struct cmsghdr *cmsg; + + iov[0].iov_base = msg; + iov[0].iov_len = msglen; + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + + memset (ctl, 0, sizeof (ctl)); + + /* receive the incoming message */ + size = recvmsg (s->fd, &mh, 0); + if (size != msglen) + { + return (size == 0) ? clib_error_return (0, "disconnected") : + clib_error_return_unix (0, "recvmsg: malformed message (fd %d, '%s')", + s->fd, s->config); + } + + cmsg = CMSG_FIRSTHDR (&mh); + while (cmsg) + { + if (cmsg->cmsg_level == SOL_SOCKET) + { + if (cmsg->cmsg_type == SCM_CREDENTIALS) + { + cr = (struct ucred *) CMSG_DATA (cmsg); + s->uid = cr->uid; + s->gid = cr->gid; + s->pid = cr->pid; + } + else if (cmsg->cmsg_type == SCM_RIGHTS) + { + clib_memcpy (fds, CMSG_DATA (cmsg), num_fds * sizeof (int)); + } + } + cmsg = CMSG_NXTHDR (&mh, cmsg); + } + return 0; +} + static void socket_init_funcs (clib_socket_t * s) { @@ -280,6 +366,10 @@ socket_init_funcs (clib_socket_t * s) s->read_func = default_socket_read; if (!s->close_func) s->close_func = default_socket_close; + if (!s->sendmsg_func) + s->sendmsg_func = default_socket_sendmsg; + if (!s->recvmsg_func) + s->recvmsg_func = default_socket_recvmsg; } clib_error_t * @@ -291,18 +381,22 @@ clib_socket_init (clib_socket_t * s) struct sockaddr_un su; } addr; socklen_t addr_len = 0; + int socket_type; clib_error_t *error = 0; word port; error = socket_config (s->config, &addr.sa, &addr_len, - (s->flags & SOCKET_IS_SERVER + (s->flags & CLIB_SOCKET_F_IS_SERVER ? INADDR_LOOPBACK : INADDR_ANY)); if (error) goto done; socket_init_funcs (s); - s->fd = socket (addr.sa.sa_family, SOCK_STREAM, 0); + socket_type = s->flags & CLIB_SOCKET_F_SEQPACKET ? 
+ SOCK_SEQPACKET : SOCK_STREAM; + + s->fd = socket (addr.sa.sa_family, socket_type, 0); if (s->fd < 0) { error = clib_error_return_unix (0, "socket (fd %d, '%s')", @@ -314,7 +408,7 @@ clib_socket_init (clib_socket_t * s) if (addr.sa.sa_family == PF_INET) port = ((struct sockaddr_in *) &addr)->sin_port; - if (s->flags & SOCKET_IS_SERVER) + if (s->flags & CLIB_SOCKET_F_IS_SERVER) { uword need_bind = 1; @@ -342,6 +436,18 @@ clib_socket_init (clib_socket_t * s) clib_unix_warning ("setsockopt SO_REUSEADDR fails"); } + if (addr.sa.sa_family == PF_LOCAL && s->flags & CLIB_SOCKET_F_PASSCRED) + { + int x = 1; + if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &x, sizeof (x)) < 0) + { + error = clib_error_return_unix (0, "setsockopt (SO_PASSCRED, " + "fd %d, '%s')", s->fd, + s->config); + goto done; + } + } + if (need_bind && bind (s->fd, &addr.sa, addr_len) < 0) { error = clib_error_return_unix (0, "bind (fd %d, '%s')", @@ -356,7 +462,7 @@ clib_socket_init (clib_socket_t * s) goto done; } if (addr.sa.sa_family == PF_LOCAL - && s->flags & SOCKET_ALLOW_GROUP_WRITE) + && s->flags & CLIB_SOCKET_F_ALLOW_GROUP_WRITE) { struct stat st = { 0 }; if (stat (((struct sockaddr_un *) &addr)->sun_path, &st) < 0) @@ -378,7 +484,7 @@ clib_socket_init (clib_socket_t * s) } else { - if ((s->flags & SOCKET_NON_BLOCKING_CONNECT) + if ((s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT) && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0) { error = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')", @@ -387,7 +493,7 @@ clib_socket_init (clib_socket_t * s) } if (connect (s->fd, &addr.sa, addr_len) < 0 - && !((s->flags & SOCKET_NON_BLOCKING_CONNECT) && + && !((s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT) && errno == EINPROGRESS)) { error = clib_error_return_unix (0, "connect (fd %d, '%s')", @@ -434,7 +540,7 @@ clib_socket_accept (clib_socket_t * server, clib_socket_t * client) goto close_client; } - client->flags = SOCKET_IS_CLIENT; + client->flags = CLIB_SOCKET_F_IS_CLIENT; socket_init_funcs (client); return 0; diff --git a/src/vppinfra/socket.h b/src/vppinfra/socket.h index 75037208..4f9e9509 100644 --- a/src/vppinfra/socket.h +++ b/src/vppinfra/socket.h @@ -55,13 +55,14 @@ typedef struct _socket_t char *config; u32 flags; -#define SOCKET_IS_SERVER (1 << 0) -#define SOCKET_IS_CLIENT (0 << 0) -#define SOCKET_NON_BLOCKING_CONNECT (1 << 1) -#define SOCKET_ALLOW_GROUP_WRITE (1 << 2) +#define CLIB_SOCKET_F_IS_SERVER (1 << 0) +#define CLIB_SOCKET_F_IS_CLIENT (0 << 0) +#define CLIB_SOCKET_F_RX_END_OF_FILE (1 << 2) +#define CLIB_SOCKET_F_NON_BLOCKING_CONNECT (1 << 3) +#define CLIB_SOCKET_F_ALLOW_GROUP_WRITE (1 << 4) +#define CLIB_SOCKET_F_SEQPACKET (1 << 5) +#define CLIB_SOCKET_F_PASSCRED (1 << 6) - /* Read returned end-of-file. */ -#define SOCKET_RX_END_OF_FILE (1 << 2) /* Transmit buffer. Holds data waiting to be written. */ u8 *tx_buffer; @@ -72,10 +73,19 @@ typedef struct _socket_t /* Peer socket we are connected to. */ struct sockaddr_in peer; + /* Credentials, populated if CLIB_SOCKET_F_PASSCRED is set */ + pid_t pid; + uid_t uid; + gid_t gid; + clib_error_t *(*write_func) (struct _socket_t * sock); clib_error_t *(*read_func) (struct _socket_t * sock, int min_bytes); clib_error_t *(*close_func) (struct _socket_t * sock); - void *private_data; + clib_error_t *(*recvmsg_func) (struct _socket_t * s, void *msg, int msglen, + int fds[], int num_fds); + clib_error_t *(*sendmsg_func) (struct _socket_t * s, void *msg, int msglen, + int fds[], int num_fds); + uword private_data; } clib_socket_t; /* socket config format is host:port. 
@@ -89,7 +99,7 @@ clib_error_t *clib_socket_accept (clib_socket_t * server, always_inline uword clib_socket_is_server (clib_socket_t * sock) { - return (sock->flags & SOCKET_IS_SERVER) != 0; + return (sock->flags & CLIB_SOCKET_F_IS_SERVER) != 0; } always_inline uword @@ -98,10 +108,17 @@ clib_socket_is_client (clib_socket_t * s) return !clib_socket_is_server (s); } +always_inline uword +clib_socket_is_connected (clib_socket_t * sock) +{ + return sock->fd > 0; +} + + always_inline int clib_socket_rx_end_of_file (clib_socket_t * s) { - return s->flags & SOCKET_RX_END_OF_FILE; + return s->flags & CLIB_SOCKET_F_RX_END_OF_FILE; } always_inline void * @@ -130,6 +147,20 @@ clib_socket_rx (clib_socket_t * s, int n_bytes) return s->read_func (s, n_bytes); } +always_inline clib_error_t * +clib_socket_sendmsg (clib_socket_t * s, void *msg, int msglen, + int fds[], int num_fds) +{ + return s->sendmsg_func (s, msg, msglen, fds, num_fds); +} + +always_inline clib_error_t * +clib_socket_recvmsg (clib_socket_t * s, void *msg, int msglen, + int fds[], int num_fds) +{ + return s->recvmsg_func (s, msg, msglen, fds, num_fds); +} + always_inline void clib_socket_free (clib_socket_t * s) { diff --git a/src/vppinfra/test_socket.c b/src/vppinfra/test_socket.c index 0b05467a..2f25eccd 100644 --- a/src/vppinfra/test_socket.c +++ b/src/vppinfra/test_socket.c @@ -50,15 +50,15 @@ test_socket_main (unformat_input_t * input) clib_error_t *error; s->config = "localhost:22"; - s->flags = SOCKET_IS_CLIENT; + s->flags = CLIB_SOCKET_F_IS_CLIENT; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "server %s %=", &config, - &s->flags, SOCKET_IS_SERVER)) + &s->flags, CLIB_SOCKET_F_IS_SERVER)) ; else if (unformat (input, "client %s %=", &config, - &s->flags, SOCKET_IS_CLIENT)) + &s->flags, CLIB_SOCKET_F_IS_CLIENT)) ; else { -- cgit 1.2.3-korg From 03add7f5b5e5351790187ea6d7e83803d5be2440 Mon Sep 17 00:00:00 2001 From: Chris Luke Date: Wed, 20 Sep 2017 23:31:24 -0400 Subject: vppctl,cli: Improve non-interactive vppctl (VPP-944) Short version: Make vppctl behave as expected when run from scripts, or without a controlling terminal, and especially when using it with VPP commands on its command line ("non-interactively"). In particular, prevent the welcome banner and VPP CLI prompt from being sent by VPP when being used in these ways. vppctl ------ - Improve vppctl's detection of non-interactive sessions. - Pass non-interactiveness in the terminal type telnet option as a value distinct from "dumb" (which means non-ANSI capable.) - Make tty setup handling more robust. - Only send non-interactive command once we've sent the terminal type, to ensure correct event sequence; we need the VPP cli session to be in line-by-line mode. - Ignore stdin when it looks something like /dev/null. - Skip NUL bytes received from VPP. VPP CLI ------- - Detect "non-interactive" terminal types and set session parameters accordingly. - Add an "interactive" flag that controls whether the welcome banner and CLI prompt are sent. - Detect if telnet options processing switched us into line mode and act accordingly for the rest of the current input buffer. This was causing the command string to be echoed by the CLI editor code. - For non-interactive sessions, send a NUL byte after the input buffer has been processed. This is because vppctl depends on seeing traffic before it will try to close the session; a command with no output would cause it to hang. 
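The terminal-type negotiation described above is what lets the CLI tell an
automated vppctl run apart from a human session. As a rough illustration of the
client side (the full vppctl.c changes are only partially shown in this patch;
the constants are the standard ones from <arpa/telnet.h>), a client that wants a
non-interactive session answers the TERMINAL-TYPE sub-negotiation with the string
"vppctl", which unix_cli_terminal_type_noninteractive() below then matches:

    /* sketch only: reply IAC SB TERMINAL-TYPE IS "vppctl" IAC SE */
    #include <arpa/telnet.h>
    #include <string.h>
    #include <unistd.h>

    static void
    send_noninteractive_ttype (int fd)
    {
      unsigned char pre[] = { IAC, SB, TELOPT_TTYPE, TELQUAL_IS };
      unsigned char post[] = { IAC, SE };
      const char *term = "vppctl";

      (void) write (fd, pre, sizeof (pre));
      (void) write (fd, term, strlen (term));
      (void) write (fd, post, sizeof (post));
    }

On receipt, the CLI switches the session to line-by-line mode, disables the pager
and history, and suppresses the banner and prompt, as implemented in the cli.c
hunks that follow.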
NUL bytes are ignored by all decent terminals, but we have vppctl strip them out anyway. - Prevent certain commands from running in non-interactive sessions since they manipulate interactive-related features. - For interactive sessions, quench the prompt that prints on VPP shutdown. - Detect and handle socket errors in the CLI; sessions were leaking. - Pevent SIGPIPE from ever being raised; handle EPIPE instead. We don't need VPP to die just because a socket closed just before we try to write to it! - Add a command to dump a list of current CLI sessions; mostly this was to detect session leakage, but it may have some general utility. Change-Id: Ia147da013317180882c1d967b18eefb8519a55fb Signed-off-by: Chris Luke --- src/vlib/unix/cli.c | 359 +++++++++++++++++++++++++++++++++++++++++++-------- src/vpp/app/vppctl.c | 134 +++++++++++++------ 2 files changed, 404 insertions(+), 89 deletions(-) (limited to 'src/vpp/app') diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index 0e035b13..ffe5de6d 100644 --- a/src/vlib/unix/cli.c +++ b/src/vlib/unix/cli.c @@ -100,9 +100,6 @@ /** Maximum terminal height we will accept */ #define UNIX_CLI_MAX_TERMINAL_HEIGHT 512 -/** Unix standard in */ -#define UNIX_CLI_STDIN_FD 0 - /** A CLI banner line. */ typedef struct @@ -200,6 +197,18 @@ typedef struct /** Disable the pager? */ u8 no_pager; + /** Whether the session is interactive or not. + * Controls things like initial banner, the CLI prompt etc. */ + u8 is_interactive; + + /** Whether the session is attached to a socket. */ + u8 is_socket; + + /** If EPIPE has been detected, prevent further write-related + * activity on the descriptor. + */ + u8 has_epipe; + /** Pager buffer */ u8 **pager_vector; @@ -593,12 +602,33 @@ unix_vlib_cli_output_raw (unix_cli_file_t * cf, { int n = 0; + if (cf->has_epipe) /* don't try writing anything */ + return; + if (vec_len (cf->output_vector) == 0) - n = write (uf->file_descriptor, buffer, buffer_bytes); + { + if (cf->is_socket) + /* If it's a socket we use MSG_NOSIGNAL to prevent SIGPIPE */ + n = send (uf->file_descriptor, buffer, buffer_bytes, MSG_NOSIGNAL); + else + n = write (uf->file_descriptor, buffer, buffer_bytes); + } if (n < 0 && errno != EAGAIN) { - clib_unix_warning ("write"); + if (errno == EPIPE) + { + /* connection closed on us */ + unix_main_t *um = &unix_main; + cf->has_epipe = 1; + vlib_process_signal_event (um->vlib_main, cf->process_node_index, + UNIX_CLI_PROCESS_EVENT_QUIT, + uf->private_data); + } + else + { + clib_unix_warning ("write"); + } } else if ((word) n < (word) buffer_bytes) { @@ -659,7 +689,9 @@ unix_cli_cli_prompt (unix_cli_file_t * cf, clib_file_t * uf) { unix_cli_main_t *cm = &unix_cli_main; - unix_vlib_cli_output_raw (cf, uf, cm->cli_prompt, vec_len (cm->cli_prompt)); + if (cf->is_interactive) /* Only interactive sessions get a prompt */ + unix_vlib_cli_output_raw (cf, uf, cm->cli_prompt, + vec_len (cm->cli_prompt)); } /** @brief Output a pager prompt and show number of buffered lines */ @@ -1024,7 +1056,7 @@ unix_vlib_cli_output (uword cli_file_index, u8 * buffer, uword buffer_bytes) * terminal sequences; @c 0 otherwise. */ static u8 -unix_cli_terminal_type (u8 * term, uword len) +unix_cli_terminal_type_ansi (u8 * term, uword len) { /* This may later be better done as a hash of some sort. */ #define _(a) do { \ @@ -1042,6 +1074,45 @@ unix_cli_terminal_type (u8 * term, uword len) return 0; } +/** Identify whether a terminal type is non-interactive. 
+ * + * Compares the string given in @c term with a list of terminal types known + * to be non-interactive, as send by tools such as @c vppctl . + * + * This list contains, for example, @c vppctl. + * + * @param term A string with a terminal type in it. + * @param len The length of the string in @c term. + * + * @return @c 1 if the terminal type is recognized as being non-interactive; + * @c 0 otherwise. + */ +static u8 +unix_cli_terminal_type_noninteractive (u8 * term, uword len) +{ + /* This may later be better done as a hash of some sort. */ +#define _(a) do { \ + if (strncasecmp(a, (char *)term, (size_t)len) == 0) return 1; \ + } while(0) + + _("vppctl"); +#undef _ + + return 0; +} + +/** Set a session to be non-interactive. */ +static void +unix_cli_set_session_noninteractive (unix_cli_file_t * cf) +{ + /* Non-interactive sessions don't get these */ + cf->is_interactive = 0; + cf->no_pager = 1; + cf->history_limit = 0; + cf->has_history = 0; + cf->line_mode = 1; +} + /** @brief Emit initial welcome banner and prompt on a connection. */ static void unix_cli_file_welcome (unix_cli_main_t * cm, unix_cli_file_t * cf) @@ -1052,6 +1123,12 @@ unix_cli_file_welcome (unix_cli_main_t * cm, unix_cli_file_t * cf) unix_cli_banner_t *banner; int i, len; + /* Mark the session as started if we get here */ + cf->started = 1; + + if (!(cf->is_interactive)) /* No banner for non-interactive sessions */ + return; + /* * Put the first bytes directly into the buffer so that further output is * queued until everything is ready. (oterwise initial prompt can appear @@ -1082,7 +1159,6 @@ unix_cli_file_welcome (unix_cli_main_t * cm, unix_cli_file_t * cf) /* Prompt. */ unix_cli_cli_prompt (cf, uf); - cf->started = 1; } /** @brief A failsafe triggered on a timer to ensure we send the prompt @@ -1160,9 +1236,20 @@ unix_cli_process_telnet (unix_main_t * um, case TELOPT_TTYPE: if (input_vector[3] != 0) break; - /* See if the terminal type is ANSI capable */ - cf->ansi_capable = - unix_cli_terminal_type (input_vector + 4, i - 5); + { + /* See if the the terminal type is recognized */ + u8 *term = input_vector + 4; + uword len = i - 5; + + /* See if the terminal type is ANSI capable */ + cf->ansi_capable = + unix_cli_terminal_type_ansi (term, len); + + /* See if the terminal type indicates non-interactive */ + if (unix_cli_terminal_type_noninteractive (term, len)) + unix_cli_set_session_noninteractive (cf); + } + /* If session not started, we can release the pause */ if (!cf->started) /* Send the welcome banner and initial prompt */ @@ -2122,10 +2209,12 @@ unix_cli_line_edit (unix_cli_main_t * cm, unix_main_t * um, vec_len (cf->input_vector) - i); if (matched < 0) { + /* There was a partial match which means we need more bytes + * than the input buffer currently has. + */ if (i) { - /* There was a partial match which means we need more bytes - * than the input buffer currently has. + /* * Since the bytes before here have been processed, shift * the remaining contents to the start of the input buffer. */ @@ -2136,6 +2225,16 @@ unix_cli_line_edit (unix_cli_main_t * cm, unix_main_t * um, break; default: + /* If telnet option processing switched us to line mode, get us + * out of here! 
+ */ + if (cf->line_mode) + { + vec_delete (cf->input_vector, i, 0); + cf->current_command = cf->input_vector; + return 0; + } + /* process the action */ if (!unix_cli_line_process_one (cm, um, cf, uf, cf->input_vector[i], action)) @@ -2165,11 +2264,14 @@ unix_cli_process_input (unix_cli_main_t * cm, uword cli_file_index) unformat_input_t input; int vlib_parse_eval (u8 *); + cm->current_input_file_index = cli_file_index; + more: /* Try vlibplex first. Someday... */ if (0 && vlib_parse_eval (cf->input_vector) == 0) goto done; + if (cf->line_mode) { /* just treat whatever we got as a complete line of input */ @@ -2190,20 +2292,16 @@ more: lv = format (lv, "%U[%d]: %v", format_timeval, 0 /* current bat-time */ , 0 /* current bat-format */ , - cli_file_index, cf->input_vector); - { - int rv __attribute__ ((unused)) = - write (um->log_fd, lv, vec_len (lv)); - } + cli_file_index, cf->current_command); + int rv __attribute__ ((unused)) = write (um->log_fd, lv, vec_len (lv)); } - /* Copy our input command to a new string */ + /* Build an unformat structure around our command */ unformat_init_vector (&input, cf->current_command); /* Remove leading white space from input. */ (void) unformat (&input, ""); - cm->current_input_file_index = cli_file_index; cf->pager_start = 0; /* start a new pager session */ if (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) @@ -2222,9 +2320,14 @@ more: done: /* reset vector; we'll re-use it later */ if (cf->line_mode) - vec_reset_length (cf->input_vector); + { + vec_reset_length (cf->input_vector); + cf->current_command = 0; + } else - vec_reset_length (cf->current_command); + { + vec_reset_length (cf->current_command); + } if (cf->no_pager == 2) { @@ -2251,6 +2354,15 @@ done: /* Any residual data in the input vector? */ if (vec_len (cf->input_vector)) goto more; + + /* For non-interactive sessions send a NUL byte. + * Specifically this is because vppctl needs to see some traffic in + * order to move on to closing the session. Commands with no output + * would thus cause vppctl to hang indefinitely in non-interactive mode + * since there is also no prompt sent after the command completes. + */ + if (!cf->is_interactive) + unix_vlib_cli_output_raw (cf, uf, (u8 *) "\0", 1); } /** Destroy a CLI session. @@ -2270,7 +2382,7 @@ unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index) uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); /* Quit/EOF on stdin means quit program. */ - if (uf->file_descriptor == UNIX_CLI_STDIN_FD) + if (uf->file_descriptor == STDIN_FILENO) clib_longjmp (&um->vlib_main->main_loop_exit, VLIB_MAIN_LOOP_EXIT_CLI); vec_free (cf->current_command); @@ -2342,11 +2454,30 @@ unix_cli_write_ready (clib_file_t * uf) cf = pool_elt_at_index (cm->cli_file_pool, uf->private_data); /* Flush output vector. 
*/ - n = write (uf->file_descriptor, - cf->output_vector, vec_len (cf->output_vector)); + if (cf->is_socket) + /* If it's a socket we use MSG_NOSIGNAL to prevent SIGPIPE */ + n = send (uf->file_descriptor, + cf->output_vector, vec_len (cf->output_vector), MSG_NOSIGNAL); + else + n = write (uf->file_descriptor, + cf->output_vector, vec_len (cf->output_vector)); if (n < 0 && errno != EAGAIN) - return clib_error_return_unix (0, "write"); + { + if (errno == EPIPE) + { + /* connection closed on us */ + unix_main_t *um = &unix_main; + cf->has_epipe = 1; + vlib_process_signal_event (um->vlib_main, cf->process_node_index, + UNIX_CLI_PROCESS_EVENT_QUIT, + uf->private_data); + } + else + { + return clib_error_return_unix (0, "write"); + } + } else if (n > 0) unix_cli_del_pending_output (uf, cf, n); @@ -2393,6 +2524,24 @@ unix_cli_read_ready (clib_file_t * uf) return /* no error */ 0; } +/** Called when a CLI session file descriptor has an error condition. */ +static clib_error_t * +unix_cli_error_detected (clib_file_t * uf) +{ + unix_main_t *um = &unix_main; + unix_cli_main_t *cm = &unix_cli_main; + unix_cli_file_t *cf; + + cf = pool_elt_at_index (cm->cli_file_pool, uf->private_data); + cf->has_epipe = 1; /* prevent writes while the close is pending */ + vlib_process_signal_event (um->vlib_main, + cf->process_node_index, + UNIX_CLI_PROCESS_EVENT_QUIT, + /* event data */ uf->private_data); + + return /* no error */ 0; +} + /** Store a new CLI session. * @param name The name of the session. * @param fd The file descriptor for the session I/O. @@ -2443,6 +2592,7 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) template.read_function = unix_cli_read_ready; template.write_function = unix_cli_write_ready; + template.error_function = unix_cli_error_detected; template.file_descriptor = fd; template.private_data = cf - cm->cli_file_pool; @@ -2482,10 +2632,10 @@ unix_cli_listen_read_ready (clib_file_t * uf) cf_index = unix_cli_file_add (cm, client_name, client.fd); cf = pool_elt_at_index (cm->cli_file_pool, cf_index); + cf->is_socket = 1; /* No longer need CLIB version of socket. */ clib_socket_free (&client); - vec_free (client_name); /* if we're supposed to run telnet session in character mode (default) */ @@ -2512,6 +2662,9 @@ unix_cli_listen_read_ready (clib_file_t * uf) cf->history_limit = um->cli_history_limit; cf->has_history = cf->history_limit != 0; + /* This is an interactive session until we decide otherwise */ + cf->is_interactive = 1; + /* Make sure this session is in line mode */ cf->line_mode = 0; @@ -2521,9 +2674,8 @@ unix_cli_listen_read_ready (clib_file_t * uf) /* Setup the pager */ cf->no_pager = um->cli_no_pager; - uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); - /* Send the telnet options */ + uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); unix_vlib_cli_output_raw (cf, uf, charmode_option, ARRAY_LEN (charmode_option)); @@ -2550,7 +2702,7 @@ unix_cli_resize_interrupt (int signum) (void) signum; /* Terminal resized, fetch the new size */ - if (ioctl (UNIX_CLI_STDIN_FD, TIOCGWINSZ, &ws) < 0) + if (ioctl (STDIN_FILENO, TIOCGWINSZ, &ws) < 0) { /* "Should never happen..." */ clib_unix_warning ("TIOCGWINSZ"); @@ -2600,17 +2752,17 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input) if (um->flags & UNIX_FLAG_INTERACTIVE) { /* Set stdin to be non-blocking. 
*/ - if ((flags = fcntl (UNIX_CLI_STDIN_FD, F_GETFL, 0)) < 0) + if ((flags = fcntl (STDIN_FILENO, F_GETFL, 0)) < 0) flags = 0; - (void) fcntl (UNIX_CLI_STDIN_FD, F_SETFL, flags | O_NONBLOCK); + (void) fcntl (STDIN_FILENO, F_SETFL, flags | O_NONBLOCK); - cf_index = unix_cli_file_add (cm, "stdin", UNIX_CLI_STDIN_FD); + cf_index = unix_cli_file_add (cm, "stdin", STDIN_FILENO); cf = pool_elt_at_index (cm->cli_file_pool, cf_index); cm->stdin_cli_file_index = cf_index; /* If stdin is a tty and we are using chacracter mode, enable * history on the CLI and set the tty line discipline accordingly. */ - if (isatty (UNIX_CLI_STDIN_FD) && um->cli_line_mode == 0) + if (isatty (STDIN_FILENO) && um->cli_line_mode == 0) { /* Capture terminal resize events */ memset (&sa, 0, sizeof (sa)); @@ -2619,7 +2771,7 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input) clib_panic ("sigaction"); /* Retrieve the current terminal size */ - ioctl (UNIX_CLI_STDIN_FD, TIOCGWINSZ, &ws); + ioctl (STDIN_FILENO, TIOCGWINSZ, &ws); cf->width = ws.ws_col; cf->height = ws.ws_row; @@ -2634,11 +2786,14 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input) /* Setup the pager */ cf->no_pager = um->cli_no_pager; + /* This is an interactive session until we decide otherwise */ + cf->is_interactive = 1; + /* We're going to be in char by char mode */ cf->line_mode = 0; /* Save the original tty state so we can restore it later */ - tcgetattr (UNIX_CLI_STDIN_FD, &um->tio_stdin); + tcgetattr (STDIN_FILENO, &um->tio_stdin); um->tio_isset = 1; /* Tweak the tty settings */ @@ -2647,23 +2802,23 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input) tio.c_lflag &= ~(ECHO | ICANON | IEXTEN); tio.c_cc[VMIN] = 1; /* 1 byte at a time */ tio.c_cc[VTIME] = 0; /* no timer */ - tcsetattr (UNIX_CLI_STDIN_FD, TCSAFLUSH, &tio); + tcsetattr (STDIN_FILENO, TCSAFLUSH, &tio); /* See if we can do ANSI/VT100 output */ term = (u8 *) getenv ("TERM"); if (term != NULL) - cf->ansi_capable = unix_cli_terminal_type (term, - strlen ((char *) - term)); + { + int len = strlen ((char *) term); + cf->ansi_capable = unix_cli_terminal_type_ansi (term, len); + if (unix_cli_terminal_type_noninteractive (term, len)) + unix_cli_set_session_noninteractive (cf); + } } else { notty: - /* No tty, so make sure these things are off */ - cf->no_pager = 1; - cf->history_limit = 0; - cf->has_history = 0; - cf->line_mode = 1; + /* No tty, so make sure the session doesn't have tty-like features */ + unix_cli_set_session_noninteractive (cf); } /* Send banner and initial prompt */ @@ -2726,8 +2881,8 @@ unix_cli_exit (vlib_main_t * vm) unix_main_t *um = &unix_main; /* If stdin is a tty and we saved the tty state, reset the tty state */ - if (isatty (UNIX_CLI_STDIN_FD) && um->tio_isset) - tcsetattr (UNIX_CLI_STDIN_FD, TCSAFLUSH, &um->tio_stdin); + if (isatty (STDIN_FILENO) && um->tio_isset) + tcsetattr (STDIN_FILENO, TCSAFLUSH, &um->tio_stdin); return 0; } @@ -2758,6 +2913,12 @@ unix_cli_quit (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { unix_cli_main_t *cm = &unix_cli_main; + unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool, + cm->current_input_file_index); + + /* Cosmetic: suppress the final prompt from appearing before we die */ + cf->is_interactive = 0; + cf->started = 1; vlib_process_signal_event (vm, vlib_current_process (vm), @@ -2940,6 +3101,9 @@ unix_cli_show_history (vlib_main_t * vm, cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index); + if (!cf->is_interactive) + return clib_error_return (0, 
"invalid for non-interactive sessions"); + if (cf->has_history && cf->history_limit) { i = 1 + cf->command_number - vec_len (cf->command_history); @@ -2985,6 +3149,8 @@ unix_cli_show_terminal (vlib_main_t * vm, vlib_cli_output (vm, "Terminal height: %d\n", cf->height); vlib_cli_output (vm, "ANSI capable: %s\n", cf->ansi_capable ? "yes" : "no"); + vlib_cli_output (vm, "Interactive: %s\n", + cf->is_interactive ? "yes" : "no"); vlib_cli_output (vm, "History enabled: %s%s\n", cf->has_history ? "yes" : "no", !cf->has_history || cf->history_limit ? "" : @@ -3017,6 +3183,7 @@ unix_cli_show_terminal (vlib_main_t * vm, * Terminal width: 123 * Terminal height: 48 * ANSI capable: yes + * Interactive: yes * History enabled: yes * History limit: 50 * Pager enabled: yes @@ -3032,6 +3199,87 @@ VLIB_CLI_COMMAND (cli_unix_cli_show_terminal, static) = { }; /* *INDENT-ON* */ +/** CLI command to display a list of CLI sessions. */ +static clib_error_t * +unix_cli_show_cli_sessions (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + //unix_main_t *um = &unix_main; + unix_cli_main_t *cm = &unix_cli_main; + clib_file_main_t *fm = &file_main; + unix_cli_file_t *cf; + clib_file_t *uf; + vlib_node_t *n; + + vlib_cli_output (vm, "%-5s %-5s %-20s %s", "PNI", "FD", "Name", "Flags"); + +#define fl(x, y) ( (x) ? toupper((y)) : tolower((y)) ) + /* *INDENT-OFF* */ + pool_foreach (cf, cm->cli_file_pool, ({ + uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); + n = vlib_get_node (vm, cf->process_node_index); + vlib_cli_output (vm, + "%-5d %-5d %-20v %c%c%c%c%c\n", + cf->process_node_index, + uf->file_descriptor, + n->name, + fl (cf->is_interactive, 'i'), + fl (cf->is_socket, 's'), + fl (cf->line_mode, 'l'), + fl (cf->has_epipe, 'p'), + fl (cf->ansi_capable, 'a')); + })); + /* *INDENT-ON* */ +#undef fl + + return 0; +} + +/*? + * Displays a summary of all the current CLI sessions. + * + * Typically used to diagnose connection issues with the CLI + * socket. + * + * @cliexpar + * @cliexstart{show cli-sessions} + * PNI FD Name Flags + * 343 0 unix-cli-stdin IslpA + * 344 7 unix-cli-local:20 ISlpA + * 346 8 unix-cli-local:21 iSLpa + * @cliexend + + * In this example we have the debug console of the running process + * on stdin/out, we have an interactive socket session and we also + * have a non-interactive socket session. + * + * Fields: + * + * - @em PNI: Process node index. + * - @em FD: Unix file descriptor. + * - @em Name: Name of the session. + * - @em Flags: Various flags that describe the state of the session. + * + * @em Flags have the following meanings; lower-case typically negates + * upper-case: + * + * - @em I Interactive session. + * - @em S Connected by socket. + * - @em s Not a socket, likely stdin. + * - @em L Line-by-line mode. + * - @em l Char-by-char mode. + * - @em P EPIPE detected on connection; it will close soon. + * - @em A ANSI-capable terminal. +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cli_unix_cli_show_cli_sessions, static) = { + .path = "show cli-sessions", + .short_help = "Show current CLI sessions", + .function = unix_cli_show_cli_sessions, +}; +/* *INDENT-ON* */ + /** CLI command to set terminal pager settings. 
*/ static clib_error_t * unix_cli_set_terminal_pager (vlib_main_t * vm, @@ -3044,11 +3292,14 @@ unix_cli_set_terminal_pager (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; clib_error_t *error = 0; + cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index); + + if (!cf->is_interactive) + return clib_error_return (0, "invalid for non-interactive sessions"); + if (!unformat_user (input, unformat_line_input, line_input)) return 0; - cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index); - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat (line_input, "on")) @@ -3100,11 +3351,14 @@ unix_cli_set_terminal_history (vlib_main_t * vm, u32 limit; clib_error_t *error = 0; + cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index); + + if (!cf->is_interactive) + return clib_error_return (0, "invalid for non-interactive sessions"); + if (!unformat_user (input, unformat_line_input, line_input)) return 0; - cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index); - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat (line_input, "on")) @@ -3162,6 +3416,9 @@ unix_cli_set_terminal_ansi (vlib_main_t * vm, cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index); + if (!cf->is_interactive) + return clib_error_return (0, "invalid for non-interactive sessions"); + if (unformat (input, "on")) cf->ansi_capable = 1; else if (unformat (input, "off")) diff --git a/src/vpp/app/vppctl.c b/src/vpp/app/vppctl.c index 980936f1..f81a0ce9 100644 --- a/src/vpp/app/vppctl.c +++ b/src/vpp/app/vppctl.c @@ -41,12 +41,16 @@ volatile int window_resized = 0; struct termios orig_tio; static void -send_ttype (clib_socket_t * s, int is_dumb) +send_ttype (clib_socket_t * s, int is_interactive) { + char *term; + + term = is_interactive ? getenv ("TERM") : "vppctl"; + if (term == NULL) + term = "dumb"; + clib_socket_tx_add_formatted (s, "%c%c%c" "%c%s" "%c%c", - IAC, SB, TELOPT_TTYPE, - 0, is_dumb ? 
"dumb" : getenv ("TERM"), - IAC, SE); + IAC, SB, TELOPT_TTYPE, 0, term, IAC, SE); clib_socket_tx (s); } @@ -81,7 +85,8 @@ signal_handler_term (int signum) } static u8 * -process_input (u8 * str, clib_socket_t * s, int is_interactive) +process_input (u8 * str, clib_socket_t * s, int is_interactive, + int *sent_ttype) { int i = 0; @@ -104,7 +109,10 @@ process_input (u8 * str, clib_socket_t * s, int is_interactive) vec_free (sb); i += 2; if (opt == TELOPT_TTYPE) - send_ttype (s, !is_interactive); + { + send_ttype (s, is_interactive); + *sent_ttype = 1; + } else if (is_interactive && opt == TELOPT_NAWS) send_naws (s); } @@ -138,6 +146,8 @@ main (int argc, char *argv[]) u8 *str = 0; u8 *cmd = 0; int do_quit = 0; + int is_interactive = 0; + int sent_ttype = 0; clib_mem_init (0, 64ULL << 10); @@ -164,33 +174,47 @@ main (int argc, char *argv[]) if (error) goto done; - /* Capture terminal resize events */ - memset (&sa, 0, sizeof (struct sigaction)); - sa.sa_handler = signal_handler_winch; + is_interactive = isatty (STDIN_FILENO) && cmd == 0; - if (sigaction (SIGWINCH, &sa, 0) < 0) + if (is_interactive) { - error = clib_error_return_unix (0, "sigaction"); - goto done; - } + /* Capture terminal resize events */ + memset (&sa, 0, sizeof (struct sigaction)); + sa.sa_handler = signal_handler_winch; + if (sigaction (SIGWINCH, &sa, 0) < 0) + { + error = clib_error_return_unix (0, "sigaction"); + goto done; + } - sa.sa_handler = signal_handler_term; - if (sigaction (SIGTERM, &sa, 0) < 0) - { - error = clib_error_return_unix (0, "sigaction"); - goto done; - } + /* Capture SIGTERM to reset tty settings */ + sa.sa_handler = signal_handler_term; + if (sigaction (SIGTERM, &sa, 0) < 0) + { + error = clib_error_return_unix (0, "sigaction"); + goto done; + } - /* Save the original tty state so we can restore it later */ - tcgetattr (STDIN_FILENO, &orig_tio); + /* Save the original tty state so we can restore it later */ + if (tcgetattr (STDIN_FILENO, &orig_tio) < 0) + { + error = clib_error_return_unix (0, "tcgetattr"); + goto done; + } + + /* Tweak the tty settings */ + tio = orig_tio; + /* echo off, canonical mode off, ext'd input processing off */ + tio.c_lflag &= ~(ECHO | ICANON | IEXTEN); + tio.c_cc[VMIN] = 1; /* 1 byte at a time */ + tio.c_cc[VTIME] = 0; /* no timer */ - /* Tweak the tty settings */ - tio = orig_tio; - /* echo off, canonical mode off, ext'd input processing off */ - tio.c_lflag &= ~(ECHO | ICANON | IEXTEN); - tio.c_cc[VMIN] = 1; /* 1 byte at a time */ - tio.c_cc[VTIME] = 0; /* no timer */ - tcsetattr (STDIN_FILENO, TCSAFLUSH, &tio); + if (tcsetattr (STDIN_FILENO, TCSAFLUSH, &tio) < 0) + { + error = clib_error_return_unix (0, "tcsetattr"); + goto done; + } + } efd = epoll_create1 (0); @@ -199,8 +223,12 @@ main (int argc, char *argv[]) event.data.fd = STDIN_FILENO; if (epoll_ctl (efd, EPOLL_CTL_ADD, STDIN_FILENO, &event) != 0) { - error = clib_error_return_unix (0, "epoll_ctl[%d]", STDIN_FILENO); - goto done; + /* ignore EPERM; it means stdin is something like /dev/null */ + if (errno != EPERM) + { + error = clib_error_return_unix (0, "epoll_ctl[%d]", STDIN_FILENO); + goto done; + } } /* register socket */ @@ -240,6 +268,9 @@ main (int argc, char *argv[]) int n; char c[100]; + if (!sent_ttype) + continue; /* not ready for this yet */ + n = read (STDIN_FILENO, c, sizeof (c)); if (n > 0) { @@ -250,6 +281,8 @@ main (int argc, char *argv[]) } else if (n < 0) clib_warning ("read rv=%d", n); + else /* EOF */ + do_quit = 1; } else if (event.data.fd == s->fd) { @@ -260,27 +293,49 @@ main (int argc, 
char *argv[]) if (clib_socket_rx_end_of_file (s)) break; - str = process_input (str, s, cmd == 0); + str = process_input (str, s, is_interactive, &sent_ttype); if (vec_len (str) > 0) { - n = write (STDOUT_FILENO, str, vec_len (str)); - if (n < 0) + int len = vec_len (str); + u8 *p = str, *q = str; + + while (len) { - error = clib_error_return_unix (0, "write"); - goto done; + /* Search for and skip NUL bytes */ + while (q < (p + len) && *q) + q++; + + n = write (STDOUT_FILENO, p, q - p); + if (n < 0) + { + error = clib_error_return_unix (0, "write"); + goto done; + } + + while (q < (p + len) && !*q) + q++; + len -= q - p; + p = q; } + vec_reset_length (str); } if (do_quit) { - clib_socket_tx_add_formatted (s, "q\n"); + /* Ask the other end to close the connection */ + clib_socket_tx_add_formatted (s, "quit\n"); clib_socket_tx (s); do_quit = 0; } - if (cmd) + if (cmd && sent_ttype) { + /* We wait until after the TELNET TTYPE option has been sent. + * That is to make sure the session at the VPP end has switched + * to line-by-line mode, and thus avoid prompts and echoing. + * Note that it does also disable further TELNET option processing. + */ clib_socket_tx_add_formatted (s, "%s\n", cmd); clib_socket_tx (s); vec_free (cmd); @@ -302,12 +357,15 @@ done: if (efd > -1) close (efd); + if (is_interactive) + tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_tio); + if (error) { clib_error_report (error); return 1; } - tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_tio); + return 0; } -- cgit 1.2.3-korg From e3c13e8b5b1e799e687fd93783407548e2b96594 Mon Sep 17 00:00:00 2001 From: Chris Luke Date: Thu, 26 Oct 2017 10:44:43 -0400 Subject: Fix for vppctl and interactive commands (VPP-1038) - Interactive commands like "ping" read extra input from the input stream. - In the case of "ping" it is simply a signal to cease the current operation. - "vppctl", in non-interactive mode, will issue a "quit" immediately after the requested command to queue up closing of the session. - This resulted in "ping" thinking a keypress was seen and returning control to the CLI; the "quit" command however is consumed by the keypress event handler and thus the session does not close. - This patch reworks vppctl slightly to only issue "quit" after the command has completed. In particular it uses the fact that VPP issues NUL bytes as a surrogate prompt between output of commands to signal acknowledgement that the command has completed; vppctl now flags that the quit should be issued after the next such acknowledgement. - Since input it still accepted, the user can still terminate the "ping" early, if desired. 
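The acknowledgement scheme described in the points above can be reduced to a small standalone sketch. This is not the vppctl source; session_t and the session_* helpers below are illustrative names only. The idea is simply that every NUL byte received from VPP counts as one completed command, and that "quit" is considered safe to send only once the acknowledgement counter has moved past the value recorded when the command was issued; the change below does the same thing inside main() with the acked and do_quit variables.

#include <stdio.h>

/* Illustrative session state; the real vppctl keeps the equivalent
 * counters as local variables in main (). */
typedef struct
{
  int acked;     /* NUL "surrogate prompts" seen so far; starts at 1 */
  int do_quit;   /* 0 = no quit pending, else the acked value to wait past */
} session_t;

/* Handle a buffer received from VPP: print printable output and count
 * every NUL byte as an acknowledgement that a command has completed. */
static void
session_handle_output (session_t * s, const char *buf, int len)
{
  int i;
  for (i = 0; i < len; i++)
    {
      if (buf[i] == '\0')
        s->acked++;
      else
        putchar (buf[i]);
    }
}

/* Record that a one-shot command has just been sent; "quit" must wait
 * until the next acknowledgement arrives. */
static void
session_command_sent (session_t * s)
{
  s->do_quit = s->acked;
}

/* True once it is safe to send "quit" without interrupting the command. */
static int
session_should_quit (const session_t * s)
{
  return s->do_quit && s->do_quit < s->acked;
}

int
main (void)
{
  session_t s = { 1, 0 };
  static const char reply[] = { 'o', 'k', '\n', '\0' };

  session_command_sent (&s);
  session_handle_output (&s, reply, sizeof (reply));
  printf ("safe to send quit: %s\n", session_should_quit (&s) ? "yes" : "no");
  return 0;
}

In the sketch, session_should_quit () returns true only after the simulated reply, including its trailing NUL, has been consumed, which mirrors the "quit after the next response" behaviour added by this patch.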
Change-Id: I7e3dbe767f32f8e364ccb5f81799759b311585df Signed-off-by: Chris Luke --- src/vpp/app/vppctl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src/vpp/app') diff --git a/src/vpp/app/vppctl.c b/src/vpp/app/vppctl.c index f81a0ce9..66fe00ab 100644 --- a/src/vpp/app/vppctl.c +++ b/src/vpp/app/vppctl.c @@ -147,6 +147,7 @@ main (int argc, char *argv[]) u8 *cmd = 0; int do_quit = 0; int is_interactive = 0; + int acked = 1; /* counts messages from VPP; starts at 1 */ int sent_ttype = 0; @@ -314,7 +315,10 @@ main (int argc, char *argv[]) } while (q < (p + len) && !*q) - q++; + { + q++; + acked++; /* every NUL is an acknowledgement */ + } len -= q - p; p = q; } @@ -322,7 +326,7 @@ main (int argc, char *argv[]) vec_reset_length (str); } - if (do_quit) + if (do_quit && do_quit < acked) { /* Ask the other end to close the connection */ clib_socket_tx_add_formatted (s, "quit\n"); @@ -339,7 +343,7 @@ main (int argc, char *argv[]) clib_socket_tx_add_formatted (s, "%s\n", cmd); clib_socket_tx (s); vec_free (cmd); - do_quit = 1; + do_quit = acked; /* quit after the next response */ } } else -- cgit 1.2.3-korg
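Taken together, these changes harden both ends of the CLI socket: the VPP side now tolerates a client that disappears mid-output, and vppctl now closes its session only after the requested command has completed. The write-side pattern used on the VPP end, send () with MSG_NOSIGNAL so a dead peer reports EPIPE instead of raising SIGPIPE, with EPIPE then treated as an orderly close, can be sketched in isolation as follows. cli_output_flush () and on_peer_closed () are hypothetical names, not part of the VPP code, which instead signals the session's process node with UNIX_CLI_PROCESS_EVENT_QUIT.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

/* Stand-in for what VPP actually does, namely signalling the session's
 * CLI process node with UNIX_CLI_PROCESS_EVENT_QUIT. */
static void
on_peer_closed (int fd)
{
  fprintf (stderr, "peer on fd %d closed the connection\n", fd);
}

/* Flush pending CLI output to a descriptor.  Returns the number of bytes
 * consumed, 0 if the caller should retry later, or -1 on a real error.
 * For sockets, MSG_NOSIGNAL means a vanished peer shows up as EPIPE in
 * errno rather than killing the process with SIGPIPE, so a dropped
 * client can be handled as an orderly close. */
static ssize_t
cli_output_flush (int fd, int is_socket, const void *buf, size_t len)
{
  ssize_t n;

  if (is_socket)
    n = send (fd, buf, len, MSG_NOSIGNAL);
  else
    n = write (fd, buf, len);

  if (n >= 0)
    return n;

  if (errno == EAGAIN || errno == EWOULDBLOCK)
    return 0;                   /* descriptor not writable yet; retry later */

  if (errno == EPIPE)
    {
      on_peer_closed (fd);      /* connection closed on us */
      return 0;
    }

  return -1;                    /* genuine error; caller reports it */
}

int
main (void)
{
  static const char msg[] = "hello from the CLI\n";
  return cli_output_flush (STDOUT_FILENO, 0 /* not a socket */ ,
                           msg, strlen (msg)) < 0;
}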